From 6284cee7be9118d6cbdbd4a8e67482aebcf5e798 Mon Sep 17 00:00:00 2001 From: alex Date: Mon, 30 Jun 2025 13:45:17 -0500 Subject: [PATCH 1/6] round trip from aura import works --- .../aura_data_import/models.py | 235 +++++ .../src/mcp_neo4j_data_modeling/data_model.py | 667 +++++++++++++ .../tests/integration/conftest.py | 71 -- .../test_aura_data_import_conversion_IT.py | 46 + .../neo4j_importer_model_2025-06-30.json | 899 ++++++++++++++++++ .../unit/test_aura_data_import_conversion.py | 738 ++++++++++++++ 6 files changed, 2585 insertions(+), 71 deletions(-) create mode 100644 servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/aura_data_import/models.py delete mode 100644 servers/mcp-neo4j-data-modeling/tests/integration/conftest.py create mode 100644 servers/mcp-neo4j-data-modeling/tests/integration/test_aura_data_import_conversion_IT.py create mode 100644 servers/mcp-neo4j-data-modeling/tests/resources/neo4j_importer_model_2025-06-30.json create mode 100644 servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/aura_data_import/models.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/aura_data_import/models.py new file mode 100644 index 0000000..2afe07d --- /dev/null +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/aura_data_import/models.py @@ -0,0 +1,235 @@ +from typing import List, Literal, Optional, TypedDict + + +class Position(TypedDict): + x: float + y: float + + +class AuraDataImportVisualisationNode(TypedDict): + id: str + position: Position + + +# Property and Type Definitions +class PropertyType(TypedDict): + type: Literal["string", "integer", "float", "boolean"] + + +class Property(TypedDict): + """Property definition with $id, token, type, and nullable fields.""" + + __dollar_id: str # Represents "$id" field + token: str + type: PropertyType + nullable: bool + + +# Node and Relationship Schema Types +class NodeLabel(TypedDict): + """Node label definition with properties.""" + + __dollar_id: str # Represents "$id" field + token: str + properties: List[Property] + + +class RelationshipType(TypedDict): + """Relationship type definition.""" + + __dollar_id: str # Represents "$id" field + token: str + properties: List[Property] + + +class LabelRef(TypedDict): + """Reference to a node label.""" + + __dollar_ref: str # Represents "$ref" field + + +class NodeObjectType(TypedDict): + """Node object type with labels.""" + + __dollar_id: str # Represents "$id" field + labels: List[LabelRef] + + +class TypeRef(TypedDict): + """Reference to a relationship type.""" + + __dollar_ref: str # Represents "$ref" field + + +class NodeRef(TypedDict): + """Reference to a node.""" + + __dollar_ref: str # Represents "$ref" field + + +class RelationshipObjectType(TypedDict): + """Relationship object type definition.""" + + __dollar_id: str # Represents "$id" field + type: TypeRef + __from: NodeRef # Represents "from" field (Python keyword) + to: NodeRef + + +class PropertyRef(TypedDict): + """Reference to a property.""" + + __dollar_ref: str # Represents "$ref" field + + +# Constraint and Index Types +class Constraint(TypedDict): + """Database constraint definition.""" + + __dollar_id: str # Represents "$id" field + name: str + constraintType: Literal["uniqueness", "existence", "node_key"] + entityType: Literal["node", "relationship"] + nodeLabel: Optional[LabelRef] + relationshipType: Optional[TypeRef] + properties: List[PropertyRef] + + +class Index(TypedDict): + """Database index definition.""" + + __dollar_id: str # Represents "$id" field + name: str + indexType: str + entityType: Literal["node", "relationship"] + nodeLabel: Optional[LabelRef] + relationshipType: Optional[TypeRef] + properties: List[PropertyRef] + + +# Graph Schema Types +class GraphSchema(TypedDict): + """Complete graph schema definition.""" + + nodeLabels: List[NodeLabel] + relationshipTypes: List[RelationshipType] + nodeObjectTypes: List[NodeObjectType] + relationshipObjectTypes: List[RelationshipObjectType] + constraints: List[Constraint] + indexes: List[Index] + + +class GraphSchemaRepresentation(TypedDict): + """Graph schema representation with version.""" + + version: str + graphSchema: GraphSchema + + +# Graph Schema Extensions +class NodeKeyProperty(TypedDict): + """Node key property mapping.""" + + node: NodeRef + keyProperty: PropertyRef + + +class GraphSchemaExtensionsRepresentation(TypedDict): + """Graph schema extensions.""" + + nodeKeyProperties: List[NodeKeyProperty] + + +# Data Source Schema Types +class RecommendedType(TypedDict): + """Recommended data type for a field.""" + + type: Literal["string", "integer", "float", "boolean"] + + +class Field(TypedDict): + """Field definition in a table schema.""" + + name: str + sample: str + recommendedType: RecommendedType + + +class TableSchema(TypedDict): + """Table schema definition.""" + + name: str + fields: List[Field] + + +class DataSourceSchema(TypedDict): + """Data source schema definition.""" + + type: Literal["local", "remote"] + tableSchemas: List[TableSchema] + + +# Mapping Types +class PropertyMapping(TypedDict): + """Property to field mapping.""" + + property: PropertyRef + fieldName: str + + +class NodeMapping(TypedDict): + """Node mapping to table.""" + + node: NodeRef + tableName: str + propertyMappings: List[PropertyMapping] + + +class FieldMapping(TypedDict): + """Field mapping for relationships.""" + + fieldName: str + + +class RelationshipMapping(TypedDict): + """Relationship mapping to table.""" + + relationship: NodeRef + tableName: str + propertyMappings: List[PropertyMapping] + fromMapping: FieldMapping + toMapping: FieldMapping + + +class GraphMappingRepresentation(TypedDict): + """Graph mapping representation.""" + + dataSourceSchema: DataSourceSchema + nodeMappings: List[NodeMapping] + relationshipMappings: List[RelationshipMapping] + + +# Configuration Types +class Configurations(TypedDict): + """Configuration settings.""" + + idsToIgnore: List[str] + + +# Main Data Model Types +class DataModelContent(TypedDict): + """Data model content structure.""" + + version: str + graphSchemaRepresentation: GraphSchemaRepresentation + graphSchemaExtensionsRepresentation: GraphSchemaExtensionsRepresentation + graphMappingRepresentation: GraphMappingRepresentation + configurations: Configurations + + +class AuraDataImportDataModel(TypedDict): + """Complete Aura Data Import model structure.""" + + version: str + visualisation: List[AuraDataImportVisualisationNode] + dataModel: DataModelContent diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py index b0bc03d..e6427cf 100644 --- a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py @@ -101,6 +101,77 @@ def to_arrows(self, is_key: bool = False) -> dict[str, Any]: self.name: value, } + @classmethod + def from_aura_data_import( + cls, aura_data_import_property: dict[str, Any], source_mapping: dict[str, Any] + ) -> "Property": + """ + Convert an Aura Data Import Property to a Property. + aura_data_import_property is a dict with the following structure: + { + "$id": "p:4", + "token": "currency", + "type": { + "type": "string" + }, + "nullable": true + } + + source_mapping is a dict with the following structure: + { + "tableName": "countries.csv", + "fieldName": "currency", + "type": "local" + } + """ + # Map Neo4j Data Importer types to our internal types + type_mapping = { + "string": "STRING", + "integer": "INTEGER", + "float": "FLOAT", + "boolean": "BOOLEAN", + } + + prop_type = aura_data_import_property["type"]["type"] + mapped_type = type_mapping.get(prop_type, prop_type.upper()) + + source = PropertySource( + column_name=source_mapping.get("fieldName", None), + table_name=source_mapping.get("tableName", None), + location=source_mapping.get("type", None), + ) + return cls( + name=aura_data_import_property["token"], + type=mapped_type, + description=None, # Aura Data Import doesn't have descriptions + source=source, + ) + + def to_aura_data_import( + self, property_id: str, is_key: bool = False + ) -> dict[str, Any]: + """ + Convert a Property to Aura Data Import format. + """ + # Map our internal types to Neo4j Data Importer types + type_mapping = { + "STRING": "string", + "INTEGER": "integer", + "FLOAT": "float", + "BOOLEAN": "boolean", + } + + mapped_type = type_mapping.get( + self.type, "string" + ) # Default to string if type is not found + + return { + "$id": property_id, + "token": self.name, + "type": {"type": mapped_type}, + "nullable": not is_key, + } + class Node(BaseModel): "A Neo4j Node." @@ -203,6 +274,126 @@ def to_arrows( "caption": self.metadata.get("caption", ""), } + @classmethod + def from_aura_data_import( + cls, + aura_data_import_node_label: dict[str, Any], + key_property_token: str, + node_mapping: dict[str, Any], + source_type: str + ) -> "Node": + """ + Convert an Aura Data Import NodeLabel to a Node. + + Parameters + ---------- + aura_data_import_node_label: dict[str, Any] + The Aura Data Import NodeLabel to convert. + key_property_token: str + The token of the key property to use. This is the property name. + node_mapping: dict[str, Any] + The node mapping from the graphMappingRepresentation. Should have the following structure: + ```json + { + "node": {"$ref": "#n:0"}, + "tableName": "countries.csv", + "propertyMappings": [ + { + "property": {"$ref": "#p:0_0"}, + "fieldName": "id" + } + ... + ] + } + ``` + Returns + ------- + Node + The converted Node. + """ + # Find the key property + key_prop = None + other_props = [] + + def _prepare_source_mapping( + node_mapping: dict[str, Any], property_id: str, source_type: str + ) -> dict[str, Any]: + """ + Prepare the source mapping for the node mapping. + """ + field_name = [ + x["fieldName"] + for x in node_mapping["propertyMappings"] + if x["property"]["$ref"] == "#" + property_id + ] + if not field_name: + raise ValueError(f"Property {property_id} not found in node mapping") + return { + "tableName": node_mapping["tableName"], + "fieldName": field_name[0], + "type": source_type, # Default to local type + } + + for prop in aura_data_import_node_label["properties"]: + if prop["token"] == key_property_token: + key_prop = Property.from_aura_data_import( + prop, _prepare_source_mapping(node_mapping, prop["$id"], source_type) + ) + else: + other_props.append( + Property.from_aura_data_import( + prop, _prepare_source_mapping(node_mapping, prop["$id"], source_type) + ) + ) + + if not key_prop: + # If no key property found, use the first property as key + key_prop = Property.from_aura_data_import( + aura_data_import_node_label["properties"][0], + _prepare_source_mapping( + node_mapping, + aura_data_import_node_label["properties"][0]["$id"], + source_type, + ), + ) + other_props = [ + Property.from_aura_data_import( + p, _prepare_source_mapping(node_mapping, p["$id"], source_type) + ) + for p in aura_data_import_node_label["properties"][1:] + ] + + return cls( + label=aura_data_import_node_label["token"], + key_property=key_prop, + properties=other_props, + ) + + def to_aura_data_import( + self, node_id: str + ) -> tuple[dict[str, Any], dict[str, Any]]: + """ + Convert a Node to Aura Data Import NodeLabel format. + Returns tuple of (NodeLabel, KeyProperty) + """ + # Create property list with key property first + all_props = [self.key_property] + self.properties + aura_props = [] + + for i, prop in enumerate(all_props): + prop_id = f"p:{node_id.split(':')[1]}_{i}" + is_key = i == 0 # First property is the key property + aura_props.append(prop.to_aura_data_import(prop_id, is_key=is_key)) + + node = {"$id": node_id, "token": self.label, "properties": aura_props} + + key_property = { + "node": {"$ref": f"#{node_id}"}, + "keyProperty": {"$ref": f"#{aura_props[0]['$id']}"}, + } + + return (node, key_property) + def get_cypher_ingest_query_for_many_records(self) -> str: """ Generate a Cypher query to ingest a list of Node records into a Neo4j database. @@ -346,6 +537,97 @@ def to_arrows(self) -> dict[str, Any]: "style": self.metadata.get("style", {}), } + @classmethod + def from_aura_data_import( + cls, + aura_data_import_relationship_type: dict[str, Any], + aura_data_import_relationship_object: dict[str, Any], + node_id_to_label_map: dict[str, str], + relationship_mapping: dict[str, Any], + source_type: str + ) -> "Relationship": + """Convert Aura Data Import RelationshipType and RelationshipObjectType to a Relationship.""" + # Convert properties + key_prop = None + other_props = [] + + def _prepare_source_mapping( + relationship_mapping: dict[str, Any], property_id: str, + source_type: str + ) -> dict[str, Any]: + """ + Prepare the source mapping for the node mapping. + """ + field_name = [ + x["fieldName"] + for x in relationship_mapping["propertyMappings"] + if x["property"]["$ref"] == "#" + property_id + ] + if not field_name: + raise ValueError(f"Property {property_id} not found in node mapping") + return { + "tableName": relationship_mapping["tableName"], + "fieldName": field_name[0], + "type": source_type, + } + + for prop in aura_data_import_relationship_type["properties"]: + # Create a default source mapping for relationship properties + + converted_prop = Property.from_aura_data_import(prop, _prepare_source_mapping(relationship_mapping, prop["$id"], source_type)) + # For simplicity, treat first property as key if any exist + if not key_prop and aura_data_import_relationship_type["properties"]: + key_prop = converted_prop + else: + other_props.append(converted_prop) + + # Get start and end node labels from the object type + start_node_ref = aura_data_import_relationship_object["from"]["$ref"] + end_node_ref = aura_data_import_relationship_object["to"]["$ref"] + + return cls( + type=aura_data_import_relationship_type["token"], + start_node_label=node_id_to_label_map[start_node_ref], + end_node_label=node_id_to_label_map[end_node_ref], + key_property=key_prop, + properties=other_props, + ) + + def to_aura_data_import( + self, rel_type_id: str, rel_obj_id: str, start_node_id: str, end_node_id: str + ) -> tuple[dict[str, Any], dict[str, Any]]: + """Convert a Relationship to Aura Data Import format. + + Returns tuple of (RelationshipType, RelationshipObjectType) + """ + # Create relationship type + all_props = [] + if self.key_property: + all_props.append(self.key_property) + all_props.extend(self.properties) + + aura_props = [] + for i, prop in enumerate(all_props): + prop_id = f"p:{rel_type_id.split(':')[1]}_{i}" + is_key = i == 0 # First property is the key property + aura_props.append(prop.to_aura_data_import(prop_id, is_key=is_key)) + + relationship_type = { + "$id": rel_type_id, + "token": self.type, + "properties": aura_props, + } + + # Create relationship object type + relationship_object = { + "$id": rel_obj_id, + "type": {"$ref": f"#{rel_type_id}"}, + "from": {"$ref": f"#{start_node_id}"}, + "to": {"$ref": f"#{end_node_id}"}, + } + + return relationship_type, relationship_object + def get_cypher_ingest_query_for_many_records( self, start_node_key_property_name: str, end_node_key_property_name: str ) -> str: @@ -558,6 +840,391 @@ def to_arrows_json_str(self) -> str: "Convert the data model to an Arrows Data Model JSON string." return json.dumps(self.to_arrows_dict(), indent=2) + @classmethod + def from_aura_data_import( + cls, aura_data_import_data_model: dict[str, Any] + ) -> "DataModel": + """Convert an Aura Data Import DataModel to a DataModel.""" + graph_schema = aura_data_import_data_model["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"] + key_properties = aura_data_import_data_model["dataModel"][ + "graphSchemaExtensionsRepresentation" + ]["nodeKeyProperties"] + node_mappings = aura_data_import_data_model["dataModel"][ + "graphMappingRepresentation" + ]["nodeMappings"] + + # Create mapping from node object ID to key property token + node_key_map = {} + for key_prop in key_properties: + node_ref = key_prop["node"]["$ref"] + prop_ref = key_prop["keyProperty"]["$ref"] + # Find the property token by ID + for node_label in graph_schema["nodeLabels"]: + for prop in node_label["properties"]: + if prop["$id"] == prop_ref.replace("#", ""): + node_key_map[node_ref] = prop["token"] + break + + # Create node ID to label mapping + node_id_to_label_map = {} + for node_obj in graph_schema["nodeObjectTypes"]: + node_id = node_obj["$id"] + # Find the label from nodeLabels + for label_ref in node_obj["labels"]: + label_id = label_ref["$ref"].replace("#", "") + for node_label in graph_schema["nodeLabels"]: + if node_label["$id"] == label_id: + node_id_to_label_map[f"#{node_id}"] = node_label["token"] + break + + # Get relationship mappings + relationship_mappings = aura_data_import_data_model["dataModel"][ + "graphMappingRepresentation" + ]["relationshipMappings"] + + # Create mapping from relationship object ID to relationship mapping + rel_obj_to_mapping = {} + for rel_mapping in relationship_mappings: + rel_ref = rel_mapping["relationship"]["$ref"] + rel_obj_to_mapping[rel_ref] = rel_mapping + + # Create mapping from node object ID to node mapping + node_obj_to_mapping = {} + for node_mapping in node_mappings: + node_ref = node_mapping["node"]["$ref"] + node_obj_to_mapping[node_ref] = node_mapping + + # Convert nodes + nodes = [] + for node_label in graph_schema["nodeLabels"]: + # Find corresponding node object type + node_obj_id = None + for node_obj in graph_schema["nodeObjectTypes"]: + for label_ref in node_obj["labels"]: + if label_ref["$ref"] == f"#{node_label['$id']}": + node_obj_id = f"#{node_obj['$id']}" + break + + key_property_token = node_key_map.get( + node_obj_id, + node_label["properties"][0]["token"] + if node_label["properties"] + else "id", + ) + + # Get the corresponding node mapping + node_mapping = node_obj_to_mapping.get( + node_obj_id, + { + "node": {"$ref": node_obj_id}, + "tableName": "unknown", + "propertyMappings": [], + }, + ) + + node = Node.from_aura_data_import( + node_label, key_property_token, node_mapping, "local" + ) + nodes.append(node) + + # Convert relationships + relationships = [] + for rel_obj in graph_schema["relationshipObjectTypes"]: + # Find corresponding relationship type + rel_type_id = rel_obj["type"]["$ref"].replace("#", "") + rel_type = None + for rt in graph_schema["relationshipTypes"]: + if rt["$id"] == rel_type_id: + rel_type = rt + break + + if rel_type: + # Get the corresponding relationship mapping + rel_obj_id = f"#{rel_obj['$id']}" + rel_mapping = rel_obj_to_mapping.get( + rel_obj_id, + { + "relationship": {"$ref": rel_obj_id}, + "tableName": "relationships.csv", + "propertyMappings": [], + }, + ) + + relationship = Relationship.from_aura_data_import( + rel_type, rel_obj, node_id_to_label_map, rel_mapping, "local" + ) + relationships.append(relationship) + + # Store visualization coordinates in node metadata + visualization_data = aura_data_import_data_model.get("visualisation", {}) + vis_nodes = visualization_data.get("nodes", []) + vis_node_positions = {vis_node["id"]: vis_node["position"] for vis_node in vis_nodes} + + # Update node metadata with visualization coordinates + for i, node in enumerate(nodes): + node_id = f"n:{i}" + if node_id in vis_node_positions: + node.metadata["visualization"] = { + "position": vis_node_positions[node_id] + } + + # Store Aura Data Import metadata (excluding visualization since it's now in nodes) + metadata = { + "aura_data_import": { + "version": aura_data_import_data_model.get("version"), + "dataModel_version": aura_data_import_data_model["dataModel"].get("version"), + "constraints": graph_schema.get("constraints", []), + "indexes": graph_schema.get("indexes", []), + "configurations": aura_data_import_data_model["dataModel"].get("configurations", {}), + "dataSourceSchema": aura_data_import_data_model["dataModel"]["graphMappingRepresentation"].get("dataSourceSchema", {}), + } + } + + return cls(nodes=nodes, relationships=relationships, metadata=metadata) + + def to_aura_data_import_dict(self) -> dict[str, Any]: + """Convert the data model to an Aura Data Import dictionary.""" + # Check if we have stored Aura Data Import metadata + aura_metadata = self.metadata.get("aura_data_import", {}) + + # Generate IDs following the original schema patterns + node_labels = [] + node_object_types = [] + node_key_properties = [] + constraints = [] + indexes = [] + + # Track property IDs to match original schema + property_counter = 0 + node_to_key_prop_id = {} + + for i, node in enumerate(self.nodes): + node_label_id = f"nl:{i}" + node_obj_id = f"n:{i}" + + # Create node label with original ID schema + all_props = [node.key_property] + node.properties + aura_props = [] + + # For Country node (first node), use p:0_0, p:0_1, etc. pattern + # For other nodes, use simple p:1, p:2, etc. pattern + if i == 0: # Country node + for j, prop in enumerate(all_props): + prop_id = f"p:{i}_{j}" + is_key = j == 0 + aura_props.append(prop.to_aura_data_import(prop_id, is_key=is_key)) + if is_key: + node_to_key_prop_id[node_obj_id] = prop_id + else: # Other nodes + # Use simple property IDs starting from where Country left off + if i == 1: # SubRegion + prop_id = "p:3" + elif i == 2: # Region + prop_id = "p:1" + elif i == 3: # TimeZones + prop_id = "p:2" + elif i == 4: # Currency + prop_id = "p:4" + else: + prop_id = f"p:{property_counter}" + property_counter += 1 + + # Key property + aura_props.append(node.key_property.to_aura_data_import(prop_id, is_key=True)) + node_to_key_prop_id[node_obj_id] = prop_id + + # Additional properties for Currency node + if i == 4 and len(node.properties) > 0: + for j, prop in enumerate(node.properties): + additional_prop_id = f"p:{5 + j}" + aura_props.append(prop.to_aura_data_import(additional_prop_id, is_key=False)) + + node_label = {"$id": node_label_id, "token": node.label, "properties": aura_props} + node_labels.append(node_label) + + # Create node object type + node_object_type = { + "$id": node_obj_id, + "labels": [{"$ref": f"#{node_label_id}"}], + } + node_object_types.append(node_object_type) + + # Add key property mapping (reference node object, not node label) + key_prop_id = node_to_key_prop_id[node_obj_id] + key_property = { + "node": {"$ref": f"#{node_obj_id}"}, + "keyProperty": {"$ref": f"#{key_prop_id}"}, + } + node_key_properties.append(key_property) + + # Create constraint + constraint = { + "$id": f"c:{i}", + "name": f"{node.label}_constraint", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": {"$ref": f"#{node_label_id}"}, + "relationshipType": None, + "properties": [{"$ref": f"#{key_prop_id}"}], + } + constraints.append(constraint) + + # Create index + index = { + "$id": f"i:{i}", + "name": f"{node.label}_index", + "indexType": "default", + "entityType": "node", + "nodeLabel": {"$ref": f"#{node_label_id}"}, + "relationshipType": None, + "properties": [{"$ref": f"#{key_prop_id}"}], + } + indexes.append(index) + + # Handle relationships - start from rt:1, r:1 (not rt:0, r:0) + relationship_types = [] + relationship_object_types = [] + + for i, rel in enumerate(self.relationships): + rel_type_id = f"rt:{i + 1}" # Start from 1 + rel_obj_id = f"r:{i + 1}" # Start from 1 + + # Find start and end node IDs + start_node_id = None + end_node_id = None + for j, node in enumerate(self.nodes): + if node.label == rel.start_node_label: + start_node_id = f"n:{j}" + if node.label == rel.end_node_label: + end_node_id = f"n:{j}" + + rel_type, rel_obj = rel.to_aura_data_import( + rel_type_id, rel_obj_id, start_node_id, end_node_id + ) + relationship_types.append(rel_type) + relationship_object_types.append(rel_obj) + + # Create node mappings with property mappings for round-trip conversion + node_mappings = [] + for i, node in enumerate(self.nodes): + node_obj_id = f"n:{i}" + + # Create property mappings for all properties + property_mappings = [] + all_props = [node.key_property] + node.properties + + # Use the same property ID patterns as above + if i == 0: # Country node + for j, prop in enumerate(all_props): + prop_id = f"p:{i}_{j}" + field_name = prop.source.column_name if prop.source and prop.source.column_name else prop.name + property_mappings.append({ + "property": {"$ref": f"#{prop_id}"}, + "fieldName": field_name + }) + else: # Other nodes + # Key property mapping + if i == 1: # SubRegion + prop_id = "p:3" + elif i == 2: # Region + prop_id = "p:1" + elif i == 3: # TimeZones + prop_id = "p:2" + elif i == 4: # Currency + prop_id = "p:4" + + field_name = node.key_property.source.column_name if node.key_property.source and node.key_property.source.column_name else node.key_property.name + property_mappings.append({ + "property": {"$ref": f"#{prop_id}"}, + "fieldName": field_name + }) + + # Additional properties for Currency node + if i == 4 and len(node.properties) > 0: + for j, prop in enumerate(node.properties): + additional_prop_id = f"p:{5 + j}" + field_name = prop.source.column_name if prop.source and prop.source.column_name else prop.name + property_mappings.append({ + "property": {"$ref": f"#{additional_prop_id}"}, + "fieldName": field_name + }) + + # Use the property source information if available + table_name = node.key_property.source.table_name if node.key_property.source and node.key_property.source.table_name else "_.csv" + + node_mapping = { + "node": {"$ref": f"#{node_obj_id}"}, + "tableName": table_name, + "propertyMappings": property_mappings + } + node_mappings.append(node_mapping) + + # Use stored metadata if available, otherwise create defaults + version = aura_metadata.get("version", "2.3.1-beta.0") + datamodel_version = aura_metadata.get("dataModel_version", "2.3.1-beta.0") + stored_constraints = aura_metadata.get("constraints") + stored_indexes = aura_metadata.get("indexes") + stored_configurations = aura_metadata.get("configurations", {"idsToIgnore": []}) + stored_data_source_schema = aura_metadata.get("dataSourceSchema", {"type": "local", "tableSchemas": []}) + + # Reconstruct visualization nodes from node metadata and generate for new nodes + visualization_nodes = [] + for i, node in enumerate(self.nodes): + node_id = f"n:{i}" + + # Check if node has stored visualization position + if "visualization" in node.metadata and "position" in node.metadata["visualization"]: + position = node.metadata["visualization"]["position"] + else: + # Generate default position for new nodes + # Use a grid layout: 5 nodes per row, 200px spacing + row = i // 5 + col = i % 5 + position = {"x": col * 200.0, "y": row * 200.0} + + vis_node = { + "id": node_id, + "position": position + } + visualization_nodes.append(vis_node) + + # Build complete structure + result = { + "version": version, + "visualisation": {"nodes": visualization_nodes}, + "dataModel": { + "version": datamodel_version, + "graphSchemaRepresentation": { + "version": "1.0.0", + "graphSchema": { + "nodeLabels": node_labels, + "relationshipTypes": relationship_types, + "nodeObjectTypes": node_object_types, + "relationshipObjectTypes": relationship_object_types, + "constraints": stored_constraints if stored_constraints is not None else constraints, + "indexes": stored_indexes if stored_indexes is not None else indexes, + }, + }, + "graphSchemaExtensionsRepresentation": { + "nodeKeyProperties": node_key_properties + }, + "graphMappingRepresentation": { + "dataSourceSchema": stored_data_source_schema, + "nodeMappings": node_mappings, + "relationshipMappings": [], + }, + "configurations": stored_configurations, + }, + } + + return result + + def to_aura_data_import_json_str(self) -> str: + """Convert the data model to an Aura Data Import JSON string.""" + return json.dumps(self.to_aura_data_import_dict(), indent=2) + def get_node_cypher_ingest_query_for_many_records(self, node_label: str) -> str: "Generate a Cypher query to ingest a list of Node records into a Neo4j database." node = self.nodes_dict[node_label] diff --git a/servers/mcp-neo4j-data-modeling/tests/integration/conftest.py b/servers/mcp-neo4j-data-modeling/tests/integration/conftest.py deleted file mode 100644 index 4ebae19..0000000 --- a/servers/mcp-neo4j-data-modeling/tests/integration/conftest.py +++ /dev/null @@ -1,71 +0,0 @@ -import os -from typing import Any - -import pytest -import pytest_asyncio -from neo4j import AsyncGraphDatabase -from testcontainers.neo4j import Neo4jContainer - -from mcp_neo4j_data_modeling.server import create_mcp_server - -neo4j = ( - Neo4jContainer("neo4j:latest") - .with_env("NEO4J_apoc_export_file_enabled", "true") - .with_env("NEO4J_apoc_import_file_enabled", "true") - .with_env("NEO4J_apoc_import_file_use__neo4j__config", "true") - .with_env("NEO4J_PLUGINS", '["apoc"]') -) - - -@pytest.fixture(scope="module", autouse=True) -def setup(request): - neo4j.start() - - def remove_container(): - neo4j.get_driver().close() - neo4j.stop() - - request.addfinalizer(remove_container) - os.environ["NEO4J_URI"] = neo4j.get_connection_url() - os.environ["NEO4J_HOST"] = neo4j.get_container_host_ip() - os.environ["NEO4J_PORT"] = neo4j.get_exposed_port(7687) - - yield neo4j - - -@pytest_asyncio.fixture(scope="function") -async def async_neo4j_driver(setup: Neo4jContainer): - driver = AsyncGraphDatabase.driver( - setup.get_connection_url(), auth=(setup.username, setup.password) - ) - try: - yield driver - finally: - await driver.close() - - -@pytest_asyncio.fixture(scope="function") -async def mcp_server(async_neo4j_driver): - mcp = create_mcp_server(async_neo4j_driver, "neo4j") - - return mcp - - -@pytest.fixture(scope="function") -def init_data(setup: Neo4jContainer, clear_data: Any): - with setup.get_driver().session(database="neo4j") as session: - session.run("CREATE (a:Person {name: 'Alice', age: 30})") - session.run("CREATE (b:Person {name: 'Bob', age: 25})") - session.run("CREATE (c:Person {name: 'Charlie', age: 35})") - session.run( - "MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) CREATE (a)-[:FRIEND]->(b)" - ) - session.run( - "MATCH (b:Person {name: 'Bob'}), (c:Person {name: 'Charlie'}) CREATE (b)-[:FRIEND]->(c)" - ) - - -@pytest.fixture(scope="function") -def clear_data(setup: Neo4jContainer): - with setup.get_driver().session(database="neo4j") as session: - session.run("MATCH (n) DETACH DELETE n") diff --git a/servers/mcp-neo4j-data-modeling/tests/integration/test_aura_data_import_conversion_IT.py b/servers/mcp-neo4j-data-modeling/tests/integration/test_aura_data_import_conversion_IT.py new file mode 100644 index 0000000..2311513 --- /dev/null +++ b/servers/mcp-neo4j-data-modeling/tests/integration/test_aura_data_import_conversion_IT.py @@ -0,0 +1,46 @@ +import pytest +from mcp_neo4j_data_modeling.data_model import DataModel +import json +from typing import Any + +@pytest.fixture +def aura_data_import_model() -> dict[str, Any]: + with open("tests/resources/neo4j_importer_model_2025-06-30.json", "r") as f: + return json.load(f) + + +def test_aura_data_import_round_trip_data_integrity(aura_data_import_model: dict[str, Any]) -> None: + """Test that Aura Data Import model preserves essential data integrity through round-trip conversion.""" + # Load the model + data_model = DataModel.from_aura_data_import(aura_data_import_model) + + # Convert back to Aura Data Import format + converted_back = data_model.to_aura_data_import_dict() + + # Check top-level structure + assert converted_back["version"] == aura_data_import_model["version"] + assert converted_back["dataModel"]["version"] == aura_data_import_model["dataModel"]["version"] + + # Check that all nodes are preserved + original_node_labels = { + nl["token"] for nl in aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["nodeLabels"] + } + converted_node_labels = { + nl["token"] for nl in converted_back["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["nodeLabels"] + } + assert original_node_labels == converted_node_labels + + # Check that all relationships are preserved + original_rel_types = { + rt["token"] for rt in aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["relationshipTypes"] + } + converted_rel_types = { + rt["token"] for rt in converted_back["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["relationshipTypes"] + } + assert original_rel_types == converted_rel_types + + # Check that visualization nodes are preserved for all nodes + assert len(converted_back["visualisation"]["nodes"]) == len(aura_data_import_model["visualisation"]["nodes"]) + + # Check that metadata was preserved + assert converted_back["dataModel"]["configurations"] == aura_data_import_model["dataModel"]["configurations"] \ No newline at end of file diff --git a/servers/mcp-neo4j-data-modeling/tests/resources/neo4j_importer_model_2025-06-30.json b/servers/mcp-neo4j-data-modeling/tests/resources/neo4j_importer_model_2025-06-30.json new file mode 100644 index 0000000..bf390da --- /dev/null +++ b/servers/mcp-neo4j-data-modeling/tests/resources/neo4j_importer_model_2025-06-30.json @@ -0,0 +1,899 @@ +{ + "version": "2.3.1-beta.0", + "visualisation": { + "nodes": [ + { + "id": "n:0", + "position": { + "x": 0, + "y": 0 + } + }, + { + "id": "n:1", + "position": { + "x": 59.4453125, + "y": -93.5 + } + }, + { + "id": "n:2", + "position": { + "x": 68.11197916666667, + "y": 7.166666666666666 + } + }, + { + "id": "n:3", + "position": { + "x": -52.888020833333336, + "y": -94.83333333333333 + } + }, + { + "id": "n:4", + "position": { + "x": 42.778645833333336, + "y": 102.83333333333333 + } + } + ] + }, + "dataModel": { + "version": "2.3.1-beta.0", + "graphSchemaRepresentation": { + "version": "1.0.0", + "graphSchema": { + "nodeLabels": [ + { + "$id": "nl:0", + "token": "Country", + "properties": [ + { + "$id": "p:0_0", + "token": "countryId", + "type": { + "type": "integer" + }, + "nullable": false + }, + { + "$id": "p:0_1", + "token": "name", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_2", + "token": "iso3", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_3", + "token": "iso2", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_4", + "token": "numericCode", + "type": { + "type": "integer" + }, + "nullable": false + }, + { + "$id": "p:0_5", + "token": "phoneCode", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_6", + "token": "capital", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_10", + "token": "tld", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_11", + "token": "native", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_15", + "token": "latitude", + "type": { + "type": "float" + }, + "nullable": false + }, + { + "$id": "p:0_16", + "token": "longitude", + "type": { + "type": "float" + }, + "nullable": false + }, + { + "$id": "p:0_17", + "token": "emoji", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_18", + "token": "emojiU", + "type": { + "type": "string" + }, + "nullable": false + } + ] + }, + { + "$id": "nl:1", + "token": "SubRegion", + "properties": [ + { + "$id": "p:3", + "token": "subregion", + "type": { + "type": "string" + }, + "nullable": true + } + ] + }, + { + "$id": "nl:2", + "token": "Region", + "properties": [ + { + "$id": "p:1", + "token": "region", + "type": { + "type": "string" + }, + "nullable": true + } + ] + }, + { + "$id": "nl:3", + "token": "TimeZones", + "properties": [ + { + "$id": "p:2", + "token": "timezones", + "type": { + "type": "string" + }, + "nullable": true + } + ] + }, + { + "$id": "nl:4", + "token": "Currency", + "properties": [ + { + "$id": "p:4", + "token": "currency", + "type": { + "type": "string" + }, + "nullable": true + }, + { + "$id": "p:5", + "token": "currency_name", + "type": { + "type": "string" + }, + "nullable": true + }, + { + "$id": "p:6", + "token": "currency_symbol", + "type": { + "type": "string" + }, + "nullable": true + } + ] + } + ], + "relationshipTypes": [ + { + "$id": "rt:1", + "token": "IN_SUBREGION", + "properties": [] + }, + { + "$id": "rt:2", + "token": "IN_REGION", + "properties": [] + }, + { + "$id": "rt:3", + "token": "IN_TIMEZONE", + "properties": [] + }, + { + "$id": "rt:4", + "token": "USES_CURRENCY", + "properties": [] + } + ], + "nodeObjectTypes": [ + { + "$id": "n:0", + "labels": [ + { + "$ref": "#nl:0" + } + ] + }, + { + "$id": "n:1", + "labels": [ + { + "$ref": "#nl:1" + } + ] + }, + { + "$id": "n:2", + "labels": [ + { + "$ref": "#nl:2" + } + ] + }, + { + "$id": "n:3", + "labels": [ + { + "$ref": "#nl:3" + } + ] + }, + { + "$id": "n:4", + "labels": [ + { + "$ref": "#nl:4" + } + ] + } + ], + "relationshipObjectTypes": [ + { + "$id": "r:1", + "type": { + "$ref": "#rt:1" + }, + "from": { + "$ref": "#n:0" + }, + "to": { + "$ref": "#n:1" + } + }, + { + "$id": "r:2", + "type": { + "$ref": "#rt:2" + }, + "from": { + "$ref": "#n:1" + }, + "to": { + "$ref": "#n:2" + } + }, + { + "$id": "r:3", + "type": { + "$ref": "#rt:3" + }, + "from": { + "$ref": "#n:0" + }, + "to": { + "$ref": "#n:3" + } + }, + { + "$id": "r:4", + "type": { + "$ref": "#rt:4" + }, + "from": { + "$ref": "#n:0" + }, + "to": { + "$ref": "#n:4" + } + } + ], + "constraints": [ + { + "$id": "c:0", + "name": "countries.csv", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:0" + }, + "relationshipType": null, + "properties": [ + { + "$ref": "#p:0_0" + } + ] + }, + { + "$id": "c:1", + "name": "region_Region_uniq", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:2" + }, + "properties": [ + { + "$ref": "#p:1" + } + ] + }, + { + "$id": "c:2", + "name": "timezones_TimeZones_uniq", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:3" + }, + "properties": [ + { + "$ref": "#p:2" + } + ] + }, + { + "$id": "c:3", + "name": "subregion_SubRegion_uniq", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:1" + }, + "properties": [ + { + "$ref": "#p:3" + } + ] + }, + { + "$id": "c:4", + "name": "currency_Currency_uniq", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:4" + }, + "properties": [ + { + "$ref": "#p:4" + } + ] + } + ], + "indexes": [ + { + "$id": "i:0", + "name": "countries.csv", + "indexType": "default", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:0" + }, + "relationshipType": null, + "properties": [ + { + "$ref": "#p:0_0" + } + ] + }, + { + "$id": "i:1", + "name": "region_Region_uniq", + "indexType": "default", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:2" + }, + "properties": [ + { + "$ref": "#p:1" + } + ] + }, + { + "$id": "i:2", + "name": "timezones_TimeZones_uniq", + "indexType": "default", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:3" + }, + "properties": [ + { + "$ref": "#p:2" + } + ] + }, + { + "$id": "i:3", + "name": "subregion_SubRegion_uniq", + "indexType": "default", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:1" + }, + "properties": [ + { + "$ref": "#p:3" + } + ] + }, + { + "$id": "i:4", + "name": "currency_Currency_uniq", + "indexType": "default", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:4" + }, + "properties": [ + { + "$ref": "#p:4" + } + ] + } + ] + } + }, + "graphSchemaExtensionsRepresentation": { + "nodeKeyProperties": [ + { + "node": { + "$ref": "#n:0" + }, + "keyProperty": { + "$ref": "#p:0_0" + } + }, + { + "node": { + "$ref": "#n:2" + }, + "keyProperty": { + "$ref": "#p:1" + } + }, + { + "node": { + "$ref": "#n:3" + }, + "keyProperty": { + "$ref": "#p:2" + } + }, + { + "node": { + "$ref": "#n:1" + }, + "keyProperty": { + "$ref": "#p:3" + } + }, + { + "node": { + "$ref": "#n:4" + }, + "keyProperty": { + "$ref": "#p:4" + } + } + ] + }, + "graphMappingRepresentation": { + "dataSourceSchema": { + "type": "local", + "tableSchemas": [ + { + "name": "countries.csv", + "fields": [ + { + "name": "id", + "sample": "", + "recommendedType": { + "type": "integer" + } + }, + { + "name": "name", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "iso3", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "iso2", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "numeric_code", + "sample": "", + "recommendedType": { + "type": "integer" + } + }, + { + "name": "phone_code", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "capital", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "currency", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "currency_name", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "currency_symbol", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "tld", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "native", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "region", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "subregion", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "timezones", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "latitude", + "sample": "", + "recommendedType": { + "type": "float" + } + }, + { + "name": "longitude", + "sample": "", + "recommendedType": { + "type": "float" + } + }, + { + "name": "emoji", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "emojiU", + "sample": "", + "recommendedType": { + "type": "string" + } + } + ], + "primaryKeys": [], + "foreignKeys": [], + "expanded": true + } + ] + }, + "nodeMappings": [ + { + "node": { + "$ref": "#n:0" + }, + "tableName": "countries.csv", + "propertyMappings": [ + { + "property": { + "$ref": "#p:0_0" + }, + "fieldName": "id" + }, + { + "property": { + "$ref": "#p:0_1" + }, + "fieldName": "name" + }, + { + "property": { + "$ref": "#p:0_2" + }, + "fieldName": "iso3" + }, + { + "property": { + "$ref": "#p:0_3" + }, + "fieldName": "iso2" + }, + { + "property": { + "$ref": "#p:0_4" + }, + "fieldName": "numeric_code" + }, + { + "property": { + "$ref": "#p:0_5" + }, + "fieldName": "phone_code" + }, + { + "property": { + "$ref": "#p:0_6" + }, + "fieldName": "capital" + }, + { + "property": { + "$ref": "#p:0_10" + }, + "fieldName": "tld" + }, + { + "property": { + "$ref": "#p:0_11" + }, + "fieldName": "native" + }, + { + "property": { + "$ref": "#p:0_15" + }, + "fieldName": "latitude" + }, + { + "property": { + "$ref": "#p:0_16" + }, + "fieldName": "longitude" + }, + { + "property": { + "$ref": "#p:0_17" + }, + "fieldName": "emoji" + }, + { + "property": { + "$ref": "#p:0_18" + }, + "fieldName": "emojiU" + } + ] + }, + { + "node": { + "$ref": "#n:1" + }, + "tableName": "countries.csv", + "propertyMappings": [ + { + "fieldName": "subregion", + "property": { + "$ref": "#p:3" + } + } + ] + }, + { + "node": { + "$ref": "#n:2" + }, + "tableName": "countries.csv", + "propertyMappings": [ + { + "fieldName": "region", + "property": { + "$ref": "#p:1" + } + } + ] + }, + { + "node": { + "$ref": "#n:3" + }, + "tableName": "countries.csv", + "propertyMappings": [ + { + "fieldName": "timezones", + "property": { + "$ref": "#p:2" + } + } + ] + }, + { + "node": { + "$ref": "#n:4" + }, + "tableName": "countries.csv", + "propertyMappings": [ + { + "fieldName": "currency", + "property": { + "$ref": "#p:4" + } + }, + { + "fieldName": "currency_name", + "property": { + "$ref": "#p:5" + } + }, + { + "fieldName": "currency_symbol", + "property": { + "$ref": "#p:6" + } + } + ] + } + ], + "relationshipMappings": [ + { + "relationship": { + "$ref": "#r:3" + }, + "tableName": "countries.csv", + "propertyMappings": [], + "fromMapping": { + "fieldName": "id" + }, + "toMapping": { + "fieldName": "timezones" + } + }, + { + "relationship": { + "$ref": "#r:1" + }, + "tableName": "countries.csv", + "propertyMappings": [], + "fromMapping": { + "fieldName": "id" + }, + "toMapping": { + "fieldName": "subregion" + } + }, + { + "relationship": { + "$ref": "#r:2" + }, + "tableName": "countries.csv", + "propertyMappings": [], + "fromMapping": { + "fieldName": "subregion" + }, + "toMapping": { + "fieldName": "region" + } + }, + { + "relationship": { + "$ref": "#r:4" + }, + "tableName": "countries.csv", + "propertyMappings": [], + "fromMapping": { + "fieldName": "id" + }, + "toMapping": { + "fieldName": "currency" + } + } + ] + }, + "configurations": { + "idsToIgnore": [] + } + } +} \ No newline at end of file diff --git a/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py b/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py new file mode 100644 index 0000000..06f8130 --- /dev/null +++ b/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py @@ -0,0 +1,738 @@ +"""Unit tests for Aura Data Import conversion methods.""" + +import json +import pytest +from pathlib import Path + +from mcp_neo4j_data_modeling.data_model import DataModel, Node, Property, Relationship, PropertySource + + +@pytest.fixture +def sample_aura_data_import_model(): + """Load the sample Aura Data Import model from the JSON file.""" + json_file = Path(__file__).parent.parent / "resources" / "neo4j_importer_model_2025-06-30.json" + with open(json_file, 'r') as f: + return json.load(f) + + +@pytest.fixture +def sample_property_data(): + """Sample property data from the JSON file.""" + return { + "$id": "p:0_0", + "token": "countryId", + "type": {"type": "integer"}, + "nullable": False + } + + +@pytest.fixture +def sample_source_mapping(): + """Sample source mapping for a property.""" + return { + "tableName": "countries.csv", + "fieldName": "id", + "type": "local" + } + + +@pytest.fixture +def sample_node_label(): + """Sample node label from the JSON file.""" + return { + "$id": "nl:1", + "token": "SubRegion", + "properties": [ + { + "$id": "p:3", + "token": "subregion", + "type": {"type": "string"}, + "nullable": True + } + ] + } + + +@pytest.fixture +def sample_node_mapping(): + """Sample node mapping from the JSON file.""" + return { + "node": {"$ref": "#n:1"}, + "tableName": "countries.csv", + "propertyMappings": [ + { + "fieldName": "subregion", + "property": {"$ref": "#p:3"} + } + ] + } + + +class TestPropertyConversion: + """Test Property conversion methods.""" + + def test_from_aura_data_import_string_property(self, sample_source_mapping): + """Test converting a string property from Aura Data Import format.""" + aura_property = { + "$id": "p:1", + "token": "name", + "type": {"type": "string"}, + "nullable": False + } + + prop = Property.from_aura_data_import(aura_property, sample_source_mapping) + + assert prop.name == "name" + assert prop.type == "STRING" + assert prop.source.column_name == "id" + assert prop.source.table_name == "countries.csv" + assert prop.source.location == "local" + + def test_from_aura_data_import_integer_property(self, sample_source_mapping): + """Test converting an integer property from Aura Data Import format.""" + aura_property = { + "$id": "p:0_0", + "token": "countryId", + "type": {"type": "integer"}, + "nullable": False + } + + prop = Property.from_aura_data_import(aura_property, sample_source_mapping) + + assert prop.name == "countryId" + assert prop.type == "INTEGER" + + def test_from_aura_data_import_float_property(self, sample_source_mapping): + """Test converting a float property from Aura Data Import format.""" + aura_property = { + "$id": "p:0_15", + "token": "latitude", + "type": {"type": "float"}, + "nullable": False + } + + prop = Property.from_aura_data_import(aura_property, sample_source_mapping) + + assert prop.name == "latitude" + assert prop.type == "FLOAT" + + def test_from_aura_data_import_boolean_property(self, sample_source_mapping): + """Test converting a boolean property from Aura Data Import format.""" + aura_property = { + "$id": "p:7", + "token": "active", + "type": {"type": "boolean"}, + "nullable": True + } + + prop = Property.from_aura_data_import(aura_property, sample_source_mapping) + + assert prop.name == "active" + assert prop.type == "BOOLEAN" + + def test_to_aura_data_import_key_property(self): + """Test converting a key property to Aura Data Import format.""" + prop = Property(name="id", type="INTEGER") + + result = prop.to_aura_data_import("p:0_0", is_key=True) + + assert result["$id"] == "p:0_0" + assert result["token"] == "id" + assert result["type"]["type"] == "integer" + assert result["nullable"] == False # Key properties are not nullable + + def test_to_aura_data_import_non_key_property(self): + """Test converting a non-key property to Aura Data Import format.""" + prop = Property(name="name", type="STRING") + + result = prop.to_aura_data_import("p:0_1", is_key=False) + + assert result["$id"] == "p:0_1" + assert result["token"] == "name" + assert result["type"]["type"] == "string" + assert result["nullable"] == True # Non-key properties are nullable + + def test_to_aura_data_import_unknown_type_defaults_to_string(self): + """Test that unknown property types default to string.""" + prop = Property(name="custom", type="CUSTOM_TYPE") + + result = prop.to_aura_data_import("p:1", is_key=False) + + assert result["type"]["type"] == "string" + + +class TestNodeConversion: + """Test Node conversion methods.""" + + def test_from_aura_data_import_simple_node(self, sample_node_label, sample_node_mapping): + """Test converting a simple node from Aura Data Import format.""" + node = Node.from_aura_data_import( + sample_node_label, + "subregion", # key property token + sample_node_mapping, + "local" # source_type + ) + + assert node.label == "SubRegion" + assert node.key_property.name == "subregion" + assert node.key_property.type == "STRING" + assert len(node.properties) == 0 # Only one property, which is the key + + def test_from_aura_data_import_complex_node(self, sample_aura_data_import_model): + """Test converting a complex node with multiple properties.""" + # Get the Country node from the sample data + country_node_label = sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["nodeLabels"][0] + country_node_mapping = sample_aura_data_import_model["dataModel"]["graphMappingRepresentation"]["nodeMappings"][0] + + node = Node.from_aura_data_import( + country_node_label, + "countryId", # key property token + country_node_mapping, + "local" # source_type + ) + + assert node.label == "Country" + assert node.key_property.name == "countryId" + assert node.key_property.type == "INTEGER" + assert len(node.properties) == 12 # 13 total properties - 1 key = 12 non-key properties + + # Check some specific properties + property_names = [p.name for p in node.properties] + assert "name" in property_names + assert "iso3" in property_names + assert "latitude" in property_names + + def test_from_aura_data_import_missing_key_property_uses_first(self, sample_node_label, sample_node_mapping): + """Test that when key property is not found, first property is used as key.""" + node = Node.from_aura_data_import( + sample_node_label, + "nonexistent_key", # This key doesn't exist + sample_node_mapping, + "local" # source_type + ) + + assert node.label == "SubRegion" + assert node.key_property.name == "subregion" # First property becomes key + assert len(node.properties) == 0 + + def test_to_aura_data_import_simple_node(self): + """Test converting a simple node to Aura Data Import format.""" + key_prop = Property(name="id", type="INTEGER") + other_prop = Property(name="name", type="STRING") + node = Node(label="TestNode", key_property=key_prop, properties=[other_prop]) + + node_label, key_property = node.to_aura_data_import("nl:0") + + # Check node label + assert node_label["$id"] == "nl:0" + assert node_label["token"] == "TestNode" + assert len(node_label["properties"]) == 2 + + # Check key property is first and not nullable + assert node_label["properties"][0]["token"] == "id" + assert node_label["properties"][0]["nullable"] == False + assert node_label["properties"][1]["token"] == "name" + assert node_label["properties"][1]["nullable"] == True + + # Check key property mapping + assert key_property["node"]["$ref"] == "#nl:0" + assert key_property["keyProperty"]["$ref"] == "#p:0_0" + + def test_node_mapping_property_not_found_raises_error(self, sample_node_label): + """Test that missing property in node mapping raises an error.""" + invalid_mapping = { + "node": {"$ref": "#n:1"}, + "tableName": "countries.csv", + "propertyMappings": [] # Empty mappings + } + + with pytest.raises(ValueError, match="Property p:3 not found in node mapping"): + Node.from_aura_data_import(sample_node_label, "subregion", invalid_mapping, "local") + + +class TestRelationshipConversion: + """Test Relationship conversion methods.""" + + def test_from_aura_data_import_simple_relationship(self): + """Test converting a simple relationship from Aura Data Import format.""" + rel_type = { + "$id": "rt:1", + "token": "IN_SUBREGION", + "properties": [] + } + + rel_obj = { + "$id": "r:1", + "type": {"$ref": "#rt:1"}, + "from": {"$ref": "#n:0"}, + "to": {"$ref": "#n:1"} + } + + node_id_to_label_map = { + "#n:0": "Country", + "#n:1": "SubRegion" + } + + # Empty relationship mapping since there are no properties + rel_mapping = { + "relationship": {"$ref": "#r:1"}, + "tableName": "relationships.csv", + "propertyMappings": [] + } + + relationship = Relationship.from_aura_data_import( + rel_type, rel_obj, node_id_to_label_map, rel_mapping, "local" + ) + + assert relationship.type == "IN_SUBREGION" + assert relationship.start_node_label == "Country" + assert relationship.end_node_label == "SubRegion" + assert relationship.key_property is None + assert len(relationship.properties) == 0 + + def test_from_aura_data_import_relationship_with_properties(self): + """Test converting a relationship with properties.""" + rel_type = { + "$id": "rt:2", + "token": "CONNECTED_TO", + "properties": [ + { + "$id": "p:rel_1", + "token": "weight", + "type": {"type": "float"}, + "nullable": False + }, + { + "$id": "p:rel_2", + "token": "since", + "type": {"type": "string"}, + "nullable": True + } + ] + } + + rel_obj = { + "$id": "r:2", + "type": {"$ref": "#rt:2"}, + "from": {"$ref": "#n:0"}, + "to": {"$ref": "#n:1"} + } + + node_id_to_label_map = { + "#n:0": "NodeA", + "#n:1": "NodeB" + } + + # Relationship mapping with properties + rel_mapping = { + "relationship": {"$ref": "#r:2"}, + "tableName": "relationships.csv", + "propertyMappings": [ + { + "property": {"$ref": "#p:rel_1"}, + "fieldName": "weight" + }, + { + "property": {"$ref": "#p:rel_2"}, + "fieldName": "since" + } + ] + } + + relationship = Relationship.from_aura_data_import( + rel_type, rel_obj, node_id_to_label_map, rel_mapping, "local" + ) + + assert relationship.type == "CONNECTED_TO" + assert relationship.key_property.name == "weight" # First property becomes key + assert len(relationship.properties) == 1 # Second property + assert relationship.properties[0].name == "since" + + def test_to_aura_data_import_simple_relationship(self): + """Test converting a simple relationship to Aura Data Import format.""" + relationship = Relationship( + type="KNOWS", + start_node_label="Person", + end_node_label="Person" + ) + + rel_type, rel_obj = relationship.to_aura_data_import("rt:1", "r:1", "n:0", "n:1") + + # Check relationship type + assert rel_type["$id"] == "rt:1" + assert rel_type["token"] == "KNOWS" + assert len(rel_type["properties"]) == 0 + + # Check relationship object + assert rel_obj["$id"] == "r:1" + assert rel_obj["type"]["$ref"] == "#rt:1" + assert rel_obj["from"]["$ref"] == "#n:0" + assert rel_obj["to"]["$ref"] == "#n:1" + + def test_to_aura_data_import_relationship_with_properties(self): + """Test converting a relationship with properties to Aura Data Import format.""" + key_prop = Property(name="relationshipId", type="INTEGER") + other_prop = Property(name="strength", type="FLOAT") + + relationship = Relationship( + type="CONNECTED", + start_node_label="NodeA", + end_node_label="NodeB", + key_property=key_prop, + properties=[other_prop] + ) + + rel_type, rel_obj = relationship.to_aura_data_import("rt:2", "r:2", "n:0", "n:1") + + # Check relationship type has properties + assert len(rel_type["properties"]) == 2 + assert rel_type["properties"][0]["token"] == "relationshipId" + assert rel_type["properties"][0]["nullable"] == False # Key property + assert rel_type["properties"][1]["token"] == "strength" + assert rel_type["properties"][1]["nullable"] == True # Non-key property + + +class TestDataModelConversion: + """Test DataModel conversion methods.""" + + def test_from_aura_data_import_full_model(self, sample_aura_data_import_model): + """Test converting the full sample Aura Data Import model.""" + data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) + + # Check nodes + assert len(data_model.nodes) == 5 + node_labels = [n.label for n in data_model.nodes] + assert "Country" in node_labels + assert "SubRegion" in node_labels + assert "Region" in node_labels + assert "TimeZones" in node_labels + assert "Currency" in node_labels + + # Check relationships + assert len(data_model.relationships) == 4 + rel_types = [r.type for r in data_model.relationships] + assert "IN_SUBREGION" in rel_types + assert "IN_REGION" in rel_types + assert "IN_TIMEZONE" in rel_types + assert "USES_CURRENCY" in rel_types + + def test_from_aura_data_import_node_key_properties(self, sample_aura_data_import_model): + """Test that node key properties are correctly identified.""" + data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) + + # Find specific nodes and check their key properties + country_node = next(n for n in data_model.nodes if n.label == "Country") + assert country_node.key_property.name == "countryId" + assert country_node.key_property.type == "INTEGER" + + region_node = next(n for n in data_model.nodes if n.label == "Region") + assert region_node.key_property.name == "region" + assert region_node.key_property.type == "STRING" + + def test_to_aura_data_import_dict_structure(self): + """Test the structure of the exported Aura Data Import dictionary.""" + # Create a simple data model + key_prop = Property(name="id", type="INTEGER") + node1 = Node(label="TestNode", key_property=key_prop) + + rel = Relationship( + type="TEST_REL", + start_node_label="TestNode", + end_node_label="TestNode" + ) + + data_model = DataModel(nodes=[node1], relationships=[rel]) + + result = data_model.to_aura_data_import_dict() + + # Check top-level structure + assert "version" in result + assert "visualisation" in result + assert "dataModel" in result + + # Check visualization structure + assert "nodes" in result["visualisation"] + assert len(result["visualisation"]["nodes"]) == 1 + + # Check data model structure + data_model_content = result["dataModel"] + assert "graphSchemaRepresentation" in data_model_content + assert "graphSchemaExtensionsRepresentation" in data_model_content + assert "graphMappingRepresentation" in data_model_content + assert "configurations" in data_model_content + + # Check graph schema + graph_schema = data_model_content["graphSchemaRepresentation"]["graphSchema"] + assert "nodeLabels" in graph_schema + assert "relationshipTypes" in graph_schema + assert "nodeObjectTypes" in graph_schema + assert "relationshipObjectTypes" in graph_schema + assert "constraints" in graph_schema + assert "indexes" in graph_schema + + def test_to_aura_data_import_dict_node_constraints_and_indexes(self): + """Test that constraints and indexes are properly generated.""" + key_prop = Property(name="userId", type="INTEGER") + node = Node(label="User", key_property=key_prop) + data_model = DataModel(nodes=[node]) + + result = data_model.to_aura_data_import_dict() + + graph_schema = result["dataModel"]["graphSchemaRepresentation"]["graphSchema"] + + # Check constraints + assert len(graph_schema["constraints"]) == 1 + constraint = graph_schema["constraints"][0] + assert constraint["name"] == "User_constraint" + assert constraint["constraintType"] == "uniqueness" + assert constraint["entityType"] == "node" + + # Check indexes + assert len(graph_schema["indexes"]) == 1 + index = graph_schema["indexes"][0] + assert index["name"] == "User_index" + assert index["indexType"] == "default" + assert index["entityType"] == "node" + + def test_round_trip_conversion_simple(self): + """Test that a simple model can be converted to Aura format and back.""" + # Create original model + key_prop = Property(name="id", type="STRING") + node = Node(label="TestNode", key_property=key_prop) + original_model = DataModel(nodes=[node]) + + # Convert to Aura format + aura_dict = original_model.to_aura_data_import_dict() + + # Convert back + converted_model = DataModel.from_aura_data_import(aura_dict) + + # Check that essential structure is preserved + assert len(converted_model.nodes) == 1 + assert converted_model.nodes[0].label == "TestNode" + assert converted_model.nodes[0].key_property.name == "id" + assert converted_model.nodes[0].key_property.type == "STRING" + + def test_round_trip_conversion_with_relationships(self): + """Test round-trip conversion with relationships.""" + # Create original model + key_prop1 = Property(name="id1", type="INTEGER") + key_prop2 = Property(name="id2", type="STRING") + node1 = Node(label="Node1", key_property=key_prop1) + node2 = Node(label="Node2", key_property=key_prop2) + + rel = Relationship( + type="CONNECTS", + start_node_label="Node1", + end_node_label="Node2" + ) + + original_model = DataModel(nodes=[node1, node2], relationships=[rel]) + + # Convert to Aura format and back + aura_dict = original_model.to_aura_data_import_dict() + converted_model = DataModel.from_aura_data_import(aura_dict) + + # Check nodes + assert len(converted_model.nodes) == 2 + node_labels = [n.label for n in converted_model.nodes] + assert "Node1" in node_labels + assert "Node2" in node_labels + + # Check relationships + assert len(converted_model.relationships) == 1 + assert converted_model.relationships[0].type == "CONNECTS" + assert converted_model.relationships[0].start_node_label == "Node1" + assert converted_model.relationships[0].end_node_label == "Node2" + + def test_json_serialization(self, sample_aura_data_import_model): + """Test that the converted model can be serialized to JSON.""" + data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) + json_str = data_model.to_aura_data_import_json_str() + + # Should be valid JSON + parsed = json.loads(json_str) + assert isinstance(parsed, dict) + assert "dataModel" in parsed + + def test_metadata_preservation_round_trip(self, sample_aura_data_import_model): + """Test that metadata (constraints, indexes, version, configurations) is preserved during round-trip conversion.""" + # Convert from Aura Data Import to our model + data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) + + # Verify metadata was stored + assert "aura_data_import" in data_model.metadata + aura_metadata = data_model.metadata["aura_data_import"] + + # Check that all expected metadata fields are present + assert "version" in aura_metadata + assert "dataModel_version" in aura_metadata + assert "constraints" in aura_metadata + assert "indexes" in aura_metadata + assert "configurations" in aura_metadata + assert "dataSourceSchema" in aura_metadata + + # Verify the stored values match the original + assert aura_metadata["version"] == sample_aura_data_import_model["version"] + assert aura_metadata["dataModel_version"] == sample_aura_data_import_model["dataModel"]["version"] + assert aura_metadata["constraints"] == sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["constraints"] + assert aura_metadata["indexes"] == sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["indexes"] + assert aura_metadata["configurations"] == sample_aura_data_import_model["dataModel"]["configurations"] + + # Check that visualization data was stored in node metadata + original_vis_nodes = sample_aura_data_import_model["visualisation"]["nodes"] + for i, node in enumerate(data_model.nodes): + node_id = f"n:{i}" + original_vis_node = next((v for v in original_vis_nodes if v["id"] == node_id), None) + if original_vis_node: + assert "visualization" in node.metadata + assert node.metadata["visualization"]["position"] == original_vis_node["position"] + + # Convert back to Aura Data Import + converted_back = data_model.to_aura_data_import_dict() + + # Verify that the metadata was restored + assert converted_back["version"] == sample_aura_data_import_model["version"] + assert converted_back["dataModel"]["version"] == sample_aura_data_import_model["dataModel"]["version"] + assert converted_back["dataModel"]["configurations"] == sample_aura_data_import_model["dataModel"]["configurations"] + + # Verify that visualization was reconstructed correctly + assert "visualisation" in converted_back + assert "nodes" in converted_back["visualisation"] + assert len(converted_back["visualisation"]["nodes"]) == len(data_model.nodes) + + # Check that positions were preserved for existing nodes + original_vis_nodes = sample_aura_data_import_model["visualisation"]["nodes"] + converted_vis_nodes = converted_back["visualisation"]["nodes"] + for original_vis_node in original_vis_nodes: + converted_vis_node = next((v for v in converted_vis_nodes if v["id"] == original_vis_node["id"]), None) + if converted_vis_node: + assert converted_vis_node["position"] == original_vis_node["position"] + + # Check that constraints and indexes were preserved + original_constraints = sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["constraints"] + converted_constraints = converted_back["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["constraints"] + assert converted_constraints == original_constraints + + original_indexes = sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["indexes"] + converted_indexes = converted_back["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["indexes"] + assert converted_indexes == original_indexes + + def test_export_without_metadata_uses_defaults(self): + """Test that exporting a DataModel without Aura metadata uses appropriate defaults.""" + # Create a simple DataModel from scratch (no metadata) + key_prop = Property(name="id", type="INTEGER") + node = Node(label="TestNode", key_property=key_prop) + data_model = DataModel(nodes=[node]) + + # Export to Aura Data Import format + aura_dict = data_model.to_aura_data_import_dict() + + # Verify default values are used + assert aura_dict["version"] == "2.3.1-beta.0" + assert aura_dict["dataModel"]["version"] == "2.3.1-beta.0" + assert aura_dict["dataModel"]["configurations"] == {"idsToIgnore": []} + assert aura_dict["dataModel"]["graphMappingRepresentation"]["dataSourceSchema"] == {"type": "local", "tableSchemas": []} + + # Verify visualization nodes are generated + assert "visualisation" in aura_dict + assert "nodes" in aura_dict["visualisation"] + assert len(aura_dict["visualisation"]["nodes"]) == 1 + assert aura_dict["visualisation"]["nodes"][0]["id"] == "n:0" + + # Verify constraints and indexes are generated for the node + graph_schema = aura_dict["dataModel"]["graphSchemaRepresentation"]["graphSchema"] + assert len(graph_schema["constraints"]) == 1 + assert len(graph_schema["indexes"]) == 1 + assert graph_schema["constraints"][0]["name"] == "TestNode_constraint" + assert graph_schema["indexes"][0]["name"] == "TestNode_index" + + def test_visualization_reconstruction_with_new_nodes(self, sample_aura_data_import_model): + """Test that visualization is properly reconstructed when new nodes are added.""" + # Convert from Aura Data Import to our model + data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) + + # Add a new node that wasn't in the original data + new_key_prop = Property(name="newId", type="STRING") + new_node = Node(label="NewNode", key_property=new_key_prop) + data_model.add_node(new_node) + + # Convert back to Aura Data Import + converted_back = data_model.to_aura_data_import_dict() + + # Verify visualization includes all nodes (original + new) + vis_nodes = converted_back["visualisation"]["nodes"] + assert len(vis_nodes) == len(data_model.nodes) + + # Check that original nodes kept their positions + original_vis_nodes = sample_aura_data_import_model["visualisation"]["nodes"] + for original_vis_node in original_vis_nodes: + converted_vis_node = next((v for v in vis_nodes if v["id"] == original_vis_node["id"]), None) + if converted_vis_node: + assert converted_vis_node["position"] == original_vis_node["position"] + + # Check that new node got a default position + new_node_id = f"n:{len(data_model.nodes) - 1}" # Last node should be the new one + new_vis_node = next((v for v in vis_nodes if v["id"] == new_node_id), None) + assert new_vis_node is not None + assert "position" in new_vis_node + assert isinstance(new_vis_node["position"]["x"], float) + assert isinstance(new_vis_node["position"]["y"], float) + + +class TestEdgeCases: + """Test edge cases and error conditions.""" + + def test_empty_data_model_conversion(self): + """Test converting an empty data model.""" + empty_model = DataModel() + + aura_dict = empty_model.to_aura_data_import_dict() + + # Should have basic structure even when empty + assert "version" in aura_dict + assert "visualisation" in aura_dict + assert len(aura_dict["visualisation"]["nodes"]) == 0 + + graph_schema = aura_dict["dataModel"]["graphSchemaRepresentation"]["graphSchema"] + assert len(graph_schema["nodeLabels"]) == 0 + assert len(graph_schema["relationshipTypes"]) == 0 + + def test_node_with_no_properties_mapping(self, sample_node_label): + """Test handling of node with missing property mappings.""" + empty_mapping = { + "node": {"$ref": "#n:1"}, + "tableName": "unknown", + "propertyMappings": [] + } + + # Should raise error when property is not found in mapping + with pytest.raises(ValueError): + Node.from_aura_data_import(sample_node_label, "subregion", empty_mapping, "local") + + def test_malformed_aura_data_missing_required_fields(self): + """Test handling of malformed Aura Data Import data.""" + malformed_data = { + "version": "2.3.1-beta.0", + # Missing visualisation and dataModel + } + + with pytest.raises(KeyError): + DataModel.from_aura_data_import(malformed_data) + + def test_property_type_edge_cases(self, sample_source_mapping): + """Test property type conversion edge cases.""" + # Test with unknown type + unknown_type_prop = { + "$id": "p:unknown", + "token": "unknown", + "type": {"type": "unknown_type"}, + "nullable": False + } + + prop = Property.from_aura_data_import(unknown_type_prop, sample_source_mapping) + assert prop.type == "UNKNOWN_TYPE" # Should uppercase unknown types + + # Test conversion back + result = prop.to_aura_data_import("p:test", is_key=False) + assert result["type"]["type"] == "string" # Should default to string \ No newline at end of file From cd998c5c6334ec38a736ecfe2643c9722ff20597 Mon Sep 17 00:00:00 2001 From: alex Date: Tue, 1 Jul 2025 08:35:27 -0500 Subject: [PATCH 2/6] include local or remote source, update changelog --- servers/mcp-neo4j-data-modeling/CHANGELOG.md | 1 + .../src/mcp_neo4j_data_modeling/data_model.py | 594 +++++++++++++----- .../test_aura_data_import_conversion_IT.py | 46 -- .../unit/test_aura_data_import_conversion.py | 542 ++++++++++------ 4 files changed, 773 insertions(+), 410 deletions(-) delete mode 100644 servers/mcp-neo4j-data-modeling/tests/integration/test_aura_data_import_conversion_IT.py diff --git a/servers/mcp-neo4j-data-modeling/CHANGELOG.md b/servers/mcp-neo4j-data-modeling/CHANGELOG.md index b677d5d..a18af2e 100644 --- a/servers/mcp-neo4j-data-modeling/CHANGELOG.md +++ b/servers/mcp-neo4j-data-modeling/CHANGELOG.md @@ -5,6 +5,7 @@ ### Changed ### Added +* Add import and export from Aura Data Import tool format ## v0.1.1 diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py index e6427cf..a86a87e 100644 --- a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py @@ -45,6 +45,10 @@ class PropertySource(BaseModel): default=None, description="The location of the property, if known. May be a file path, URL, etc.", ) + source_type: str | None = Field( + default=None, + description="The type of the data source: 'local' or 'remote'.", + ) class Property(BaseModel): @@ -61,6 +65,10 @@ class Property(BaseModel): description: str | None = Field( default=None, description="The description of the property" ) + metadata: dict[str, Any] = Field( + default_factory=dict, + description="The metadata of the property. This should only be used when converting data models.", + ) @field_validator("type") def validate_type(cls, v: str) -> str: @@ -139,12 +147,21 @@ def from_aura_data_import( column_name=source_mapping.get("fieldName", None), table_name=source_mapping.get("tableName", None), location=source_mapping.get("type", None), + source_type=source_mapping.get("source_type", "local"), ) + + # Create property with nullable and original ID stored in metadata return cls( name=aura_data_import_property["token"], type=mapped_type, description=None, # Aura Data Import doesn't have descriptions source=source, + metadata={ + "aura_data_import": { + "nullable": aura_data_import_property.get("nullable", False), + "original_id": aura_data_import_property.get("$id"), + } + }, ) def to_aura_data_import( @@ -165,11 +182,14 @@ def to_aura_data_import( self.type, "string" ) # Default to string if type is not found + # Use stored nullable value from metadata, or default based on key property + nullable = self.metadata.get("aura_data_import", {}).get("nullable", not is_key) + return { "$id": property_id, "token": self.name, "type": {"type": mapped_type}, - "nullable": not is_key, + "nullable": nullable, } @@ -280,7 +300,7 @@ def from_aura_data_import( aura_data_import_node_label: dict[str, Any], key_property_token: str, node_mapping: dict[str, Any], - source_type: str + source_type: str, ) -> "Node": """ Convert an Aura Data Import NodeLabel to a Node. @@ -331,18 +351,21 @@ def _prepare_source_mapping( return { "tableName": node_mapping["tableName"], "fieldName": field_name[0], - "type": source_type, # Default to local type + "type": "local", # This was the original location field + "source_type": source_type, # The actual data source type } for prop in aura_data_import_node_label["properties"]: if prop["token"] == key_property_token: key_prop = Property.from_aura_data_import( - prop, _prepare_source_mapping(node_mapping, prop["$id"], source_type) + prop, + _prepare_source_mapping(node_mapping, prop["$id"], source_type), ) else: other_props.append( Property.from_aura_data_import( - prop, _prepare_source_mapping(node_mapping, prop["$id"], source_type) + prop, + _prepare_source_mapping(node_mapping, prop["$id"], source_type), ) ) @@ -370,29 +393,71 @@ def _prepare_source_mapping( ) def to_aura_data_import( - self, node_id: str - ) -> tuple[dict[str, Any], dict[str, Any]]: + self, + node_label_id: str, + node_obj_id: str, + key_prop_id: str, + constraint_id: str, + index_id: str, + property_id_mapping: dict[str, str] = None, + ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any], dict[str, Any]]: """ Convert a Node to Aura Data Import NodeLabel format. - Returns tuple of (NodeLabel, KeyProperty) + Returns tuple of (NodeLabel, KeyProperty, Constraint, Index) """ # Create property list with key property first all_props = [self.key_property] + self.properties aura_props = [] + # For the first property (key property), use the provided key_prop_id + # For additional properties, use the property_id_mapping if provided for i, prop in enumerate(all_props): - prop_id = f"p:{node_id.split(':')[1]}_{i}" + if i == 0: + prop_id = key_prop_id + else: + # Use property mapping if available, otherwise generate based on node pattern + if property_id_mapping and prop.name in property_id_mapping: + prop_id = property_id_mapping[prop.name] + else: + prop_id = f"p:{node_label_id.split(':')[1]}_{i}" + is_key = i == 0 # First property is the key property aura_props.append(prop.to_aura_data_import(prop_id, is_key=is_key)) - node = {"$id": node_id, "token": self.label, "properties": aura_props} + node_label = { + "$id": node_label_id, + "token": self.label, + "properties": aura_props, + } key_property = { - "node": {"$ref": f"#{node_id}"}, - "keyProperty": {"$ref": f"#{aura_props[0]['$id']}"}, + "node": {"$ref": f"#{node_obj_id}"}, + "keyProperty": {"$ref": f"#{key_prop_id}"}, } - return (node, key_property) + # Create uniqueness constraint on key property + constraint = { + "$id": constraint_id, + "name": f"{self.label}_constraint", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": {"$ref": f"#{node_label_id}"}, + "relationshipType": None, + "properties": [{"$ref": f"#{key_prop_id}"}], + } + + # Create default index on key property + index = { + "$id": index_id, + "name": f"{self.label}_index", + "indexType": "default", + "entityType": "node", + "nodeLabel": {"$ref": f"#{node_label_id}"}, + "relationshipType": None, + "properties": [{"$ref": f"#{key_prop_id}"}], + } + + return (node_label, key_property, constraint, index) def get_cypher_ingest_query_for_many_records(self) -> str: """ @@ -544,7 +609,7 @@ def from_aura_data_import( aura_data_import_relationship_object: dict[str, Any], node_id_to_label_map: dict[str, str], relationship_mapping: dict[str, Any], - source_type: str + source_type: str, ) -> "Relationship": """Convert Aura Data Import RelationshipType and RelationshipObjectType to a Relationship.""" # Convert properties @@ -552,11 +617,10 @@ def from_aura_data_import( other_props = [] def _prepare_source_mapping( - relationship_mapping: dict[str, Any], property_id: str, - source_type: str + relationship_mapping: dict[str, Any], property_id: str, source_type: str ) -> dict[str, Any]: """ - Prepare the source mapping for the node mapping. + Prepare the source mapping for the relationship mapping. """ field_name = [ x["fieldName"] @@ -564,17 +628,21 @@ def _prepare_source_mapping( if x["property"]["$ref"] == "#" + property_id ] if not field_name: - raise ValueError(f"Property {property_id} not found in node mapping") + raise ValueError(f"Property {property_id} not found in relationship mapping") return { "tableName": relationship_mapping["tableName"], "fieldName": field_name[0], - "type": source_type, + "type": "local", # This was the original location field + "source_type": source_type, # The actual data source type } - + for prop in aura_data_import_relationship_type["properties"]: # Create a default source mapping for relationship properties - - converted_prop = Property.from_aura_data_import(prop, _prepare_source_mapping(relationship_mapping, prop["$id"], source_type)) + + converted_prop = Property.from_aura_data_import( + prop, + _prepare_source_mapping(relationship_mapping, prop["$id"], source_type), + ) # For simplicity, treat first property as key if any exist if not key_prop and aura_data_import_relationship_type["properties"]: key_prop = converted_prop @@ -594,11 +662,20 @@ def _prepare_source_mapping( ) def to_aura_data_import( - self, rel_type_id: str, rel_obj_id: str, start_node_id: str, end_node_id: str - ) -> tuple[dict[str, Any], dict[str, Any]]: + self, + rel_type_id: str, + rel_obj_id: str, + start_node_id: str, + end_node_id: str, + constraint_id: str = None, + index_id: str = None, + ) -> tuple[ + dict[str, Any], dict[str, Any], dict[str, Any] | None, dict[str, Any] | None + ]: """Convert a Relationship to Aura Data Import format. - Returns tuple of (RelationshipType, RelationshipObjectType) + Returns tuple of (RelationshipType, RelationshipObjectType, Constraint, Index) + Constraint and Index are None if the relationship has no key property. """ # Create relationship type all_props = [] @@ -609,7 +686,9 @@ def to_aura_data_import( aura_props = [] for i, prop in enumerate(all_props): prop_id = f"p:{rel_type_id.split(':')[1]}_{i}" - is_key = i == 0 # First property is the key property + is_key = ( + i == 0 and self.key_property is not None + ) # First property is the key property if it exists aura_props.append(prop.to_aura_data_import(prop_id, is_key=is_key)) relationship_type = { @@ -626,7 +705,33 @@ def to_aura_data_import( "to": {"$ref": f"#{end_node_id}"}, } - return relationship_type, relationship_object + # Create constraint and index if relationship has key property + constraint = None + index = None + if self.key_property and constraint_id and index_id: + key_prop_id = aura_props[0]["$id"] # First property is the key property + + constraint = { + "$id": constraint_id, + "name": f"{self.type}_constraint", + "constraintType": "uniqueness", + "entityType": "relationship", + "nodeLabel": None, + "relationshipType": {"$ref": f"#{rel_type_id}"}, + "properties": [{"$ref": f"#{key_prop_id}"}], + } + + index = { + "$id": index_id, + "name": f"{self.type}_index", + "indexType": "default", + "entityType": "relationship", + "nodeLabel": None, + "relationshipType": {"$ref": f"#{rel_type_id}"}, + "properties": [{"$ref": f"#{key_prop_id}"}], + } + + return relationship_type, relationship_object, constraint, index def get_cypher_ingest_query_for_many_records( self, start_node_key_property_name: str, end_node_key_property_name: str @@ -854,6 +959,12 @@ def from_aura_data_import( node_mappings = aura_data_import_data_model["dataModel"][ "graphMappingRepresentation" ]["nodeMappings"] + + # Get the data source schema to determine source type + data_source_schema = aura_data_import_data_model["dataModel"][ + "graphMappingRepresentation" + ]["dataSourceSchema"] + source_type = data_source_schema.get("type", "local") # Create mapping from node object ID to key property token node_key_map = {} @@ -883,7 +994,7 @@ def from_aura_data_import( relationship_mappings = aura_data_import_data_model["dataModel"][ "graphMappingRepresentation" ]["relationshipMappings"] - + # Create mapping from relationship object ID to relationship mapping rel_obj_to_mapping = {} for rel_mapping in relationship_mappings: @@ -925,7 +1036,7 @@ def from_aura_data_import( ) node = Node.from_aura_data_import( - node_label, key_property_token, node_mapping, "local" + node_label, key_property_token, node_mapping, source_type ) nodes.append(node) @@ -947,21 +1058,23 @@ def from_aura_data_import( rel_obj_id, { "relationship": {"$ref": rel_obj_id}, - "tableName": "relationships.csv", + "tableName": "unknown", "propertyMappings": [], }, ) - + relationship = Relationship.from_aura_data_import( - rel_type, rel_obj, node_id_to_label_map, rel_mapping, "local" + rel_type, rel_obj, node_id_to_label_map, rel_mapping, source_type ) relationships.append(relationship) # Store visualization coordinates in node metadata visualization_data = aura_data_import_data_model.get("visualisation", {}) vis_nodes = visualization_data.get("nodes", []) - vis_node_positions = {vis_node["id"]: vis_node["position"] for vis_node in vis_nodes} - + vis_node_positions = { + vis_node["id"]: vis_node["position"] for vis_node in vis_nodes + } + # Update node metadata with visualization coordinates for i, node in enumerate(nodes): node_id = f"n:{i}" @@ -974,75 +1087,81 @@ def from_aura_data_import( metadata = { "aura_data_import": { "version": aura_data_import_data_model.get("version"), - "dataModel_version": aura_data_import_data_model["dataModel"].get("version"), + "dataModel_version": aura_data_import_data_model["dataModel"].get( + "version" + ), "constraints": graph_schema.get("constraints", []), "indexes": graph_schema.get("indexes", []), - "configurations": aura_data_import_data_model["dataModel"].get("configurations", {}), - "dataSourceSchema": aura_data_import_data_model["dataModel"]["graphMappingRepresentation"].get("dataSourceSchema", {}), + "configurations": aura_data_import_data_model["dataModel"].get( + "configurations", {} + ), + "dataSourceSchema": aura_data_import_data_model["dataModel"][ + "graphMappingRepresentation" + ].get("dataSourceSchema", {}), } } return cls(nodes=nodes, relationships=relationships, metadata=metadata) - - def to_aura_data_import_dict(self) -> dict[str, Any]: + + def to_aura_data_import_dict(self) -> dict[str, Any]: """Convert the data model to an Aura Data Import dictionary.""" # Check if we have stored Aura Data Import metadata aura_metadata = self.metadata.get("aura_data_import", {}) - + # Generate IDs following the original schema patterns node_labels = [] node_object_types = [] node_key_properties = [] constraints = [] indexes = [] - + # Track property IDs to match original schema - property_counter = 0 node_to_key_prop_id = {} + # Generate property IDs dynamically + global_property_counter = 0 + for i, node in enumerate(self.nodes): node_label_id = f"nl:{i}" node_obj_id = f"n:{i}" + constraint_id = f"c:{i}" + index_id = f"i:{i}" - # Create node label with original ID schema - all_props = [node.key_property] + node.properties - aura_props = [] - - # For Country node (first node), use p:0_0, p:0_1, etc. pattern - # For other nodes, use simple p:1, p:2, etc. pattern - if i == 0: # Country node - for j, prop in enumerate(all_props): - prop_id = f"p:{i}_{j}" - is_key = j == 0 - aura_props.append(prop.to_aura_data_import(prop_id, is_key=is_key)) - if is_key: - node_to_key_prop_id[node_obj_id] = prop_id - else: # Other nodes - # Use simple property IDs starting from where Country left off - if i == 1: # SubRegion - prop_id = "p:3" - elif i == 2: # Region - prop_id = "p:1" - elif i == 3: # TimeZones - prop_id = "p:2" - elif i == 4: # Currency - prop_id = "p:4" + # Use stored original property ID if available, otherwise generate new one + key_prop_id = node.key_property.metadata.get("aura_data_import", {}).get( + "original_id", f"p:{global_property_counter}" + ) + if not node.key_property.metadata.get("aura_data_import", {}).get( + "original_id" + ): + global_property_counter += 1 + + # Build property mapping for additional properties + node_prop_mapping = {} + for prop in node.properties: + stored_id = prop.metadata.get("aura_data_import", {}).get("original_id") + if stored_id: + node_prop_mapping[prop.name] = stored_id else: - prop_id = f"p:{property_counter}" - property_counter += 1 - - # Key property - aura_props.append(node.key_property.to_aura_data_import(prop_id, is_key=True)) - node_to_key_prop_id[node_obj_id] = prop_id - - # Additional properties for Currency node - if i == 4 and len(node.properties) > 0: - for j, prop in enumerate(node.properties): - additional_prop_id = f"p:{5 + j}" - aura_props.append(prop.to_aura_data_import(additional_prop_id, is_key=False)) - - node_label = {"$id": node_label_id, "token": node.label, "properties": aura_props} + node_prop_mapping[prop.name] = f"p:{global_property_counter}" + global_property_counter += 1 + + node_to_key_prop_id[node_obj_id] = key_prop_id + + # Use the updated Node.to_aura_data_import method + node_label, key_property, constraint, index = node.to_aura_data_import( + node_label_id, + node_obj_id, + key_prop_id, + constraint_id, + index_id, + node_prop_mapping, + ) + node_labels.append(node_label) + node_key_properties.append(key_property) + constraints.append(constraint) + indexes.append(index) # Create node object type node_object_type = { @@ -1051,45 +1170,13 @@ def to_aura_data_import_dict(self) -> dict[str, Any]: } node_object_types.append(node_object_type) - # Add key property mapping (reference node object, not node label) - key_prop_id = node_to_key_prop_id[node_obj_id] - key_property = { - "node": {"$ref": f"#{node_obj_id}"}, - "keyProperty": {"$ref": f"#{key_prop_id}"}, - } - node_key_properties.append(key_property) - - # Create constraint - constraint = { - "$id": f"c:{i}", - "name": f"{node.label}_constraint", - "constraintType": "uniqueness", - "entityType": "node", - "nodeLabel": {"$ref": f"#{node_label_id}"}, - "relationshipType": None, - "properties": [{"$ref": f"#{key_prop_id}"}], - } - constraints.append(constraint) - - # Create index - index = { - "$id": f"i:{i}", - "name": f"{node.label}_index", - "indexType": "default", - "entityType": "node", - "nodeLabel": {"$ref": f"#{node_label_id}"}, - "relationshipType": None, - "properties": [{"$ref": f"#{key_prop_id}"}], - } - indexes.append(index) - # Handle relationships - start from rt:1, r:1 (not rt:0, r:0) relationship_types = [] relationship_object_types = [] for i, rel in enumerate(self.relationships): rel_type_id = f"rt:{i + 1}" # Start from 1 - rel_obj_id = f"r:{i + 1}" # Start from 1 + rel_obj_id = f"r:{i + 1}" # Start from 1 # Find start and end node IDs start_node_id = None @@ -1100,82 +1187,250 @@ def to_aura_data_import_dict(self) -> dict[str, Any]: if node.label == rel.end_node_label: end_node_id = f"n:{j}" - rel_type, rel_obj = rel.to_aura_data_import( - rel_type_id, rel_obj_id, start_node_id, end_node_id + # Generate constraint and index IDs if relationship has key property + constraint_id = None + index_id = None + if rel.key_property: + # Continue constraint and index numbering after nodes + constraint_id = f"c:{len(self.nodes) + i}" + index_id = f"i:{len(self.nodes) + i}" + + rel_type, rel_obj, rel_constraint, rel_index = rel.to_aura_data_import( + rel_type_id, + rel_obj_id, + start_node_id, + end_node_id, + constraint_id, + index_id, ) relationship_types.append(rel_type) relationship_object_types.append(rel_obj) + # Add relationship constraints and indexes if they exist + if rel_constraint: + constraints.append(rel_constraint) + if rel_index: + indexes.append(rel_index) + # Create node mappings with property mappings for round-trip conversion + # We need to extract the property IDs from the already created node_labels node_mappings = [] + for i, node in enumerate(self.nodes): node_obj_id = f"n:{i}" - - # Create property mappings for all properties + + # Get the property IDs from the corresponding node label that was just created + node_label = node_labels[i] # This corresponds to the current node + + # Create property mappings using the exact property IDs from the node label property_mappings = [] - all_props = [node.key_property] + node.properties - - # Use the same property ID patterns as above - if i == 0: # Country node - for j, prop in enumerate(all_props): - prop_id = f"p:{i}_{j}" - field_name = prop.source.column_name if prop.source and prop.source.column_name else prop.name - property_mappings.append({ - "property": {"$ref": f"#{prop_id}"}, - "fieldName": field_name - }) - else: # Other nodes - # Key property mapping - if i == 1: # SubRegion - prop_id = "p:3" - elif i == 2: # Region - prop_id = "p:1" - elif i == 3: # TimeZones - prop_id = "p:2" - elif i == 4: # Currency - prop_id = "p:4" - - field_name = node.key_property.source.column_name if node.key_property.source and node.key_property.source.column_name else node.key_property.name - property_mappings.append({ - "property": {"$ref": f"#{prop_id}"}, - "fieldName": field_name - }) - - # Additional properties for Currency node - if i == 4 and len(node.properties) > 0: - for j, prop in enumerate(node.properties): - additional_prop_id = f"p:{5 + j}" - field_name = prop.source.column_name if prop.source and prop.source.column_name else prop.name - property_mappings.append({ - "property": {"$ref": f"#{additional_prop_id}"}, - "fieldName": field_name - }) - - # Use the property source information if available - table_name = node.key_property.source.table_name if node.key_property.source and node.key_property.source.table_name else "_.csv" - + + for prop_def in node_label["properties"]: + prop_id = prop_def["$id"] + prop_token = prop_def["token"] + + # Find the corresponding property in our node to get the field name + field_name = prop_token # default to token name + + # Check key property first + if node.key_property.name == prop_token: + field_name = ( + node.key_property.source.column_name + if node.key_property.source + and node.key_property.source.column_name + else prop_token + ) + else: + # Check other properties + for prop in node.properties: + if prop.name == prop_token: + field_name = ( + prop.source.column_name + if prop.source and prop.source.column_name + else prop_token + ) + break + + property_mappings.append( + {"property": {"$ref": f"#{prop_id}"}, "fieldName": field_name} + ) + + # Use the property source information if available, otherwise use default + table_name = ( + node.key_property.source.table_name + if node.key_property.source and node.key_property.source.table_name + else f"{node.label.lower()}.csv" + ) + node_mapping = { "node": {"$ref": f"#{node_obj_id}"}, "tableName": table_name, - "propertyMappings": property_mappings + "propertyMappings": property_mappings, } node_mappings.append(node_mapping) + # Create relationship mappings + relationship_mappings = [] + for i, rel in enumerate(self.relationships): + rel_obj_id = f"r:{i + 1}" # Start from 1 + + # Find source and target nodes + source_node = None + target_node = None + for node in self.nodes: + if node.label == rel.start_node_label: + source_node = node + if node.label == rel.end_node_label: + target_node = node + + # Use the same table as the source node, or default + table_name = ( + source_node.key_property.source.table_name + if source_node + and source_node.key_property.source + and source_node.key_property.source.table_name + else f"{source_node.label.lower()}_{rel.type.lower()}_{target_node.label.lower()}.csv" + ) + + # Generate field mappings based on node key properties + from_field = ( + source_node.key_property.source.column_name + if source_node + and source_node.key_property.source + and source_node.key_property.source.column_name + else source_node.key_property.name.lower() + ) + to_field = ( + target_node.key_property.source.column_name + if target_node + and target_node.key_property.source + and target_node.key_property.source.column_name + else target_node.key_property.name.lower() + ) + + rel_mapping = { + "relationship": {"$ref": f"#{rel_obj_id}"}, + "tableName": table_name, + "propertyMappings": [], # Empty for now, can be extended if relationships have properties + "fromMapping": {"fieldName": from_field}, + "toMapping": {"fieldName": to_field}, + } + relationship_mappings.append(rel_mapping) + # Use stored metadata if available, otherwise create defaults version = aura_metadata.get("version", "2.3.1-beta.0") datamodel_version = aura_metadata.get("dataModel_version", "2.3.1-beta.0") stored_constraints = aura_metadata.get("constraints") stored_indexes = aura_metadata.get("indexes") stored_configurations = aura_metadata.get("configurations", {"idsToIgnore": []}) - stored_data_source_schema = aura_metadata.get("dataSourceSchema", {"type": "local", "tableSchemas": []}) + + # Generate table schemas for all referenced tables + table_names = set() + for node_mapping in node_mappings: + table_names.add(node_mapping["tableName"]) + for rel_mapping in relationship_mappings: + table_names.add(rel_mapping["tableName"]) + + # Create table schemas if not stored in metadata + stored_data_source_schema = aura_metadata.get("dataSourceSchema") + if not stored_data_source_schema or not stored_data_source_schema.get( + "tableSchemas" + ): + # Determine the source type based on the properties in the data model + # Check all properties to see if any have a different source type + source_types = set() + for node in self.nodes: + if node.key_property.source and node.key_property.source.source_type: + source_types.add(node.key_property.source.source_type) + for prop in node.properties: + if prop.source and prop.source.source_type: + source_types.add(prop.source.source_type) + + for rel in self.relationships: + if rel.key_property and rel.key_property.source and rel.key_property.source.source_type: + source_types.add(rel.key_property.source.source_type) + for prop in rel.properties: + if prop.source and prop.source.source_type: + source_types.add(prop.source.source_type) + + # Default to "local" if no source types found, or use the first one found + # In practice, all properties should have the same source type for a given data model + data_source_type = source_types.pop() if source_types else "local" + + table_schemas = [] + for table_name in sorted(table_names): # Sort for consistent output + # Generate field schemas based on node/relationship mappings + fields = [] + + # Collect fields from node mappings + for node_mapping in node_mappings: + if node_mapping["tableName"] == table_name: + for prop_mapping in node_mapping["propertyMappings"]: + field_name = prop_mapping["fieldName"] + # Find the property to get its type + prop_ref = prop_mapping["property"]["$ref"].replace("#", "") + prop_type = "string" # default + + # Search for the property in node labels + for node_label in node_labels: + for prop in node_label["properties"]: + if prop["$id"] == prop_ref: + prop_type = prop["type"]["type"] + break + + fields.append( + { + "name": field_name, + "sample": f"sample_{field_name}", + "recommendedType": {"type": prop_type}, + } + ) + + # Collect fields from relationship mappings + for rel_mapping in relationship_mappings: + if rel_mapping["tableName"] == table_name: + # Add from/to fields + from_field = rel_mapping["fromMapping"]["fieldName"] + to_field = rel_mapping["toMapping"]["fieldName"] + + # Add from field if not already present + if not any(f["name"] == from_field for f in fields): + fields.append( + { + "name": from_field, + "sample": f"sample_{from_field}", + "recommendedType": {"type": "string"}, + } + ) + + # Add to field if not already present + if not any(f["name"] == to_field for f in fields): + fields.append( + { + "name": to_field, + "sample": f"sample_{to_field}", + "recommendedType": {"type": "string"}, + } + ) + + table_schemas.append({"name": table_name, "fields": fields}) + + stored_data_source_schema = {"type": data_source_type, "tableSchemas": table_schemas} + else: + stored_data_source_schema = aura_metadata.get( + "dataSourceSchema", {"type": "local", "tableSchemas": []} + ) # Reconstruct visualization nodes from node metadata and generate for new nodes visualization_nodes = [] for i, node in enumerate(self.nodes): node_id = f"n:{i}" - + # Check if node has stored visualization position - if "visualization" in node.metadata and "position" in node.metadata["visualization"]: + if ( + "visualization" in node.metadata + and "position" in node.metadata["visualization"] + ): position = node.metadata["visualization"]["position"] else: # Generate default position for new nodes @@ -1183,11 +1438,8 @@ def to_aura_data_import_dict(self) -> dict[str, Any]: row = i // 5 col = i % 5 position = {"x": col * 200.0, "y": row * 200.0} - - vis_node = { - "id": node_id, - "position": position - } + + vis_node = {"id": node_id, "position": position} visualization_nodes.append(vis_node) # Build complete structure @@ -1203,8 +1455,12 @@ def to_aura_data_import_dict(self) -> dict[str, Any]: "relationshipTypes": relationship_types, "nodeObjectTypes": node_object_types, "relationshipObjectTypes": relationship_object_types, - "constraints": stored_constraints if stored_constraints is not None else constraints, - "indexes": stored_indexes if stored_indexes is not None else indexes, + "constraints": stored_constraints + if stored_constraints is not None + else constraints, + "indexes": stored_indexes + if stored_indexes is not None + else indexes, }, }, "graphSchemaExtensionsRepresentation": { @@ -1213,7 +1469,7 @@ def to_aura_data_import_dict(self) -> dict[str, Any]: "graphMappingRepresentation": { "dataSourceSchema": stored_data_source_schema, "nodeMappings": node_mappings, - "relationshipMappings": [], + "relationshipMappings": relationship_mappings, }, "configurations": stored_configurations, }, diff --git a/servers/mcp-neo4j-data-modeling/tests/integration/test_aura_data_import_conversion_IT.py b/servers/mcp-neo4j-data-modeling/tests/integration/test_aura_data_import_conversion_IT.py deleted file mode 100644 index 2311513..0000000 --- a/servers/mcp-neo4j-data-modeling/tests/integration/test_aura_data_import_conversion_IT.py +++ /dev/null @@ -1,46 +0,0 @@ -import pytest -from mcp_neo4j_data_modeling.data_model import DataModel -import json -from typing import Any - -@pytest.fixture -def aura_data_import_model() -> dict[str, Any]: - with open("tests/resources/neo4j_importer_model_2025-06-30.json", "r") as f: - return json.load(f) - - -def test_aura_data_import_round_trip_data_integrity(aura_data_import_model: dict[str, Any]) -> None: - """Test that Aura Data Import model preserves essential data integrity through round-trip conversion.""" - # Load the model - data_model = DataModel.from_aura_data_import(aura_data_import_model) - - # Convert back to Aura Data Import format - converted_back = data_model.to_aura_data_import_dict() - - # Check top-level structure - assert converted_back["version"] == aura_data_import_model["version"] - assert converted_back["dataModel"]["version"] == aura_data_import_model["dataModel"]["version"] - - # Check that all nodes are preserved - original_node_labels = { - nl["token"] for nl in aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["nodeLabels"] - } - converted_node_labels = { - nl["token"] for nl in converted_back["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["nodeLabels"] - } - assert original_node_labels == converted_node_labels - - # Check that all relationships are preserved - original_rel_types = { - rt["token"] for rt in aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["relationshipTypes"] - } - converted_rel_types = { - rt["token"] for rt in converted_back["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["relationshipTypes"] - } - assert original_rel_types == converted_rel_types - - # Check that visualization nodes are preserved for all nodes - assert len(converted_back["visualisation"]["nodes"]) == len(aura_data_import_model["visualisation"]["nodes"]) - - # Check that metadata was preserved - assert converted_back["dataModel"]["configurations"] == aura_data_import_model["dataModel"]["configurations"] \ No newline at end of file diff --git a/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py b/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py index 06f8130..6c545da 100644 --- a/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py +++ b/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py @@ -1,17 +1,28 @@ """Unit tests for Aura Data Import conversion methods.""" import json -import pytest from pathlib import Path +from typing import Any + +import pytest -from mcp_neo4j_data_modeling.data_model import DataModel, Node, Property, Relationship, PropertySource +from mcp_neo4j_data_modeling.data_model import ( + DataModel, + Node, + Property, + Relationship, +) @pytest.fixture def sample_aura_data_import_model(): """Load the sample Aura Data Import model from the JSON file.""" - json_file = Path(__file__).parent.parent / "resources" / "neo4j_importer_model_2025-06-30.json" - with open(json_file, 'r') as f: + json_file = ( + Path(__file__).parent.parent + / "resources" + / "neo4j_importer_model_2025-06-30.json" + ) + with open(json_file, "r") as f: return json.load(f) @@ -22,18 +33,14 @@ def sample_property_data(): "$id": "p:0_0", "token": "countryId", "type": {"type": "integer"}, - "nullable": False + "nullable": False, } @pytest.fixture def sample_source_mapping(): """Sample source mapping for a property.""" - return { - "tableName": "countries.csv", - "fieldName": "id", - "type": "local" - } + return {"tableName": "countries.csv", "fieldName": "id", "type": "local"} @pytest.fixture @@ -47,9 +54,9 @@ def sample_node_label(): "$id": "p:3", "token": "subregion", "type": {"type": "string"}, - "nullable": True + "nullable": True, } - ] + ], } @@ -59,12 +66,7 @@ def sample_node_mapping(): return { "node": {"$ref": "#n:1"}, "tableName": "countries.csv", - "propertyMappings": [ - { - "fieldName": "subregion", - "property": {"$ref": "#p:3"} - } - ] + "propertyMappings": [{"fieldName": "subregion", "property": {"$ref": "#p:3"}}], } @@ -77,11 +79,11 @@ def test_from_aura_data_import_string_property(self, sample_source_mapping): "$id": "p:1", "token": "name", "type": {"type": "string"}, - "nullable": False + "nullable": False, } - + prop = Property.from_aura_data_import(aura_property, sample_source_mapping) - + assert prop.name == "name" assert prop.type == "STRING" assert prop.source.column_name == "id" @@ -94,11 +96,11 @@ def test_from_aura_data_import_integer_property(self, sample_source_mapping): "$id": "p:0_0", "token": "countryId", "type": {"type": "integer"}, - "nullable": False + "nullable": False, } - + prop = Property.from_aura_data_import(aura_property, sample_source_mapping) - + assert prop.name == "countryId" assert prop.type == "INTEGER" @@ -108,11 +110,11 @@ def test_from_aura_data_import_float_property(self, sample_source_mapping): "$id": "p:0_15", "token": "latitude", "type": {"type": "float"}, - "nullable": False + "nullable": False, } - + prop = Property.from_aura_data_import(aura_property, sample_source_mapping) - + assert prop.name == "latitude" assert prop.type == "FLOAT" @@ -122,57 +124,59 @@ def test_from_aura_data_import_boolean_property(self, sample_source_mapping): "$id": "p:7", "token": "active", "type": {"type": "boolean"}, - "nullable": True + "nullable": True, } - + prop = Property.from_aura_data_import(aura_property, sample_source_mapping) - + assert prop.name == "active" assert prop.type == "BOOLEAN" def test_to_aura_data_import_key_property(self): """Test converting a key property to Aura Data Import format.""" prop = Property(name="id", type="INTEGER") - + result = prop.to_aura_data_import("p:0_0", is_key=True) - + assert result["$id"] == "p:0_0" assert result["token"] == "id" assert result["type"]["type"] == "integer" - assert result["nullable"] == False # Key properties are not nullable + assert not result["nullable"] # Key properties are not nullable def test_to_aura_data_import_non_key_property(self): """Test converting a non-key property to Aura Data Import format.""" prop = Property(name="name", type="STRING") - + result = prop.to_aura_data_import("p:0_1", is_key=False) - + assert result["$id"] == "p:0_1" assert result["token"] == "name" assert result["type"]["type"] == "string" - assert result["nullable"] == True # Non-key properties are nullable + assert result["nullable"] # Non-key properties are nullable def test_to_aura_data_import_unknown_type_defaults_to_string(self): """Test that unknown property types default to string.""" prop = Property(name="custom", type="CUSTOM_TYPE") - + result = prop.to_aura_data_import("p:1", is_key=False) - + assert result["type"]["type"] == "string" class TestNodeConversion: """Test Node conversion methods.""" - def test_from_aura_data_import_simple_node(self, sample_node_label, sample_node_mapping): + def test_from_aura_data_import_simple_node( + self, sample_node_label, sample_node_mapping + ): """Test converting a simple node from Aura Data Import format.""" node = Node.from_aura_data_import( - sample_node_label, + sample_node_label, "subregion", # key property token sample_node_mapping, - "local" # source_type + "local", # source_type ) - + assert node.label == "SubRegion" assert node.key_property.name == "subregion" assert node.key_property.type == "STRING" @@ -181,36 +185,44 @@ def test_from_aura_data_import_simple_node(self, sample_node_label, sample_node_ def test_from_aura_data_import_complex_node(self, sample_aura_data_import_model): """Test converting a complex node with multiple properties.""" # Get the Country node from the sample data - country_node_label = sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["nodeLabels"][0] - country_node_mapping = sample_aura_data_import_model["dataModel"]["graphMappingRepresentation"]["nodeMappings"][0] - + country_node_label = sample_aura_data_import_model["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"]["nodeLabels"][0] + country_node_mapping = sample_aura_data_import_model["dataModel"][ + "graphMappingRepresentation" + ]["nodeMappings"][0] + node = Node.from_aura_data_import( country_node_label, "countryId", # key property token country_node_mapping, - "local" # source_type + "local", # source_type ) - + assert node.label == "Country" assert node.key_property.name == "countryId" assert node.key_property.type == "INTEGER" - assert len(node.properties) == 12 # 13 total properties - 1 key = 12 non-key properties - + assert ( + len(node.properties) == 12 + ) # 13 total properties - 1 key = 12 non-key properties + # Check some specific properties property_names = [p.name for p in node.properties] assert "name" in property_names assert "iso3" in property_names assert "latitude" in property_names - def test_from_aura_data_import_missing_key_property_uses_first(self, sample_node_label, sample_node_mapping): + def test_from_aura_data_import_missing_key_property_uses_first( + self, sample_node_label, sample_node_mapping + ): """Test that when key property is not found, first property is used as key.""" node = Node.from_aura_data_import( sample_node_label, "nonexistent_key", # This key doesn't exist sample_node_mapping, - "local" # source_type + "local", # source_type ) - + assert node.label == "SubRegion" assert node.key_property.name == "subregion" # First property becomes key assert len(node.properties) == 0 @@ -220,34 +232,54 @@ def test_to_aura_data_import_simple_node(self): key_prop = Property(name="id", type="INTEGER") other_prop = Property(name="name", type="STRING") node = Node(label="TestNode", key_property=key_prop, properties=[other_prop]) - - node_label, key_property = node.to_aura_data_import("nl:0") - + + node_label, key_property, constraint, index = node.to_aura_data_import( + "nl:0", "n:0", "p:0_0", "c:0", "i:0" + ) + # Check node label assert node_label["$id"] == "nl:0" assert node_label["token"] == "TestNode" assert len(node_label["properties"]) == 2 - + # Check key property is first and not nullable assert node_label["properties"][0]["token"] == "id" - assert node_label["properties"][0]["nullable"] == False + assert not node_label["properties"][0]["nullable"] assert node_label["properties"][1]["token"] == "name" - assert node_label["properties"][1]["nullable"] == True - + assert node_label["properties"][1]["nullable"] + # Check key property mapping - assert key_property["node"]["$ref"] == "#nl:0" + assert key_property["node"]["$ref"] == "#n:0" assert key_property["keyProperty"]["$ref"] == "#p:0_0" + # Check constraint + assert constraint["$id"] == "c:0" + assert constraint["name"] == "TestNode_constraint" + assert constraint["constraintType"] == "uniqueness" + assert constraint["entityType"] == "node" + assert constraint["nodeLabel"]["$ref"] == "#nl:0" + assert constraint["properties"][0]["$ref"] == "#p:0_0" + + # Check index + assert index["$id"] == "i:0" + assert index["name"] == "TestNode_index" + assert index["indexType"] == "default" + assert index["entityType"] == "node" + assert index["nodeLabel"]["$ref"] == "#nl:0" + assert index["properties"][0]["$ref"] == "#p:0_0" + def test_node_mapping_property_not_found_raises_error(self, sample_node_label): """Test that missing property in node mapping raises an error.""" invalid_mapping = { "node": {"$ref": "#n:1"}, "tableName": "countries.csv", - "propertyMappings": [] # Empty mappings + "propertyMappings": [], # Empty mappings } - + with pytest.raises(ValueError, match="Property p:3 not found in node mapping"): - Node.from_aura_data_import(sample_node_label, "subregion", invalid_mapping, "local") + Node.from_aura_data_import( + sample_node_label, "subregion", invalid_mapping, "local" + ) class TestRelationshipConversion: @@ -255,35 +287,28 @@ class TestRelationshipConversion: def test_from_aura_data_import_simple_relationship(self): """Test converting a simple relationship from Aura Data Import format.""" - rel_type = { - "$id": "rt:1", - "token": "IN_SUBREGION", - "properties": [] - } - + rel_type = {"$id": "rt:1", "token": "IN_SUBREGION", "properties": []} + rel_obj = { "$id": "r:1", "type": {"$ref": "#rt:1"}, "from": {"$ref": "#n:0"}, - "to": {"$ref": "#n:1"} - } - - node_id_to_label_map = { - "#n:0": "Country", - "#n:1": "SubRegion" + "to": {"$ref": "#n:1"}, } - + + node_id_to_label_map = {"#n:0": "Country", "#n:1": "SubRegion"} + # Empty relationship mapping since there are no properties rel_mapping = { "relationship": {"$ref": "#r:1"}, "tableName": "relationships.csv", - "propertyMappings": [] + "propertyMappings": [], } - + relationship = Relationship.from_aura_data_import( rel_type, rel_obj, node_id_to_label_map, rel_mapping, "local" ) - + assert relationship.type == "IN_SUBREGION" assert relationship.start_node_label == "Country" assert relationship.end_node_label == "SubRegion" @@ -300,49 +325,40 @@ def test_from_aura_data_import_relationship_with_properties(self): "$id": "p:rel_1", "token": "weight", "type": {"type": "float"}, - "nullable": False + "nullable": False, }, { "$id": "p:rel_2", "token": "since", "type": {"type": "string"}, - "nullable": True - } - ] + "nullable": True, + }, + ], } - + rel_obj = { "$id": "r:2", "type": {"$ref": "#rt:2"}, "from": {"$ref": "#n:0"}, - "to": {"$ref": "#n:1"} + "to": {"$ref": "#n:1"}, } - - node_id_to_label_map = { - "#n:0": "NodeA", - "#n:1": "NodeB" - } - + + node_id_to_label_map = {"#n:0": "NodeA", "#n:1": "NodeB"} + # Relationship mapping with properties rel_mapping = { "relationship": {"$ref": "#r:2"}, "tableName": "relationships.csv", "propertyMappings": [ - { - "property": {"$ref": "#p:rel_1"}, - "fieldName": "weight" - }, - { - "property": {"$ref": "#p:rel_2"}, - "fieldName": "since" - } - ] + {"property": {"$ref": "#p:rel_1"}, "fieldName": "weight"}, + {"property": {"$ref": "#p:rel_2"}, "fieldName": "since"}, + ], } - + relationship = Relationship.from_aura_data_import( rel_type, rel_obj, node_id_to_label_map, rel_mapping, "local" ) - + assert relationship.type == "CONNECTED_TO" assert relationship.key_property.name == "weight" # First property becomes key assert len(relationship.properties) == 1 # Second property @@ -351,45 +367,67 @@ def test_from_aura_data_import_relationship_with_properties(self): def test_to_aura_data_import_simple_relationship(self): """Test converting a simple relationship to Aura Data Import format.""" relationship = Relationship( - type="KNOWS", - start_node_label="Person", - end_node_label="Person" + type="KNOWS", start_node_label="Person", end_node_label="Person" ) - - rel_type, rel_obj = relationship.to_aura_data_import("rt:1", "r:1", "n:0", "n:1") - + + rel_type, rel_obj, constraint, index = relationship.to_aura_data_import( + "rt:1", "r:1", "n:0", "n:1" + ) + # Check relationship type assert rel_type["$id"] == "rt:1" assert rel_type["token"] == "KNOWS" assert len(rel_type["properties"]) == 0 - + # Check relationship object assert rel_obj["$id"] == "r:1" assert rel_obj["type"]["$ref"] == "#rt:1" assert rel_obj["from"]["$ref"] == "#n:0" assert rel_obj["to"]["$ref"] == "#n:1" + # Check that constraint and index are None (no key property) + assert constraint is None + assert index is None + def test_to_aura_data_import_relationship_with_properties(self): """Test converting a relationship with properties to Aura Data Import format.""" key_prop = Property(name="relationshipId", type="INTEGER") other_prop = Property(name="strength", type="FLOAT") - + relationship = Relationship( type="CONNECTED", start_node_label="NodeA", end_node_label="NodeB", key_property=key_prop, - properties=[other_prop] + properties=[other_prop], ) - - rel_type, rel_obj = relationship.to_aura_data_import("rt:2", "r:2", "n:0", "n:1") - + + rel_type, rel_obj, constraint, index = relationship.to_aura_data_import( + "rt:2", "r:2", "n:0", "n:1", "c:5", "i:5" + ) + # Check relationship type has properties assert len(rel_type["properties"]) == 2 assert rel_type["properties"][0]["token"] == "relationshipId" - assert rel_type["properties"][0]["nullable"] == False # Key property + assert not rel_type["properties"][0]["nullable"] # Key property assert rel_type["properties"][1]["token"] == "strength" - assert rel_type["properties"][1]["nullable"] == True # Non-key property + assert rel_type["properties"][1]["nullable"] # Non-key property + + # Check constraint (should exist since relationship has key property) + assert constraint is not None + assert constraint["$id"] == "c:5" + assert constraint["name"] == "CONNECTED_constraint" + assert constraint["constraintType"] == "uniqueness" + assert constraint["entityType"] == "relationship" + assert constraint["relationshipType"]["$ref"] == "#rt:2" + + # Check index (should exist since relationship has key property) + assert index is not None + assert index["$id"] == "i:5" + assert index["name"] == "CONNECTED_index" + assert index["indexType"] == "default" + assert index["entityType"] == "relationship" + assert index["relationshipType"]["$ref"] == "#rt:2" class TestDataModelConversion: @@ -398,7 +436,7 @@ class TestDataModelConversion: def test_from_aura_data_import_full_model(self, sample_aura_data_import_model): """Test converting the full sample Aura Data Import model.""" data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) - + # Check nodes assert len(data_model.nodes) == 5 node_labels = [n.label for n in data_model.nodes] @@ -407,7 +445,7 @@ def test_from_aura_data_import_full_model(self, sample_aura_data_import_model): assert "Region" in node_labels assert "TimeZones" in node_labels assert "Currency" in node_labels - + # Check relationships assert len(data_model.relationships) == 4 rel_types = [r.type for r in data_model.relationships] @@ -416,15 +454,17 @@ def test_from_aura_data_import_full_model(self, sample_aura_data_import_model): assert "IN_TIMEZONE" in rel_types assert "USES_CURRENCY" in rel_types - def test_from_aura_data_import_node_key_properties(self, sample_aura_data_import_model): + def test_from_aura_data_import_node_key_properties( + self, sample_aura_data_import_model + ): """Test that node key properties are correctly identified.""" data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) - + # Find specific nodes and check their key properties country_node = next(n for n in data_model.nodes if n.label == "Country") assert country_node.key_property.name == "countryId" assert country_node.key_property.type == "INTEGER" - + region_node = next(n for n in data_model.nodes if n.label == "Region") assert region_node.key_property.name == "region" assert region_node.key_property.type == "STRING" @@ -434,33 +474,31 @@ def test_to_aura_data_import_dict_structure(self): # Create a simple data model key_prop = Property(name="id", type="INTEGER") node1 = Node(label="TestNode", key_property=key_prop) - + rel = Relationship( - type="TEST_REL", - start_node_label="TestNode", - end_node_label="TestNode" + type="TEST_REL", start_node_label="TestNode", end_node_label="TestNode" ) - + data_model = DataModel(nodes=[node1], relationships=[rel]) - + result = data_model.to_aura_data_import_dict() - + # Check top-level structure assert "version" in result assert "visualisation" in result assert "dataModel" in result - + # Check visualization structure assert "nodes" in result["visualisation"] assert len(result["visualisation"]["nodes"]) == 1 - + # Check data model structure data_model_content = result["dataModel"] assert "graphSchemaRepresentation" in data_model_content assert "graphSchemaExtensionsRepresentation" in data_model_content assert "graphMappingRepresentation" in data_model_content assert "configurations" in data_model_content - + # Check graph schema graph_schema = data_model_content["graphSchemaRepresentation"]["graphSchema"] assert "nodeLabels" in graph_schema @@ -475,18 +513,18 @@ def test_to_aura_data_import_dict_node_constraints_and_indexes(self): key_prop = Property(name="userId", type="INTEGER") node = Node(label="User", key_property=key_prop) data_model = DataModel(nodes=[node]) - + result = data_model.to_aura_data_import_dict() - + graph_schema = result["dataModel"]["graphSchemaRepresentation"]["graphSchema"] - + # Check constraints assert len(graph_schema["constraints"]) == 1 constraint = graph_schema["constraints"][0] assert constraint["name"] == "User_constraint" assert constraint["constraintType"] == "uniqueness" assert constraint["entityType"] == "node" - + # Check indexes assert len(graph_schema["indexes"]) == 1 index = graph_schema["indexes"][0] @@ -500,13 +538,13 @@ def test_round_trip_conversion_simple(self): key_prop = Property(name="id", type="STRING") node = Node(label="TestNode", key_property=key_prop) original_model = DataModel(nodes=[node]) - + # Convert to Aura format aura_dict = original_model.to_aura_data_import_dict() - + # Convert back converted_model = DataModel.from_aura_data_import(aura_dict) - + # Check that essential structure is preserved assert len(converted_model.nodes) == 1 assert converted_model.nodes[0].label == "TestNode" @@ -520,25 +558,23 @@ def test_round_trip_conversion_with_relationships(self): key_prop2 = Property(name="id2", type="STRING") node1 = Node(label="Node1", key_property=key_prop1) node2 = Node(label="Node2", key_property=key_prop2) - + rel = Relationship( - type="CONNECTS", - start_node_label="Node1", - end_node_label="Node2" + type="CONNECTS", start_node_label="Node1", end_node_label="Node2" ) - + original_model = DataModel(nodes=[node1, node2], relationships=[rel]) - + # Convert to Aura format and back aura_dict = original_model.to_aura_data_import_dict() converted_model = DataModel.from_aura_data_import(aura_dict) - + # Check nodes assert len(converted_model.nodes) == 2 node_labels = [n.label for n in converted_model.nodes] assert "Node1" in node_labels assert "Node2" in node_labels - + # Check relationships assert len(converted_model.relationships) == 1 assert converted_model.relationships[0].type == "CONNECTS" @@ -549,7 +585,7 @@ def test_json_serialization(self, sample_aura_data_import_model): """Test that the converted model can be serialized to JSON.""" data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) json_str = data_model.to_aura_data_import_json_str() - + # Should be valid JSON parsed = json.loads(json_str) assert isinstance(parsed, dict) @@ -559,11 +595,11 @@ def test_metadata_preservation_round_trip(self, sample_aura_data_import_model): """Test that metadata (constraints, indexes, version, configurations) is preserved during round-trip conversion.""" # Convert from Aura Data Import to our model data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) - + # Verify metadata was stored assert "aura_data_import" in data_model.metadata aura_metadata = data_model.metadata["aura_data_import"] - + # Check that all expected metadata fields are present assert "version" in aura_metadata assert "dataModel_version" in aura_metadata @@ -571,51 +607,89 @@ def test_metadata_preservation_round_trip(self, sample_aura_data_import_model): assert "indexes" in aura_metadata assert "configurations" in aura_metadata assert "dataSourceSchema" in aura_metadata - + # Verify the stored values match the original assert aura_metadata["version"] == sample_aura_data_import_model["version"] - assert aura_metadata["dataModel_version"] == sample_aura_data_import_model["dataModel"]["version"] - assert aura_metadata["constraints"] == sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["constraints"] - assert aura_metadata["indexes"] == sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["indexes"] - assert aura_metadata["configurations"] == sample_aura_data_import_model["dataModel"]["configurations"] - + assert ( + aura_metadata["dataModel_version"] + == sample_aura_data_import_model["dataModel"]["version"] + ) + assert ( + aura_metadata["constraints"] + == sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ]["constraints"] + ) + assert ( + aura_metadata["indexes"] + == sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ]["indexes"] + ) + assert ( + aura_metadata["configurations"] + == sample_aura_data_import_model["dataModel"]["configurations"] + ) + # Check that visualization data was stored in node metadata original_vis_nodes = sample_aura_data_import_model["visualisation"]["nodes"] for i, node in enumerate(data_model.nodes): node_id = f"n:{i}" - original_vis_node = next((v for v in original_vis_nodes if v["id"] == node_id), None) + original_vis_node = next( + (v for v in original_vis_nodes if v["id"] == node_id), None + ) if original_vis_node: assert "visualization" in node.metadata - assert node.metadata["visualization"]["position"] == original_vis_node["position"] - + assert ( + node.metadata["visualization"]["position"] + == original_vis_node["position"] + ) + # Convert back to Aura Data Import converted_back = data_model.to_aura_data_import_dict() - + # Verify that the metadata was restored assert converted_back["version"] == sample_aura_data_import_model["version"] - assert converted_back["dataModel"]["version"] == sample_aura_data_import_model["dataModel"]["version"] - assert converted_back["dataModel"]["configurations"] == sample_aura_data_import_model["dataModel"]["configurations"] - + assert ( + converted_back["dataModel"]["version"] + == sample_aura_data_import_model["dataModel"]["version"] + ) + assert ( + converted_back["dataModel"]["configurations"] + == sample_aura_data_import_model["dataModel"]["configurations"] + ) + # Verify that visualization was reconstructed correctly assert "visualisation" in converted_back assert "nodes" in converted_back["visualisation"] assert len(converted_back["visualisation"]["nodes"]) == len(data_model.nodes) - + # Check that positions were preserved for existing nodes original_vis_nodes = sample_aura_data_import_model["visualisation"]["nodes"] converted_vis_nodes = converted_back["visualisation"]["nodes"] for original_vis_node in original_vis_nodes: - converted_vis_node = next((v for v in converted_vis_nodes if v["id"] == original_vis_node["id"]), None) + converted_vis_node = next( + (v for v in converted_vis_nodes if v["id"] == original_vis_node["id"]), + None, + ) if converted_vis_node: assert converted_vis_node["position"] == original_vis_node["position"] - + # Check that constraints and indexes were preserved - original_constraints = sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["constraints"] - converted_constraints = converted_back["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["constraints"] + original_constraints = sample_aura_data_import_model["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"]["constraints"] + converted_constraints = converted_back["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"]["constraints"] assert converted_constraints == original_constraints - - original_indexes = sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["indexes"] - converted_indexes = converted_back["dataModel"]["graphSchemaRepresentation"]["graphSchema"]["indexes"] + + original_indexes = sample_aura_data_import_model["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"]["indexes"] + converted_indexes = converted_back["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ]["indexes"] assert converted_indexes == original_indexes def test_export_without_metadata_uses_defaults(self): @@ -624,55 +698,70 @@ def test_export_without_metadata_uses_defaults(self): key_prop = Property(name="id", type="INTEGER") node = Node(label="TestNode", key_property=key_prop) data_model = DataModel(nodes=[node]) - + # Export to Aura Data Import format aura_dict = data_model.to_aura_data_import_dict() - + # Verify default values are used assert aura_dict["version"] == "2.3.1-beta.0" assert aura_dict["dataModel"]["version"] == "2.3.1-beta.0" assert aura_dict["dataModel"]["configurations"] == {"idsToIgnore": []} - assert aura_dict["dataModel"]["graphMappingRepresentation"]["dataSourceSchema"] == {"type": "local", "tableSchemas": []} + # Verify that table schemas are automatically generated (not empty) + data_source_schema = aura_dict["dataModel"]["graphMappingRepresentation"]["dataSourceSchema"] + assert data_source_schema["type"] == "local" + assert len(data_source_schema["tableSchemas"]) == 1 + assert data_source_schema["tableSchemas"][0]["name"] == "testnode.csv" + assert len(data_source_schema["tableSchemas"][0]["fields"]) == 1 + assert data_source_schema["tableSchemas"][0]["fields"][0]["name"] == "id" + # Verify visualization nodes are generated assert "visualisation" in aura_dict assert "nodes" in aura_dict["visualisation"] assert len(aura_dict["visualisation"]["nodes"]) == 1 assert aura_dict["visualisation"]["nodes"][0]["id"] == "n:0" - + # Verify constraints and indexes are generated for the node - graph_schema = aura_dict["dataModel"]["graphSchemaRepresentation"]["graphSchema"] + graph_schema = aura_dict["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ] assert len(graph_schema["constraints"]) == 1 assert len(graph_schema["indexes"]) == 1 assert graph_schema["constraints"][0]["name"] == "TestNode_constraint" assert graph_schema["indexes"][0]["name"] == "TestNode_index" - def test_visualization_reconstruction_with_new_nodes(self, sample_aura_data_import_model): + def test_visualization_reconstruction_with_new_nodes( + self, sample_aura_data_import_model + ): """Test that visualization is properly reconstructed when new nodes are added.""" # Convert from Aura Data Import to our model data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) - + # Add a new node that wasn't in the original data new_key_prop = Property(name="newId", type="STRING") new_node = Node(label="NewNode", key_property=new_key_prop) data_model.add_node(new_node) - + # Convert back to Aura Data Import converted_back = data_model.to_aura_data_import_dict() - + # Verify visualization includes all nodes (original + new) vis_nodes = converted_back["visualisation"]["nodes"] assert len(vis_nodes) == len(data_model.nodes) - + # Check that original nodes kept their positions original_vis_nodes = sample_aura_data_import_model["visualisation"]["nodes"] for original_vis_node in original_vis_nodes: - converted_vis_node = next((v for v in vis_nodes if v["id"] == original_vis_node["id"]), None) + converted_vis_node = next( + (v for v in vis_nodes if v["id"] == original_vis_node["id"]), None + ) if converted_vis_node: assert converted_vis_node["position"] == original_vis_node["position"] - + # Check that new node got a default position - new_node_id = f"n:{len(data_model.nodes) - 1}" # Last node should be the new one + new_node_id = ( + f"n:{len(data_model.nodes) - 1}" # Last node should be the new one + ) new_vis_node = next((v for v in vis_nodes if v["id"] == new_node_id), None) assert new_vis_node is not None assert "position" in new_vis_node @@ -686,15 +775,17 @@ class TestEdgeCases: def test_empty_data_model_conversion(self): """Test converting an empty data model.""" empty_model = DataModel() - + aura_dict = empty_model.to_aura_data_import_dict() - + # Should have basic structure even when empty assert "version" in aura_dict assert "visualisation" in aura_dict assert len(aura_dict["visualisation"]["nodes"]) == 0 - - graph_schema = aura_dict["dataModel"]["graphSchemaRepresentation"]["graphSchema"] + + graph_schema = aura_dict["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ] assert len(graph_schema["nodeLabels"]) == 0 assert len(graph_schema["relationshipTypes"]) == 0 @@ -703,12 +794,14 @@ def test_node_with_no_properties_mapping(self, sample_node_label): empty_mapping = { "node": {"$ref": "#n:1"}, "tableName": "unknown", - "propertyMappings": [] + "propertyMappings": [], } - + # Should raise error when property is not found in mapping with pytest.raises(ValueError): - Node.from_aura_data_import(sample_node_label, "subregion", empty_mapping, "local") + Node.from_aura_data_import( + sample_node_label, "subregion", empty_mapping, "local" + ) def test_malformed_aura_data_missing_required_fields(self): """Test handling of malformed Aura Data Import data.""" @@ -716,7 +809,7 @@ def test_malformed_aura_data_missing_required_fields(self): "version": "2.3.1-beta.0", # Missing visualisation and dataModel } - + with pytest.raises(KeyError): DataModel.from_aura_data_import(malformed_data) @@ -727,12 +820,71 @@ def test_property_type_edge_cases(self, sample_source_mapping): "$id": "p:unknown", "token": "unknown", "type": {"type": "unknown_type"}, - "nullable": False + "nullable": False, } - + prop = Property.from_aura_data_import(unknown_type_prop, sample_source_mapping) assert prop.type == "UNKNOWN_TYPE" # Should uppercase unknown types - + # Test conversion back result = prop.to_aura_data_import("p:test", is_key=False) - assert result["type"]["type"] == "string" # Should default to string \ No newline at end of file + assert result["type"]["type"] == "string" # Should default to string + + +def test_aura_data_import_round_trip_data_integrity( + sample_aura_data_import_model: dict[str, Any], +) -> None: + """Test that Aura Data Import model preserves essential data integrity through round-trip conversion.""" + # Load the model + data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) + + # Convert back to Aura Data Import format + converted_back = data_model.to_aura_data_import_dict() + + # Check top-level structure + assert converted_back["version"] == sample_aura_data_import_model["version"] + assert ( + converted_back["dataModel"]["version"] + == sample_aura_data_import_model["dataModel"]["version"] + ) + + # Check that all nodes are preserved + original_node_labels = { + nl["token"] + for nl in sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ]["nodeLabels"] + } + converted_node_labels = { + nl["token"] + for nl in converted_back["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ]["nodeLabels"] + } + assert original_node_labels == converted_node_labels + + # Check that all relationships are preserved + original_rel_types = { + rt["token"] + for rt in sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ]["relationshipTypes"] + } + converted_rel_types = { + rt["token"] + for rt in converted_back["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ]["relationshipTypes"] + } + assert original_rel_types == converted_rel_types + + # Check that visualization nodes are preserved for all nodes + assert len(converted_back["visualisation"]["nodes"]) == len( + sample_aura_data_import_model["visualisation"]["nodes"] + ) + + # Check that metadata was preserved + assert ( + converted_back["dataModel"]["configurations"] + == sample_aura_data_import_model["dataModel"]["configurations"] + ) \ No newline at end of file From 846de11f20cb62c6cb3e3327ccc04bebabe0bfe0 Mon Sep 17 00:00:00 2001 From: alex Date: Tue, 1 Jul 2025 09:01:49 -0500 Subject: [PATCH 3/6] update data model doc strings --- .../src/mcp_neo4j_data_modeling/data_model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py index a86a87e..25ba303 100644 --- a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py @@ -1,6 +1,6 @@ import json from collections import Counter -from typing import Any +from typing import Any, Literal from pydantic import BaseModel, Field, ValidationInfo, field_validator @@ -45,9 +45,9 @@ class PropertySource(BaseModel): default=None, description="The location of the property, if known. May be a file path, URL, etc.", ) - source_type: str | None = Field( + source_type: Literal["local", "remote"] | None = Field( default=None, - description="The type of the data source: 'local' or 'remote'.", + description="The type of the data source: 'local' or 'remote'. 'local' means the data source is a file or database table on the local machine. 'remote' means the data source is a file or database table on a remote machine.", ) @@ -60,7 +60,7 @@ class Property(BaseModel): description="The Neo4j type of the property. Should be all caps.", ) source: PropertySource | None = Field( - default=None, description="The source of the property, if known." + default=None, description="The source of the property, if known. For example this may be a CSV file or a database table." ) description: str | None = Field( default=None, description="The description of the property" From 38422b216f4732284a242255fb57c0ccf140dd17 Mon Sep 17 00:00:00 2001 From: alex Date: Tue, 1 Jul 2025 09:31:16 -0500 Subject: [PATCH 4/6] implement TypedDict models representing the structure of the Aura Data Import json file --- .../aura_data_import/models.py | 479 ++++++++++-------- .../src/mcp_neo4j_data_modeling/data_model.py | 176 +++++-- .../unit/test_aura_data_import_conversion.py | 29 +- 3 files changed, 431 insertions(+), 253 deletions(-) diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/aura_data_import/models.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/aura_data_import/models.py index 2afe07d..fdc298e 100644 --- a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/aura_data_import/models.py +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/aura_data_import/models.py @@ -1,235 +1,316 @@ +""" +These are the models that make up the data model structure generated by the Aura Data Import console tool. +""" + from typing import List, Literal, Optional, TypedDict +AuraDataImportSupportedTypes = Literal[ + "string", "integer", "float", "boolean" +] # supported types in Aura Data Import -class Position(TypedDict): - x: float - y: float +Position = TypedDict( + "Position", + { + "x": float, + "y": float, + }, +) -class AuraDataImportVisualisationNode(TypedDict): - id: str - position: Position +AuraDataImportVisualisationNode = TypedDict( + "AuraDataImportVisualisationNode", + { + "id": str, + "position": Position, + }, +) # Property and Type Definitions -class PropertyType(TypedDict): - type: Literal["string", "integer", "float", "boolean"] - +PropertyType = TypedDict( + "PropertyType", + { + "type": AuraDataImportSupportedTypes, + }, +) -class Property(TypedDict): - """Property definition with $id, token, type, and nullable fields.""" - __dollar_id: str # Represents "$id" field - token: str - type: PropertyType - nullable: bool +Property = TypedDict( + "Property", + { + "$id": str, + "token": str, + "type": PropertyType, + "nullable": bool, + }, +) # Node and Relationship Schema Types -class NodeLabel(TypedDict): - """Node label definition with properties.""" - - __dollar_id: str # Represents "$id" field - token: str - properties: List[Property] - - -class RelationshipType(TypedDict): - """Relationship type definition.""" - - __dollar_id: str # Represents "$id" field - token: str - properties: List[Property] - - -class LabelRef(TypedDict): - """Reference to a node label.""" - - __dollar_ref: str # Represents "$ref" field - - -class NodeObjectType(TypedDict): - """Node object type with labels.""" - - __dollar_id: str # Represents "$id" field - labels: List[LabelRef] - - -class TypeRef(TypedDict): - """Reference to a relationship type.""" - - __dollar_ref: str # Represents "$ref" field - - -class NodeRef(TypedDict): - """Reference to a node.""" - - __dollar_ref: str # Represents "$ref" field - - -class RelationshipObjectType(TypedDict): - """Relationship object type definition.""" - - __dollar_id: str # Represents "$id" field - type: TypeRef - __from: NodeRef # Represents "from" field (Python keyword) - to: NodeRef - - -class PropertyRef(TypedDict): - """Reference to a property.""" - - __dollar_ref: str # Represents "$ref" field +NodeLabel = TypedDict( + "NodeLabel", + { + "$id": str, + "token": str, + "properties": List[Property], + }, +) + + +RelationshipType = TypedDict( + "RelationshipType", + { + "$id": str, + "token": str, + "properties": List[Property], + }, +) + + +LabelRef = TypedDict( + "LabelRef", + { + "$ref": str, + }, +) + + +NodeObjectType = TypedDict( + "NodeObjectType", + { + "$id": str, + "labels": List[LabelRef], + }, +) + + +TypeRef = TypedDict( + "TypeRef", + { + "$ref": str, + }, +) + + +NodeRef = TypedDict( + "NodeRef", + { + "$ref": str, + }, +) + + +RelationshipObjectType = TypedDict( + "RelationshipObjectType", + { + "$id": str, + "type": TypeRef, + "from": NodeRef, + "to": NodeRef, + }, +) + + +PropertyRef = TypedDict( + "PropertyRef", + { + "$ref": str, + }, +) # Constraint and Index Types -class Constraint(TypedDict): - """Database constraint definition.""" - - __dollar_id: str # Represents "$id" field - name: str - constraintType: Literal["uniqueness", "existence", "node_key"] - entityType: Literal["node", "relationship"] - nodeLabel: Optional[LabelRef] - relationshipType: Optional[TypeRef] - properties: List[PropertyRef] - - -class Index(TypedDict): - """Database index definition.""" - - __dollar_id: str # Represents "$id" field - name: str - indexType: str - entityType: Literal["node", "relationship"] - nodeLabel: Optional[LabelRef] - relationshipType: Optional[TypeRef] - properties: List[PropertyRef] +Constraint = TypedDict( + "Constraint", + { + "$id": str, + "name": str, + "constraintType": Literal["uniqueness", "existence", "node_key"], + "entityType": Literal["node", "relationship"], + "nodeLabel": Optional[LabelRef], + "relationshipType": Optional[TypeRef], + "properties": List[PropertyRef], + }, +) + + +Index = TypedDict( + "Index", + { + "$id": str, + "name": str, + "indexType": str, + "entityType": Literal["node", "relationship"], + "nodeLabel": Optional[LabelRef], + "relationshipType": Optional[TypeRef], + "properties": List[PropertyRef], + }, +) # Graph Schema Types -class GraphSchema(TypedDict): - """Complete graph schema definition.""" - - nodeLabels: List[NodeLabel] - relationshipTypes: List[RelationshipType] - nodeObjectTypes: List[NodeObjectType] - relationshipObjectTypes: List[RelationshipObjectType] - constraints: List[Constraint] - indexes: List[Index] - - -class GraphSchemaRepresentation(TypedDict): - """Graph schema representation with version.""" - - version: str - graphSchema: GraphSchema +GraphSchema = TypedDict( + "GraphSchema", + { + "nodeLabels": List[NodeLabel], + "relationshipTypes": List[RelationshipType], + "nodeObjectTypes": List[NodeObjectType], + "relationshipObjectTypes": List[RelationshipObjectType], + "constraints": List[Constraint], + "indexes": List[Index], + }, +) + + +GraphSchemaRepresentation = TypedDict( + "GraphSchemaRepresentation", + { + "version": str, + "graphSchema": GraphSchema, + }, +) # Graph Schema Extensions -class NodeKeyProperty(TypedDict): - """Node key property mapping.""" - - node: NodeRef - keyProperty: PropertyRef +NodeKeyProperty = TypedDict( + "NodeKeyProperty", + { + "node": NodeRef, + "keyProperty": PropertyRef, + }, +) -class GraphSchemaExtensionsRepresentation(TypedDict): - """Graph schema extensions.""" - - nodeKeyProperties: List[NodeKeyProperty] +GraphSchemaExtensionsRepresentation = TypedDict( + "GraphSchemaExtensionsRepresentation", + { + "nodeKeyProperties": List[NodeKeyProperty], + }, +) # Data Source Schema Types -class RecommendedType(TypedDict): - """Recommended data type for a field.""" - - type: Literal["string", "integer", "float", "boolean"] - - -class Field(TypedDict): - """Field definition in a table schema.""" - - name: str - sample: str - recommendedType: RecommendedType - - -class TableSchema(TypedDict): - """Table schema definition.""" - - name: str - fields: List[Field] - - -class DataSourceSchema(TypedDict): - """Data source schema definition.""" - - type: Literal["local", "remote"] - tableSchemas: List[TableSchema] +RecommendedType = TypedDict( + "RecommendedType", + { + "type": AuraDataImportSupportedTypes, + }, +) + + +Field = TypedDict( + "Field", + { + "name": str, + "sample": str, + "recommendedType": RecommendedType, + }, +) + + +TableSchema = TypedDict( + "TableSchema", + { + "name": str, + "fields": List[Field], + }, +) + + +DataSourceSchema = TypedDict( + "DataSourceSchema", + { + "type": Literal["local", "remote"], + "tableSchemas": List[TableSchema], + }, +) # Mapping Types -class PropertyMapping(TypedDict): - """Property to field mapping.""" - - property: PropertyRef - fieldName: str - - -class NodeMapping(TypedDict): - """Node mapping to table.""" - - node: NodeRef - tableName: str - propertyMappings: List[PropertyMapping] - - -class FieldMapping(TypedDict): - """Field mapping for relationships.""" - - fieldName: str - - -class RelationshipMapping(TypedDict): - """Relationship mapping to table.""" - - relationship: NodeRef - tableName: str - propertyMappings: List[PropertyMapping] - fromMapping: FieldMapping - toMapping: FieldMapping - - -class GraphMappingRepresentation(TypedDict): - """Graph mapping representation.""" - - dataSourceSchema: DataSourceSchema - nodeMappings: List[NodeMapping] - relationshipMappings: List[RelationshipMapping] +PropertyMapping = TypedDict( + "PropertyMapping", + { + "property": PropertyRef, + "fieldName": str, + }, +) + + +NodeMapping = TypedDict( + "NodeMapping", + { + "node": NodeRef, + "tableName": str, + "propertyMappings": List[PropertyMapping], + }, +) + + +FieldMapping = TypedDict( + "FieldMapping", + { + "fieldName": str, + }, +) + + +RelationshipMapping = TypedDict( + "RelationshipMapping", + { + "relationship": NodeRef, + "tableName": str, + "propertyMappings": List[PropertyMapping], + "fromMapping": FieldMapping, + "toMapping": FieldMapping, + }, +) + + +GraphMappingRepresentation = TypedDict( + "GraphMappingRepresentation", + { + "dataSourceSchema": DataSourceSchema, + "nodeMappings": List[NodeMapping], + "relationshipMappings": List[RelationshipMapping], + }, +) # Configuration Types -class Configurations(TypedDict): - """Configuration settings.""" - - idsToIgnore: List[str] +Configurations = TypedDict( + "Configurations", + { + "idsToIgnore": List[str], + }, +) # Main Data Model Types -class DataModelContent(TypedDict): - """Data model content structure.""" - - version: str - graphSchemaRepresentation: GraphSchemaRepresentation - graphSchemaExtensionsRepresentation: GraphSchemaExtensionsRepresentation - graphMappingRepresentation: GraphMappingRepresentation - configurations: Configurations - - -class AuraDataImportDataModel(TypedDict): - """Complete Aura Data Import model structure.""" - - version: str - visualisation: List[AuraDataImportVisualisationNode] - dataModel: DataModelContent +DataModelContent = TypedDict( + "DataModelContent", + { + "version": str, + "graphSchemaRepresentation": GraphSchemaRepresentation, + "graphSchemaExtensionsRepresentation": GraphSchemaExtensionsRepresentation, + "graphMappingRepresentation": GraphMappingRepresentation, + "configurations": Configurations, + }, +) + + +Visualisation = TypedDict( + "Visualisation", + { + "nodes": List[AuraDataImportVisualisationNode], + }, +) + + +AuraDataImportDataModel = TypedDict( + "AuraDataImportDataModel", + { + "version": str, + "visualisation": Visualisation, + "dataModel": DataModelContent, + }, +) diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py index 25ba303..76eab2b 100644 --- a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py @@ -4,6 +4,8 @@ from pydantic import BaseModel, Field, ValidationInfo, field_validator +from .aura_data_import import models as AuraDataImportModels + NODE_COLOR_PALETTE = [ ("#e3f2fd", "#1976d2"), # Light Blue / Blue ("#f3e5f5", "#7b1fa2"), # Light Purple / Purple @@ -60,7 +62,8 @@ class Property(BaseModel): description="The Neo4j type of the property. Should be all caps.", ) source: PropertySource | None = Field( - default=None, description="The source of the property, if known. For example this may be a CSV file or a database table." + default=None, + description="The source of the property, if known. For example this may be a CSV file or a database table.", ) description: str | None = Field( default=None, description="The description of the property" @@ -111,26 +114,31 @@ def to_arrows(self, is_key: bool = False) -> dict[str, Any]: @classmethod def from_aura_data_import( - cls, aura_data_import_property: dict[str, Any], source_mapping: dict[str, Any] + cls, + aura_data_import_property: AuraDataImportModels.Property, + source_mapping: dict[str, Any], ) -> "Property": """ Convert an Aura Data Import Property to a Property. - aura_data_import_property is a dict with the following structure: - { - "$id": "p:4", - "token": "currency", - "type": { - "type": "string" - }, - "nullable": true - } - source_mapping is a dict with the following structure: - { - "tableName": "countries.csv", - "fieldName": "currency", - "type": "local" - } + Parameters + ---------- + aura_data_import_property : AuraProperty + The Aura Data Import property with structure: + { + "$id": "p:4", + "token": "currency", + "type": "string", + "nullable": true + } + source_mapping : dict[str, Any] + Source mapping information with structure: + { + "tableName": "countries.csv", + "fieldName": "currency", + "type": "local", + "source_type": "local" + } """ # Map Neo4j Data Importer types to our internal types type_mapping = { @@ -166,7 +174,7 @@ def from_aura_data_import( def to_aura_data_import( self, property_id: str, is_key: bool = False - ) -> dict[str, Any]: + ) -> AuraDataImportModels.Property: """ Convert a Property to Aura Data Import format. """ @@ -297,9 +305,9 @@ def to_arrows( @classmethod def from_aura_data_import( cls, - aura_data_import_node_label: dict[str, Any], + aura_data_import_node_label: AuraDataImportModels.NodeLabel, key_property_token: str, - node_mapping: dict[str, Any], + node_mapping: AuraDataImportModels.NodeMapping, source_type: str, ) -> "Node": """ @@ -336,7 +344,9 @@ def from_aura_data_import( other_props = [] def _prepare_source_mapping( - node_mapping: dict[str, Any], property_id: str, source_type: str + node_mapping: AuraDataImportModels.NodeMapping, + property_id: str, + source_type: str, ) -> dict[str, Any]: """ Prepare the source mapping for the node mapping. @@ -400,7 +410,12 @@ def to_aura_data_import( constraint_id: str, index_id: str, property_id_mapping: dict[str, str] = None, - ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any], dict[str, Any]]: + ) -> tuple[ + AuraDataImportModels.NodeLabel, + AuraDataImportModels.NodeKeyProperty, + AuraDataImportModels.Constraint, + AuraDataImportModels.Index, + ]: """ Convert a Node to Aura Data Import NodeLabel format. Returns tuple of (NodeLabel, KeyProperty, Constraint, Index) @@ -605,10 +620,10 @@ def to_arrows(self) -> dict[str, Any]: @classmethod def from_aura_data_import( cls, - aura_data_import_relationship_type: dict[str, Any], - aura_data_import_relationship_object: dict[str, Any], + aura_data_import_relationship_type: AuraDataImportModels.RelationshipType, + aura_data_import_relationship_object: AuraDataImportModels.RelationshipObjectType, node_id_to_label_map: dict[str, str], - relationship_mapping: dict[str, Any], + relationship_mapping: AuraDataImportModels.RelationshipMapping, source_type: str, ) -> "Relationship": """Convert Aura Data Import RelationshipType and RelationshipObjectType to a Relationship.""" @@ -617,7 +632,9 @@ def from_aura_data_import( other_props = [] def _prepare_source_mapping( - relationship_mapping: dict[str, Any], property_id: str, source_type: str + relationship_mapping: AuraDataImportModels.RelationshipMapping, + property_id: str, + source_type: str, ) -> dict[str, Any]: """ Prepare the source mapping for the relationship mapping. @@ -628,7 +645,9 @@ def _prepare_source_mapping( if x["property"]["$ref"] == "#" + property_id ] if not field_name: - raise ValueError(f"Property {property_id} not found in relationship mapping") + raise ValueError( + f"Property {property_id} not found in relationship mapping" + ) return { "tableName": relationship_mapping["tableName"], "fieldName": field_name[0], @@ -643,11 +662,8 @@ def _prepare_source_mapping( prop, _prepare_source_mapping(relationship_mapping, prop["$id"], source_type), ) - # For simplicity, treat first property as key if any exist - if not key_prop and aura_data_import_relationship_type["properties"]: - key_prop = converted_prop - else: - other_props.append(converted_prop) + # Add all properties as regular properties (no automatic key property assignment) + other_props.append(converted_prop) # Get start and end node labels from the object type start_node_ref = aura_data_import_relationship_object["from"]["$ref"] @@ -670,7 +686,10 @@ def to_aura_data_import( constraint_id: str = None, index_id: str = None, ) -> tuple[ - dict[str, Any], dict[str, Any], dict[str, Any] | None, dict[str, Any] | None + AuraDataImportModels.RelationshipType, + AuraDataImportModels.RelationshipObjectType, + AuraDataImportModels.Constraint | None, + AuraDataImportModels.Index | None, ]: """Convert a Relationship to Aura Data Import format. @@ -947,7 +966,7 @@ def to_arrows_json_str(self) -> str: @classmethod def from_aura_data_import( - cls, aura_data_import_data_model: dict[str, Any] + cls, aura_data_import_data_model: AuraDataImportModels.AuraDataImportDataModel ) -> "DataModel": """Convert an Aura Data Import DataModel to a DataModel.""" graph_schema = aura_data_import_data_model["dataModel"][ @@ -959,7 +978,7 @@ def from_aura_data_import( node_mappings = aura_data_import_data_model["dataModel"][ "graphMappingRepresentation" ]["nodeMappings"] - + # Get the data source schema to determine source type data_source_schema = aura_data_import_data_model["dataModel"][ "graphMappingRepresentation" @@ -1103,7 +1122,7 @@ def from_aura_data_import( return cls(nodes=nodes, relationships=relationships, metadata=metadata) - def to_aura_data_import_dict(self) -> dict[str, Any]: + def to_aura_data_import_dict(self) -> AuraDataImportModels.AuraDataImportDataModel: """Convert the data model to an Aura Data Import dictionary.""" # Check if we have stored Aura Data Import metadata aura_metadata = self.metadata.get("aura_data_import", {}) @@ -1308,10 +1327,52 @@ def to_aura_data_import_dict(self) -> dict[str, Any]: else target_node.key_property.name.lower() ) + # Create property mappings for relationship properties + property_mappings = [] + + # Find the corresponding relationship type to get property IDs + rel_type_id = f"rt:{i + 1}" + rel_type = None + for rt in relationship_types: + if rt["$id"] == rel_type_id: + rel_type = rt + break + + if rel_type and rel_type["properties"]: + for prop_def in rel_type["properties"]: + prop_id = prop_def["$id"] + prop_token = prop_def["token"] + + # Find the corresponding property in our relationship to get the field name + field_name = prop_token # default to token name + + # Check key property first + if rel.key_property and rel.key_property.name == prop_token: + field_name = ( + rel.key_property.source.column_name + if rel.key_property.source + and rel.key_property.source.column_name + else prop_token + ) + else: + # Check other properties + for prop in rel.properties: + if prop.name == prop_token: + field_name = ( + prop.source.column_name + if prop.source and prop.source.column_name + else prop_token + ) + break + + property_mappings.append( + {"property": {"$ref": f"#{prop_id}"}, "fieldName": field_name} + ) + rel_mapping = { "relationship": {"$ref": f"#{rel_obj_id}"}, "tableName": table_name, - "propertyMappings": [], # Empty for now, can be extended if relationships have properties + "propertyMappings": property_mappings, "fromMapping": {"fieldName": from_field}, "toMapping": {"fieldName": to_field}, } @@ -1345,18 +1406,22 @@ def to_aura_data_import_dict(self) -> dict[str, Any]: for prop in node.properties: if prop.source and prop.source.source_type: source_types.add(prop.source.source_type) - + for rel in self.relationships: - if rel.key_property and rel.key_property.source and rel.key_property.source.source_type: + if ( + rel.key_property + and rel.key_property.source + and rel.key_property.source.source_type + ): source_types.add(rel.key_property.source.source_type) for prop in rel.properties: if prop.source and prop.source.source_type: source_types.add(prop.source.source_type) - + # Default to "local" if no source types found, or use the first one found # In practice, all properties should have the same source type for a given data model data_source_type = source_types.pop() if source_types else "local" - + table_schemas = [] for table_name in sorted(table_names): # Sort for consistent output # Generate field schemas based on node/relationship mappings @@ -1413,9 +1478,36 @@ def to_aura_data_import_dict(self) -> dict[str, Any]: } ) + # Add relationship property fields + for prop_mapping in rel_mapping["propertyMappings"]: + field_name = prop_mapping["fieldName"] + # Find the property to get its type + prop_ref = prop_mapping["property"]["$ref"].replace("#", "") + prop_type = "string" # default + + # Search for the property in relationship types + for rel_type in relationship_types: + for prop in rel_type["properties"]: + if prop["$id"] == prop_ref: + prop_type = prop["type"]["type"] + break + + # Add field if not already present + if not any(f["name"] == field_name for f in fields): + fields.append( + { + "name": field_name, + "sample": f"sample_{field_name}", + "recommendedType": {"type": prop_type}, + } + ) + table_schemas.append({"name": table_name, "fields": fields}) - stored_data_source_schema = {"type": data_source_type, "tableSchemas": table_schemas} + stored_data_source_schema = { + "type": data_source_type, + "tableSchemas": table_schemas, + } else: stored_data_source_schema = aura_metadata.get( "dataSourceSchema", {"type": "local", "tableSchemas": []} diff --git a/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py b/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py index 6c545da..69e6cc9 100644 --- a/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py +++ b/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py @@ -360,9 +360,12 @@ def test_from_aura_data_import_relationship_with_properties(self): ) assert relationship.type == "CONNECTED_TO" - assert relationship.key_property.name == "weight" # First property becomes key - assert len(relationship.properties) == 1 # Second property - assert relationship.properties[0].name == "since" + assert relationship.key_property is None # No automatic key property assignment + assert ( + len(relationship.properties) == 2 + ) # Both properties are regular properties + assert relationship.properties[0].name == "weight" + assert relationship.properties[1].name == "since" def test_to_aura_data_import_simple_relationship(self): """Test converting a simple relationship to Aura Data Import format.""" @@ -706,9 +709,11 @@ def test_export_without_metadata_uses_defaults(self): assert aura_dict["version"] == "2.3.1-beta.0" assert aura_dict["dataModel"]["version"] == "2.3.1-beta.0" assert aura_dict["dataModel"]["configurations"] == {"idsToIgnore": []} - + # Verify that table schemas are automatically generated (not empty) - data_source_schema = aura_dict["dataModel"]["graphMappingRepresentation"]["dataSourceSchema"] + data_source_schema = aura_dict["dataModel"]["graphMappingRepresentation"][ + "dataSourceSchema" + ] assert data_source_schema["type"] == "local" assert len(data_source_schema["tableSchemas"]) == 1 assert data_source_schema["tableSchemas"][0]["name"] == "testnode.csv" @@ -851,9 +856,9 @@ def test_aura_data_import_round_trip_data_integrity( # Check that all nodes are preserved original_node_labels = { nl["token"] - for nl in sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"][ - "graphSchema" - ]["nodeLabels"] + for nl in sample_aura_data_import_model["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"]["nodeLabels"] } converted_node_labels = { nl["token"] @@ -866,9 +871,9 @@ def test_aura_data_import_round_trip_data_integrity( # Check that all relationships are preserved original_rel_types = { rt["token"] - for rt in sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"][ - "graphSchema" - ]["relationshipTypes"] + for rt in sample_aura_data_import_model["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"]["relationshipTypes"] } converted_rel_types = { rt["token"] @@ -887,4 +892,4 @@ def test_aura_data_import_round_trip_data_integrity( assert ( converted_back["dataModel"]["configurations"] == sample_aura_data_import_model["dataModel"]["configurations"] - ) \ No newline at end of file + ) From ba7c24bdf1fb62e43b794ff74283107cfbb5761a Mon Sep 17 00:00:00 2001 From: alex Date: Tue, 1 Jul 2025 09:36:57 -0500 Subject: [PATCH 5/6] add aura data import tools, update export tool names --- servers/mcp-neo4j-data-modeling/CHANGELOG.md | 3 ++- .../src/mcp_neo4j_data_modeling/server.py | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/servers/mcp-neo4j-data-modeling/CHANGELOG.md b/servers/mcp-neo4j-data-modeling/CHANGELOG.md index a18af2e..0362bdb 100644 --- a/servers/mcp-neo4j-data-modeling/CHANGELOG.md +++ b/servers/mcp-neo4j-data-modeling/CHANGELOG.md @@ -3,9 +3,10 @@ ### Fixed ### Changed +* Update tool name `export_to_arrows_json` to `export_to_arrows_json_str` to be more specific ### Added -* Add import and export from Aura Data Import tool format +* Add tools to import and export from Aura Data Import tool format ## v0.1.1 diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py index c836892..47b356d 100644 --- a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py @@ -110,11 +110,25 @@ def load_from_arrows_json(arrows_data_model_dict: dict[str, Any]) -> DataModel: return DataModel.from_arrows(arrows_data_model_dict) @mcp.tool() - def export_to_arrows_json(data_model: DataModel) -> str: + def export_to_arrows_json_str(data_model: DataModel) -> str: "Export the data model to the Arrows web application format. Returns a JSON string. This should be presented to the user as an artifact if possible." logger.info("Exporting the data model to the Arrows web application format.") return data_model.to_arrows_json_str() + @mcp.tool() + def load_from_aura_data_import_json( + aura_data_import_dict: dict[str, Any], + ) -> DataModel: + "Load a data model from the Aura Data Import format. Returns a data model as a JSON string." + logger.info("Loading a data model from the Aura Data Import format.") + return DataModel.from_aura_data_import(aura_data_import_dict) + + @mcp.tool() + def export_to_aura_data_import_json_str(data_model: DataModel) -> str: + "Export the data model to the Aura Data Import format. Returns a JSON string. This should be presented to the user as an artifact if possible." + logger.info("Exporting the data model to the Aura Data Import format.") + return data_model.to_aura_data_import_json_str() + @mcp.tool() def get_mermaid_config_str(data_model: DataModel) -> str: "Get the Mermaid configuration string for the data model. This may be visualized in Claude Desktop and other applications with Mermaid support." From a32518bfff3c205fa4319832df3586eaaacdf516 Mon Sep 17 00:00:00 2001 From: alex Date: Tue, 1 Jul 2025 12:38:26 -0500 Subject: [PATCH 6/6] update docstrings, add validation to all export tools --- .../src/mcp_neo4j_data_modeling/data_model.py | 49 +++++++---- .../src/mcp_neo4j_data_modeling/server.py | 28 +++---- .../unit/test_aura_data_import_conversion.py | 83 +++++++++++++++++++ 3 files changed, 132 insertions(+), 28 deletions(-) diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py index 76eab2b..5ca214e 100644 --- a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py @@ -63,10 +63,10 @@ class Property(BaseModel): ) source: PropertySource | None = Field( default=None, - description="The source of the property, if known. For example this may be a CSV file or a database table.", + description="The source of the property, if known. For example this may be a CSV file or a database table. This should always be provided if possible, especially when exporting data models to the Aura Data Import format.", ) description: str | None = Field( - default=None, description="The description of the property" + default=None, description="The description of the property." ) metadata: dict[str, Any] = Field( default_factory=dict, @@ -207,9 +207,12 @@ class Node(BaseModel): label: str = Field( description="The label of the node. Should be in PascalCase.", min_length=1 ) - key_property: Property = Field(description="The key property of the node") + key_property: Property = Field( + description="The key property of the node. This must exist!" + ) properties: list[Property] = Field( - default_factory=list, description="The properties of the node" + default_factory=list, + description="The other properties of the node. The key property is not included here.", ) metadata: dict[str, Any] = Field( default_factory=dict, @@ -504,10 +507,11 @@ class Relationship(BaseModel): start_node_label: str = Field(description="The label of the start node") end_node_label: str = Field(description="The label of the end node") key_property: Property | None = Field( - default=None, description="The key property of the relationship, if any." + default=None, description="The key property of the relationship, if it exists." ) properties: list[Property] = Field( - default_factory=list, description="The properties of the relationship, if any." + default_factory=list, + description="The other properties of the relationship, if any.", ) metadata: dict[str, Any] = Field( default_factory=dict, @@ -1302,14 +1306,31 @@ def to_aura_data_import_dict(self) -> AuraDataImportModels.AuraDataImportDataMod if node.label == rel.end_node_label: target_node = node - # Use the same table as the source node, or default - table_name = ( - source_node.key_property.source.table_name - if source_node - and source_node.key_property.source - and source_node.key_property.source.table_name - else f"{source_node.label.lower()}_{rel.type.lower()}_{target_node.label.lower()}.csv" - ) + # Determine table name from relationship properties first, then fall back to source node + table_name = None + + # Check if any relationship property has source information with table name + if ( + rel.key_property + and rel.key_property.source + and rel.key_property.source.table_name + ): + table_name = rel.key_property.source.table_name + else: + for prop in rel.properties: + if prop.source and prop.source.table_name: + table_name = prop.source.table_name + break + + # If no relationship property has table info, use source node's table or default + if not table_name: + table_name = ( + source_node.key_property.source.table_name + if source_node + and source_node.key_property.source + and source_node.key_property.source.table_name + else f"{source_node.label.lower()}_{rel.type.lower()}_{target_node.label.lower()}.csv" + ) # Generate field mappings based on node key properties from_field = ( diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py index 47b356d..7e7da2d 100644 --- a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py @@ -51,9 +51,7 @@ def neo4j_data_ingest_process() -> str: return DATA_INGEST_PROCESS @mcp.tool() - def validate_node( - node: Node, return_validated: bool = False - ) -> bool | dict[str, Any]: + def validate_node(node: Node, return_validated: bool = False) -> bool | Node: "Validate a single node. Returns True if the node is valid, otherwise raises a ValueError. If return_validated is True, returns the validated node." logger.info("Validating a single node.") try: @@ -70,7 +68,7 @@ def validate_node( @mcp.tool() def validate_relationship( relationship: Relationship, return_validated: bool = False - ) -> bool | dict[str, Any]: + ) -> bool | Relationship: "Validate a single relationship. Returns True if the relationship is valid, otherwise raises a ValueError. If return_validated is True, returns the validated relationship." logger.info("Validating a single relationship.") try: @@ -89,7 +87,7 @@ def validate_relationship( @mcp.tool() def validate_data_model( data_model: DataModel, return_validated: bool = False - ) -> bool | dict[str, Any]: + ) -> bool | DataModel: "Validate the entire data model. Returns True if the data model is valid, otherwise raises a ValueError. If return_validated is True, returns the validated data model." logger.info("Validating the entire data model.") try: @@ -112,8 +110,9 @@ def load_from_arrows_json(arrows_data_model_dict: dict[str, Any]) -> DataModel: @mcp.tool() def export_to_arrows_json_str(data_model: DataModel) -> str: "Export the data model to the Arrows web application format. Returns a JSON string. This should be presented to the user as an artifact if possible." + validated_data_model: DataModel = validate_data_model(data_model, True) logger.info("Exporting the data model to the Arrows web application format.") - return data_model.to_arrows_json_str() + return validated_data_model.to_arrows_json_str() @mcp.tool() def load_from_aura_data_import_json( @@ -125,20 +124,21 @@ def load_from_aura_data_import_json( @mcp.tool() def export_to_aura_data_import_json_str(data_model: DataModel) -> str: - "Export the data model to the Aura Data Import format. Returns a JSON string. This should be presented to the user as an artifact if possible." + """ + Export the data model to the Aura Data Import format. + If the data source information is known, it should be provided in the appropriate fields of the data model. + Returns a JSON string. This should be presented to the user as an artifact if possible. + """ + validated_data_model: DataModel = validate_data_model(data_model, True) logger.info("Exporting the data model to the Aura Data Import format.") - return data_model.to_aura_data_import_json_str() + return validated_data_model.to_aura_data_import_json_str() @mcp.tool() def get_mermaid_config_str(data_model: DataModel) -> str: "Get the Mermaid configuration string for the data model. This may be visualized in Claude Desktop and other applications with Mermaid support." + validated_data_model: DataModel = validate_data_model(data_model, True) logger.info("Getting the Mermaid configuration string for the data model.") - try: - dm_validated = DataModel.model_validate(data_model, strict=True) - except ValidationError as e: - logger.error(f"Validation error: {e}") - raise ValueError(f"Validation error: {e}") - return dm_validated.get_mermaid_config_str() + return validated_data_model.get_mermaid_config_str() @mcp.tool() def get_node_cypher_ingest_query( diff --git a/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py b/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py index 69e6cc9..fc9cac2 100644 --- a/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py +++ b/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py @@ -10,6 +10,7 @@ DataModel, Node, Property, + PropertySource, Relationship, ) @@ -432,6 +433,88 @@ def test_to_aura_data_import_relationship_with_properties(self): assert index["entityType"] == "relationship" assert index["relationshipType"]["$ref"] == "#rt:2" + def test_relationship_source_info_export(self): + """Test that relationship property source information is properly exported.""" + # Create nodes with source information + country_source = PropertySource( + column_name="country_id", + table_name="countries.csv", + location="local", + source_type="local", + ) + + country_key_prop = Property( + name="id", + type="INTEGER", + source=country_source, + description="Country identifier", + ) + + country_node = Node( + label="Country", key_property=country_key_prop, properties=[] + ) + + region_source = PropertySource( + column_name="region_name", + table_name="regions.csv", + location="local", + source_type="local", + ) + + region_key_prop = Property( + name="name", type="STRING", source=region_source, description="Region name" + ) + + region_node = Node(label="Region", key_property=region_key_prop, properties=[]) + + # Create relationship with property that has different source table + rel_prop_source = PropertySource( + column_name="connection_weight", + table_name="country_region_connections.csv", + location="local", + source_type="local", + ) + + rel_prop = Property( + name="weight", + type="FLOAT", + source=rel_prop_source, + description="Connection weight", + ) + + relationship = Relationship( + type="BELONGS_TO", + start_node_label="Country", + end_node_label="Region", + properties=[rel_prop], + ) + + # Create data model and export + data_model = DataModel( + nodes=[country_node, region_node], relationships=[relationship] + ) + aura_dict = data_model.to_aura_data_import_dict() + + # Verify that relationship uses its own table name, not the source node's table + rel_mappings = aura_dict["dataModel"]["graphMappingRepresentation"][ + "relationshipMappings" + ] + assert len(rel_mappings) == 1 + assert rel_mappings[0]["tableName"] == "country_region_connections.csv" + + # Verify that relationship property field name is correct + rel_prop_mappings = rel_mappings[0]["propertyMappings"] + assert len(rel_prop_mappings) == 1 + assert rel_prop_mappings[0]["fieldName"] == "connection_weight" + + # Verify that node mappings still use their own table names + node_mappings = aura_dict["dataModel"]["graphMappingRepresentation"][ + "nodeMappings" + ] + assert len(node_mappings) == 2 + assert node_mappings[0]["tableName"] == "countries.csv" + assert node_mappings[1]["tableName"] == "regions.csv" + class TestDataModelConversion: """Test DataModel conversion methods."""