diff --git a/servers/mcp-neo4j-data-modeling/CHANGELOG.md b/servers/mcp-neo4j-data-modeling/CHANGELOG.md index b677d5d..0362bdb 100644 --- a/servers/mcp-neo4j-data-modeling/CHANGELOG.md +++ b/servers/mcp-neo4j-data-modeling/CHANGELOG.md @@ -3,8 +3,10 @@ ### Fixed ### Changed +* Update tool name `export_to_arrows_json` to `export_to_arrows_json_str` to be more specific ### Added +* Add tools to import and export from Aura Data Import tool format ## v0.1.1 diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/aura_data_import/models.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/aura_data_import/models.py new file mode 100644 index 0000000..fdc298e --- /dev/null +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/aura_data_import/models.py @@ -0,0 +1,316 @@ +""" +These are the models that make up the data model structure generated by the Aura Data Import console tool. +""" + +from typing import List, Literal, Optional, TypedDict + +AuraDataImportSupportedTypes = Literal[ + "string", "integer", "float", "boolean" +] # supported types in Aura Data Import + +Position = TypedDict( + "Position", + { + "x": float, + "y": float, + }, +) + + +AuraDataImportVisualisationNode = TypedDict( + "AuraDataImportVisualisationNode", + { + "id": str, + "position": Position, + }, +) + + +# Property and Type Definitions +PropertyType = TypedDict( + "PropertyType", + { + "type": AuraDataImportSupportedTypes, + }, +) + + +Property = TypedDict( + "Property", + { + "$id": str, + "token": str, + "type": PropertyType, + "nullable": bool, + }, +) + + +# Node and Relationship Schema Types +NodeLabel = TypedDict( + "NodeLabel", + { + "$id": str, + "token": str, + "properties": List[Property], + }, +) + + +RelationshipType = TypedDict( + "RelationshipType", + { + "$id": str, + "token": str, + "properties": List[Property], + }, +) + + +LabelRef = TypedDict( + "LabelRef", + { + "$ref": str, + }, +) + + +NodeObjectType = TypedDict( + "NodeObjectType", + { + "$id": str, + "labels": List[LabelRef], + }, +) + + +TypeRef = TypedDict( + "TypeRef", + { + "$ref": str, + }, +) + + +NodeRef = TypedDict( + "NodeRef", + { + "$ref": str, + }, +) + + +RelationshipObjectType = TypedDict( + "RelationshipObjectType", + { + "$id": str, + "type": TypeRef, + "from": NodeRef, + "to": NodeRef, + }, +) + + +PropertyRef = TypedDict( + "PropertyRef", + { + "$ref": str, + }, +) + + +# Constraint and Index Types +Constraint = TypedDict( + "Constraint", + { + "$id": str, + "name": str, + "constraintType": Literal["uniqueness", "existence", "node_key"], + "entityType": Literal["node", "relationship"], + "nodeLabel": Optional[LabelRef], + "relationshipType": Optional[TypeRef], + "properties": List[PropertyRef], + }, +) + + +Index = TypedDict( + "Index", + { + "$id": str, + "name": str, + "indexType": str, + "entityType": Literal["node", "relationship"], + "nodeLabel": Optional[LabelRef], + "relationshipType": Optional[TypeRef], + "properties": List[PropertyRef], + }, +) + + +# Graph Schema Types +GraphSchema = TypedDict( + "GraphSchema", + { + "nodeLabels": List[NodeLabel], + "relationshipTypes": List[RelationshipType], + "nodeObjectTypes": List[NodeObjectType], + "relationshipObjectTypes": List[RelationshipObjectType], + "constraints": List[Constraint], + "indexes": List[Index], + }, +) + + +GraphSchemaRepresentation = TypedDict( + "GraphSchemaRepresentation", + { + "version": str, + "graphSchema": GraphSchema, + }, +) + + +# Graph Schema Extensions +NodeKeyProperty = TypedDict( + "NodeKeyProperty", + 
{ + "node": NodeRef, + "keyProperty": PropertyRef, + }, +) + + +GraphSchemaExtensionsRepresentation = TypedDict( + "GraphSchemaExtensionsRepresentation", + { + "nodeKeyProperties": List[NodeKeyProperty], + }, +) + + +# Data Source Schema Types +RecommendedType = TypedDict( + "RecommendedType", + { + "type": AuraDataImportSupportedTypes, + }, +) + + +Field = TypedDict( + "Field", + { + "name": str, + "sample": str, + "recommendedType": RecommendedType, + }, +) + + +TableSchema = TypedDict( + "TableSchema", + { + "name": str, + "fields": List[Field], + }, +) + + +DataSourceSchema = TypedDict( + "DataSourceSchema", + { + "type": Literal["local", "remote"], + "tableSchemas": List[TableSchema], + }, +) + + +# Mapping Types +PropertyMapping = TypedDict( + "PropertyMapping", + { + "property": PropertyRef, + "fieldName": str, + }, +) + + +NodeMapping = TypedDict( + "NodeMapping", + { + "node": NodeRef, + "tableName": str, + "propertyMappings": List[PropertyMapping], + }, +) + + +FieldMapping = TypedDict( + "FieldMapping", + { + "fieldName": str, + }, +) + + +RelationshipMapping = TypedDict( + "RelationshipMapping", + { + "relationship": NodeRef, + "tableName": str, + "propertyMappings": List[PropertyMapping], + "fromMapping": FieldMapping, + "toMapping": FieldMapping, + }, +) + + +GraphMappingRepresentation = TypedDict( + "GraphMappingRepresentation", + { + "dataSourceSchema": DataSourceSchema, + "nodeMappings": List[NodeMapping], + "relationshipMappings": List[RelationshipMapping], + }, +) + + +# Configuration Types +Configurations = TypedDict( + "Configurations", + { + "idsToIgnore": List[str], + }, +) + + +# Main Data Model Types +DataModelContent = TypedDict( + "DataModelContent", + { + "version": str, + "graphSchemaRepresentation": GraphSchemaRepresentation, + "graphSchemaExtensionsRepresentation": GraphSchemaExtensionsRepresentation, + "graphMappingRepresentation": GraphMappingRepresentation, + "configurations": Configurations, + }, +) + + +Visualisation = TypedDict( + "Visualisation", + { + "nodes": List[AuraDataImportVisualisationNode], + }, +) + + +AuraDataImportDataModel = TypedDict( + "AuraDataImportDataModel", + { + "version": str, + "visualisation": Visualisation, + "dataModel": DataModelContent, + }, +) diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py index b0bc03d..5ca214e 100644 --- a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py @@ -1,9 +1,11 @@ import json from collections import Counter -from typing import Any +from typing import Any, Literal from pydantic import BaseModel, Field, ValidationInfo, field_validator +from .aura_data_import import models as AuraDataImportModels + NODE_COLOR_PALETTE = [ ("#e3f2fd", "#1976d2"), # Light Blue / Blue ("#f3e5f5", "#7b1fa2"), # Light Purple / Purple @@ -45,6 +47,10 @@ class PropertySource(BaseModel): default=None, description="The location of the property, if known. May be a file path, URL, etc.", ) + source_type: Literal["local", "remote"] | None = Field( + default=None, + description="The type of the data source: 'local' or 'remote'. 'local' means the data source is a file or database table on the local machine. 'remote' means the data source is a file or database table on a remote machine.", + ) class Property(BaseModel): @@ -56,10 +62,15 @@ class Property(BaseModel): description="The Neo4j type of the property. 
Should be all caps.",
     )
     source: PropertySource | None = Field(
-        default=None, description="The source of the property, if known."
+        default=None,
+        description="The source of the property, if known. For example, this may be a CSV file or a database table. This should always be provided if possible, especially when exporting data models to the Aura Data Import format.",
     )
     description: str | None = Field(
-        default=None, description="The description of the property"
+        default=None, description="The description of the property."
+    )
+    metadata: dict[str, Any] = Field(
+        default_factory=dict,
+        description="The metadata of the property. This should only be used when converting data models.",
     )
 
     @field_validator("type")
@@ -101,6 +112,94 @@ def to_arrows(self, is_key: bool = False) -> dict[str, Any]:
             self.name: value,
         }
 
+    @classmethod
+    def from_aura_data_import(
+        cls,
+        aura_data_import_property: AuraDataImportModels.Property,
+        source_mapping: dict[str, Any],
+    ) -> "Property":
+        """
+        Convert an Aura Data Import Property to a Property.
+
+        Parameters
+        ----------
+        aura_data_import_property : AuraDataImportModels.Property
+            The Aura Data Import property with structure:
+            {
+                "$id": "p:4",
+                "token": "currency",
+                "type": {"type": "string"},
+                "nullable": true
+            }
+        source_mapping : dict[str, Any]
+            Source mapping information with structure:
+            {
+                "tableName": "countries.csv",
+                "fieldName": "currency",
+                "type": "local",
+                "source_type": "local"
+            }
+        """
+        # Map Aura Data Import types to our internal types
+        type_mapping = {
+            "string": "STRING",
+            "integer": "INTEGER",
+            "float": "FLOAT",
+            "boolean": "BOOLEAN",
+        }
+
+        prop_type = aura_data_import_property["type"]["type"]
+        mapped_type = type_mapping.get(prop_type, prop_type.upper())
+
+        source = PropertySource(
+            column_name=source_mapping.get("fieldName", None),
+            table_name=source_mapping.get("tableName", None),
+            location=source_mapping.get("type", None),
+            source_type=source_mapping.get("source_type", "local"),
+        )
+
+        # Create property with nullable and original ID stored in metadata
+        return cls(
+            name=aura_data_import_property["token"],
+            type=mapped_type,
+            description=None,  # Aura Data Import doesn't have descriptions
+            source=source,
+            metadata={
+                "aura_data_import": {
+                    "nullable": aura_data_import_property.get("nullable", False),
+                    "original_id": aura_data_import_property.get("$id"),
+                }
+            },
+        )
+
+    def to_aura_data_import(
+        self, property_id: str, is_key: bool = False
+    ) -> AuraDataImportModels.Property:
+        """
+        Convert a Property to Aura Data Import format.
+        """
+        # Map our internal types to Aura Data Import types
+        type_mapping = {
+            "STRING": "string",
+            "INTEGER": "integer",
+            "FLOAT": "float",
+            "BOOLEAN": "boolean",
+        }
+
+        mapped_type = type_mapping.get(
+            self.type, "string"
+        )  # Default to string if type is not found
+
+        # Use stored nullable value from metadata, or default based on key property
+        nullable = self.metadata.get("aura_data_import", {}).get("nullable", not is_key)
+
+        return {
+            "$id": property_id,
+            "token": self.name,
+            "type": {"type": mapped_type},
+            "nullable": nullable,
+        }
+

 class Node(BaseModel):
     "A Neo4j Node."

     label: str = Field(
         description="The label of the node. Should be in PascalCase.", min_length=1
     )
-    key_property: Property = Field(description="The key property of the node")
+    key_property: Property = Field(
+        description="The key property of the node. This must exist!"
+ ) properties: list[Property] = Field( - default_factory=list, description="The properties of the node" + default_factory=list, + description="The other properties of the node. The key property is not included here.", ) metadata: dict[str, Any] = Field( default_factory=dict, @@ -203,6 +305,178 @@ def to_arrows( "caption": self.metadata.get("caption", ""), } + @classmethod + def from_aura_data_import( + cls, + aura_data_import_node_label: AuraDataImportModels.NodeLabel, + key_property_token: str, + node_mapping: AuraDataImportModels.NodeMapping, + source_type: str, + ) -> "Node": + """ + Convert an Aura Data Import NodeLabel to a Node. + + Parameters + ---------- + aura_data_import_node_label: dict[str, Any] + The Aura Data Import NodeLabel to convert. + key_property_token: str + The token of the key property to use. This is the property name. + node_mapping: dict[str, Any] + The node mapping from the graphMappingRepresentation. Should have the following structure: + ```json + { + "node": {"$ref": "#n:0"}, + "tableName": "countries.csv", + "propertyMappings": [ + { + "property": {"$ref": "#p:0_0"}, + "fieldName": "id" + } + ... + ] + } + ``` + Returns + ------- + Node + The converted Node. + """ + # Find the key property + key_prop = None + other_props = [] + + def _prepare_source_mapping( + node_mapping: AuraDataImportModels.NodeMapping, + property_id: str, + source_type: str, + ) -> dict[str, Any]: + """ + Prepare the source mapping for the node mapping. + """ + field_name = [ + x["fieldName"] + for x in node_mapping["propertyMappings"] + if x["property"]["$ref"] == "#" + property_id + ] + if not field_name: + raise ValueError(f"Property {property_id} not found in node mapping") + return { + "tableName": node_mapping["tableName"], + "fieldName": field_name[0], + "type": "local", # This was the original location field + "source_type": source_type, # The actual data source type + } + + for prop in aura_data_import_node_label["properties"]: + if prop["token"] == key_property_token: + key_prop = Property.from_aura_data_import( + prop, + _prepare_source_mapping(node_mapping, prop["$id"], source_type), + ) + else: + other_props.append( + Property.from_aura_data_import( + prop, + _prepare_source_mapping(node_mapping, prop["$id"], source_type), + ) + ) + + if not key_prop: + # If no key property found, use the first property as key + key_prop = Property.from_aura_data_import( + aura_data_import_node_label["properties"][0], + _prepare_source_mapping( + node_mapping, + aura_data_import_node_label["properties"][0]["$id"], + source_type, + ), + ) + other_props = [ + Property.from_aura_data_import( + p, _prepare_source_mapping(node_mapping, p["$id"], source_type) + ) + for p in aura_data_import_node_label["properties"][1:] + ] + + return cls( + label=aura_data_import_node_label["token"], + key_property=key_prop, + properties=other_props, + ) + + def to_aura_data_import( + self, + node_label_id: str, + node_obj_id: str, + key_prop_id: str, + constraint_id: str, + index_id: str, + property_id_mapping: dict[str, str] = None, + ) -> tuple[ + AuraDataImportModels.NodeLabel, + AuraDataImportModels.NodeKeyProperty, + AuraDataImportModels.Constraint, + AuraDataImportModels.Index, + ]: + """ + Convert a Node to Aura Data Import NodeLabel format. 
+ Returns tuple of (NodeLabel, KeyProperty, Constraint, Index) + """ + # Create property list with key property first + all_props = [self.key_property] + self.properties + aura_props = [] + + # For the first property (key property), use the provided key_prop_id + # For additional properties, use the property_id_mapping if provided + for i, prop in enumerate(all_props): + if i == 0: + prop_id = key_prop_id + else: + # Use property mapping if available, otherwise generate based on node pattern + if property_id_mapping and prop.name in property_id_mapping: + prop_id = property_id_mapping[prop.name] + else: + prop_id = f"p:{node_label_id.split(':')[1]}_{i}" + + is_key = i == 0 # First property is the key property + aura_props.append(prop.to_aura_data_import(prop_id, is_key=is_key)) + + node_label = { + "$id": node_label_id, + "token": self.label, + "properties": aura_props, + } + + key_property = { + "node": {"$ref": f"#{node_obj_id}"}, + "keyProperty": {"$ref": f"#{key_prop_id}"}, + } + + # Create uniqueness constraint on key property + constraint = { + "$id": constraint_id, + "name": f"{self.label}_constraint", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": {"$ref": f"#{node_label_id}"}, + "relationshipType": None, + "properties": [{"$ref": f"#{key_prop_id}"}], + } + + # Create default index on key property + index = { + "$id": index_id, + "name": f"{self.label}_index", + "indexType": "default", + "entityType": "node", + "nodeLabel": {"$ref": f"#{node_label_id}"}, + "relationshipType": None, + "properties": [{"$ref": f"#{key_prop_id}"}], + } + + return (node_label, key_property, constraint, index) + def get_cypher_ingest_query_for_many_records(self) -> str: """ Generate a Cypher query to ingest a list of Node records into a Neo4j database. @@ -233,10 +507,11 @@ class Relationship(BaseModel): start_node_label: str = Field(description="The label of the start node") end_node_label: str = Field(description="The label of the end node") key_property: Property | None = Field( - default=None, description="The key property of the relationship, if any." + default=None, description="The key property of the relationship, if it exists." ) properties: list[Property] = Field( - default_factory=list, description="The properties of the relationship, if any." + default_factory=list, + description="The other properties of the relationship, if any.", ) metadata: dict[str, Any] = Field( default_factory=dict, @@ -346,6 +621,141 @@ def to_arrows(self) -> dict[str, Any]: "style": self.metadata.get("style", {}), } + @classmethod + def from_aura_data_import( + cls, + aura_data_import_relationship_type: AuraDataImportModels.RelationshipType, + aura_data_import_relationship_object: AuraDataImportModels.RelationshipObjectType, + node_id_to_label_map: dict[str, str], + relationship_mapping: AuraDataImportModels.RelationshipMapping, + source_type: str, + ) -> "Relationship": + """Convert Aura Data Import RelationshipType and RelationshipObjectType to a Relationship.""" + # Convert properties + key_prop = None + other_props = [] + + def _prepare_source_mapping( + relationship_mapping: AuraDataImportModels.RelationshipMapping, + property_id: str, + source_type: str, + ) -> dict[str, Any]: + """ + Prepare the source mapping for the relationship mapping. 
+ """ + field_name = [ + x["fieldName"] + for x in relationship_mapping["propertyMappings"] + if x["property"]["$ref"] == "#" + property_id + ] + if not field_name: + raise ValueError( + f"Property {property_id} not found in relationship mapping" + ) + return { + "tableName": relationship_mapping["tableName"], + "fieldName": field_name[0], + "type": "local", # This was the original location field + "source_type": source_type, # The actual data source type + } + + for prop in aura_data_import_relationship_type["properties"]: + # Create a default source mapping for relationship properties + + converted_prop = Property.from_aura_data_import( + prop, + _prepare_source_mapping(relationship_mapping, prop["$id"], source_type), + ) + # Add all properties as regular properties (no automatic key property assignment) + other_props.append(converted_prop) + + # Get start and end node labels from the object type + start_node_ref = aura_data_import_relationship_object["from"]["$ref"] + end_node_ref = aura_data_import_relationship_object["to"]["$ref"] + + return cls( + type=aura_data_import_relationship_type["token"], + start_node_label=node_id_to_label_map[start_node_ref], + end_node_label=node_id_to_label_map[end_node_ref], + key_property=key_prop, + properties=other_props, + ) + + def to_aura_data_import( + self, + rel_type_id: str, + rel_obj_id: str, + start_node_id: str, + end_node_id: str, + constraint_id: str = None, + index_id: str = None, + ) -> tuple[ + AuraDataImportModels.RelationshipType, + AuraDataImportModels.RelationshipObjectType, + AuraDataImportModels.Constraint | None, + AuraDataImportModels.Index | None, + ]: + """Convert a Relationship to Aura Data Import format. + + Returns tuple of (RelationshipType, RelationshipObjectType, Constraint, Index) + Constraint and Index are None if the relationship has no key property. 
+ """ + # Create relationship type + all_props = [] + if self.key_property: + all_props.append(self.key_property) + all_props.extend(self.properties) + + aura_props = [] + for i, prop in enumerate(all_props): + prop_id = f"p:{rel_type_id.split(':')[1]}_{i}" + is_key = ( + i == 0 and self.key_property is not None + ) # First property is the key property if it exists + aura_props.append(prop.to_aura_data_import(prop_id, is_key=is_key)) + + relationship_type = { + "$id": rel_type_id, + "token": self.type, + "properties": aura_props, + } + + # Create relationship object type + relationship_object = { + "$id": rel_obj_id, + "type": {"$ref": f"#{rel_type_id}"}, + "from": {"$ref": f"#{start_node_id}"}, + "to": {"$ref": f"#{end_node_id}"}, + } + + # Create constraint and index if relationship has key property + constraint = None + index = None + if self.key_property and constraint_id and index_id: + key_prop_id = aura_props[0]["$id"] # First property is the key property + + constraint = { + "$id": constraint_id, + "name": f"{self.type}_constraint", + "constraintType": "uniqueness", + "entityType": "relationship", + "nodeLabel": None, + "relationshipType": {"$ref": f"#{rel_type_id}"}, + "properties": [{"$ref": f"#{key_prop_id}"}], + } + + index = { + "$id": index_id, + "name": f"{self.type}_index", + "indexType": "default", + "entityType": "relationship", + "nodeLabel": None, + "relationshipType": {"$ref": f"#{rel_type_id}"}, + "properties": [{"$ref": f"#{key_prop_id}"}], + } + + return relationship_type, relationship_object, constraint, index + def get_cypher_ingest_query_for_many_records( self, start_node_key_property_name: str, end_node_key_property_name: str ) -> str: @@ -558,6 +968,632 @@ def to_arrows_json_str(self) -> str: "Convert the data model to an Arrows Data Model JSON string." 
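         # Usage sketch (hypothetical caller, not part of this module): the emitted
         # string is plain JSON, so it can be written to disk and imported into the
         # Arrows web app, e.g.:
         #   from pathlib import Path
         #   Path("model.json").write_text(data_model.to_arrows_json_str())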
return json.dumps(self.to_arrows_dict(), indent=2) + @classmethod + def from_aura_data_import( + cls, aura_data_import_data_model: AuraDataImportModels.AuraDataImportDataModel + ) -> "DataModel": + """Convert an Aura Data Import DataModel to a DataModel.""" + graph_schema = aura_data_import_data_model["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"] + key_properties = aura_data_import_data_model["dataModel"][ + "graphSchemaExtensionsRepresentation" + ]["nodeKeyProperties"] + node_mappings = aura_data_import_data_model["dataModel"][ + "graphMappingRepresentation" + ]["nodeMappings"] + + # Get the data source schema to determine source type + data_source_schema = aura_data_import_data_model["dataModel"][ + "graphMappingRepresentation" + ]["dataSourceSchema"] + source_type = data_source_schema.get("type", "local") + + # Create mapping from node object ID to key property token + node_key_map = {} + for key_prop in key_properties: + node_ref = key_prop["node"]["$ref"] + prop_ref = key_prop["keyProperty"]["$ref"] + # Find the property token by ID + for node_label in graph_schema["nodeLabels"]: + for prop in node_label["properties"]: + if prop["$id"] == prop_ref.replace("#", ""): + node_key_map[node_ref] = prop["token"] + break + + # Create node ID to label mapping + node_id_to_label_map = {} + for node_obj in graph_schema["nodeObjectTypes"]: + node_id = node_obj["$id"] + # Find the label from nodeLabels + for label_ref in node_obj["labels"]: + label_id = label_ref["$ref"].replace("#", "") + for node_label in graph_schema["nodeLabels"]: + if node_label["$id"] == label_id: + node_id_to_label_map[f"#{node_id}"] = node_label["token"] + break + + # Get relationship mappings + relationship_mappings = aura_data_import_data_model["dataModel"][ + "graphMappingRepresentation" + ]["relationshipMappings"] + + # Create mapping from relationship object ID to relationship mapping + rel_obj_to_mapping = {} + for rel_mapping in relationship_mappings: + rel_ref = rel_mapping["relationship"]["$ref"] + rel_obj_to_mapping[rel_ref] = rel_mapping + + # Create mapping from node object ID to node mapping + node_obj_to_mapping = {} + for node_mapping in node_mappings: + node_ref = node_mapping["node"]["$ref"] + node_obj_to_mapping[node_ref] = node_mapping + + # Convert nodes + nodes = [] + for node_label in graph_schema["nodeLabels"]: + # Find corresponding node object type + node_obj_id = None + for node_obj in graph_schema["nodeObjectTypes"]: + for label_ref in node_obj["labels"]: + if label_ref["$ref"] == f"#{node_label['$id']}": + node_obj_id = f"#{node_obj['$id']}" + break + + key_property_token = node_key_map.get( + node_obj_id, + node_label["properties"][0]["token"] + if node_label["properties"] + else "id", + ) + + # Get the corresponding node mapping + node_mapping = node_obj_to_mapping.get( + node_obj_id, + { + "node": {"$ref": node_obj_id}, + "tableName": "unknown", + "propertyMappings": [], + }, + ) + + node = Node.from_aura_data_import( + node_label, key_property_token, node_mapping, source_type + ) + nodes.append(node) + + # Convert relationships + relationships = [] + for rel_obj in graph_schema["relationshipObjectTypes"]: + # Find corresponding relationship type + rel_type_id = rel_obj["type"]["$ref"].replace("#", "") + rel_type = None + for rt in graph_schema["relationshipTypes"]: + if rt["$id"] == rel_type_id: + rel_type = rt + break + + if rel_type: + # Get the corresponding relationship mapping + rel_obj_id = f"#{rel_obj['$id']}" + rel_mapping = rel_obj_to_mapping.get( + rel_obj_id, + { 
+ "relationship": {"$ref": rel_obj_id}, + "tableName": "unknown", + "propertyMappings": [], + }, + ) + + relationship = Relationship.from_aura_data_import( + rel_type, rel_obj, node_id_to_label_map, rel_mapping, source_type + ) + relationships.append(relationship) + + # Store visualization coordinates in node metadata + visualization_data = aura_data_import_data_model.get("visualisation", {}) + vis_nodes = visualization_data.get("nodes", []) + vis_node_positions = { + vis_node["id"]: vis_node["position"] for vis_node in vis_nodes + } + + # Update node metadata with visualization coordinates + for i, node in enumerate(nodes): + node_id = f"n:{i}" + if node_id in vis_node_positions: + node.metadata["visualization"] = { + "position": vis_node_positions[node_id] + } + + # Store Aura Data Import metadata (excluding visualization since it's now in nodes) + metadata = { + "aura_data_import": { + "version": aura_data_import_data_model.get("version"), + "dataModel_version": aura_data_import_data_model["dataModel"].get( + "version" + ), + "constraints": graph_schema.get("constraints", []), + "indexes": graph_schema.get("indexes", []), + "configurations": aura_data_import_data_model["dataModel"].get( + "configurations", {} + ), + "dataSourceSchema": aura_data_import_data_model["dataModel"][ + "graphMappingRepresentation" + ].get("dataSourceSchema", {}), + } + } + + return cls(nodes=nodes, relationships=relationships, metadata=metadata) + + def to_aura_data_import_dict(self) -> AuraDataImportModels.AuraDataImportDataModel: + """Convert the data model to an Aura Data Import dictionary.""" + # Check if we have stored Aura Data Import metadata + aura_metadata = self.metadata.get("aura_data_import", {}) + + # Generate IDs following the original schema patterns + node_labels = [] + node_object_types = [] + node_key_properties = [] + constraints = [] + indexes = [] + + # Track property IDs to match original schema + node_to_key_prop_id = {} + + # Generate property IDs dynamically + global_property_counter = 0 + + for i, node in enumerate(self.nodes): + node_label_id = f"nl:{i}" + node_obj_id = f"n:{i}" + constraint_id = f"c:{i}" + index_id = f"i:{i}" + + # Use stored original property ID if available, otherwise generate new one + key_prop_id = node.key_property.metadata.get("aura_data_import", {}).get( + "original_id", f"p:{global_property_counter}" + ) + if not node.key_property.metadata.get("aura_data_import", {}).get( + "original_id" + ): + global_property_counter += 1 + + # Build property mapping for additional properties + node_prop_mapping = {} + for prop in node.properties: + stored_id = prop.metadata.get("aura_data_import", {}).get("original_id") + if stored_id: + node_prop_mapping[prop.name] = stored_id + else: + node_prop_mapping[prop.name] = f"p:{global_property_counter}" + global_property_counter += 1 + + node_to_key_prop_id[node_obj_id] = key_prop_id + + # Use the updated Node.to_aura_data_import method + node_label, key_property, constraint, index = node.to_aura_data_import( + node_label_id, + node_obj_id, + key_prop_id, + constraint_id, + index_id, + node_prop_mapping, + ) + + node_labels.append(node_label) + node_key_properties.append(key_property) + constraints.append(constraint) + indexes.append(index) + + # Create node object type + node_object_type = { + "$id": node_obj_id, + "labels": [{"$ref": f"#{node_label_id}"}], + } + node_object_types.append(node_object_type) + + # Handle relationships - start from rt:1, r:1 (not rt:0, r:0) + relationship_types = [] + relationship_object_types = 
[] + + for i, rel in enumerate(self.relationships): + rel_type_id = f"rt:{i + 1}" # Start from 1 + rel_obj_id = f"r:{i + 1}" # Start from 1 + + # Find start and end node IDs + start_node_id = None + end_node_id = None + for j, node in enumerate(self.nodes): + if node.label == rel.start_node_label: + start_node_id = f"n:{j}" + if node.label == rel.end_node_label: + end_node_id = f"n:{j}" + + # Generate constraint and index IDs if relationship has key property + constraint_id = None + index_id = None + if rel.key_property: + # Continue constraint and index numbering after nodes + constraint_id = f"c:{len(self.nodes) + i}" + index_id = f"i:{len(self.nodes) + i}" + + rel_type, rel_obj, rel_constraint, rel_index = rel.to_aura_data_import( + rel_type_id, + rel_obj_id, + start_node_id, + end_node_id, + constraint_id, + index_id, + ) + relationship_types.append(rel_type) + relationship_object_types.append(rel_obj) + + # Add relationship constraints and indexes if they exist + if rel_constraint: + constraints.append(rel_constraint) + if rel_index: + indexes.append(rel_index) + + # Create node mappings with property mappings for round-trip conversion + # We need to extract the property IDs from the already created node_labels + node_mappings = [] + + for i, node in enumerate(self.nodes): + node_obj_id = f"n:{i}" + + # Get the property IDs from the corresponding node label that was just created + node_label = node_labels[i] # This corresponds to the current node + + # Create property mappings using the exact property IDs from the node label + property_mappings = [] + + for prop_def in node_label["properties"]: + prop_id = prop_def["$id"] + prop_token = prop_def["token"] + + # Find the corresponding property in our node to get the field name + field_name = prop_token # default to token name + + # Check key property first + if node.key_property.name == prop_token: + field_name = ( + node.key_property.source.column_name + if node.key_property.source + and node.key_property.source.column_name + else prop_token + ) + else: + # Check other properties + for prop in node.properties: + if prop.name == prop_token: + field_name = ( + prop.source.column_name + if prop.source and prop.source.column_name + else prop_token + ) + break + + property_mappings.append( + {"property": {"$ref": f"#{prop_id}"}, "fieldName": field_name} + ) + + # Use the property source information if available, otherwise use default + table_name = ( + node.key_property.source.table_name + if node.key_property.source and node.key_property.source.table_name + else f"{node.label.lower()}.csv" + ) + + node_mapping = { + "node": {"$ref": f"#{node_obj_id}"}, + "tableName": table_name, + "propertyMappings": property_mappings, + } + node_mappings.append(node_mapping) + + # Create relationship mappings + relationship_mappings = [] + for i, rel in enumerate(self.relationships): + rel_obj_id = f"r:{i + 1}" # Start from 1 + + # Find source and target nodes + source_node = None + target_node = None + for node in self.nodes: + if node.label == rel.start_node_label: + source_node = node + if node.label == rel.end_node_label: + target_node = node + + # Determine table name from relationship properties first, then fall back to source node + table_name = None + + # Check if any relationship property has source information with table name + if ( + rel.key_property + and rel.key_property.source + and rel.key_property.source.table_name + ): + table_name = rel.key_property.source.table_name + else: + for prop in rel.properties: + if prop.source and 
prop.source.table_name: + table_name = prop.source.table_name + break + + # If no relationship property has table info, use source node's table or default + if not table_name: + table_name = ( + source_node.key_property.source.table_name + if source_node + and source_node.key_property.source + and source_node.key_property.source.table_name + else f"{source_node.label.lower()}_{rel.type.lower()}_{target_node.label.lower()}.csv" + ) + + # Generate field mappings based on node key properties + from_field = ( + source_node.key_property.source.column_name + if source_node + and source_node.key_property.source + and source_node.key_property.source.column_name + else source_node.key_property.name.lower() + ) + to_field = ( + target_node.key_property.source.column_name + if target_node + and target_node.key_property.source + and target_node.key_property.source.column_name + else target_node.key_property.name.lower() + ) + + # Create property mappings for relationship properties + property_mappings = [] + + # Find the corresponding relationship type to get property IDs + rel_type_id = f"rt:{i + 1}" + rel_type = None + for rt in relationship_types: + if rt["$id"] == rel_type_id: + rel_type = rt + break + + if rel_type and rel_type["properties"]: + for prop_def in rel_type["properties"]: + prop_id = prop_def["$id"] + prop_token = prop_def["token"] + + # Find the corresponding property in our relationship to get the field name + field_name = prop_token # default to token name + + # Check key property first + if rel.key_property and rel.key_property.name == prop_token: + field_name = ( + rel.key_property.source.column_name + if rel.key_property.source + and rel.key_property.source.column_name + else prop_token + ) + else: + # Check other properties + for prop in rel.properties: + if prop.name == prop_token: + field_name = ( + prop.source.column_name + if prop.source and prop.source.column_name + else prop_token + ) + break + + property_mappings.append( + {"property": {"$ref": f"#{prop_id}"}, "fieldName": field_name} + ) + + rel_mapping = { + "relationship": {"$ref": f"#{rel_obj_id}"}, + "tableName": table_name, + "propertyMappings": property_mappings, + "fromMapping": {"fieldName": from_field}, + "toMapping": {"fieldName": to_field}, + } + relationship_mappings.append(rel_mapping) + + # Use stored metadata if available, otherwise create defaults + version = aura_metadata.get("version", "2.3.1-beta.0") + datamodel_version = aura_metadata.get("dataModel_version", "2.3.1-beta.0") + stored_constraints = aura_metadata.get("constraints") + stored_indexes = aura_metadata.get("indexes") + stored_configurations = aura_metadata.get("configurations", {"idsToIgnore": []}) + + # Generate table schemas for all referenced tables + table_names = set() + for node_mapping in node_mappings: + table_names.add(node_mapping["tableName"]) + for rel_mapping in relationship_mappings: + table_names.add(rel_mapping["tableName"]) + + # Create table schemas if not stored in metadata + stored_data_source_schema = aura_metadata.get("dataSourceSchema") + if not stored_data_source_schema or not stored_data_source_schema.get( + "tableSchemas" + ): + # Determine the source type based on the properties in the data model + # Check all properties to see if any have a different source type + source_types = set() + for node in self.nodes: + if node.key_property.source and node.key_property.source.source_type: + source_types.add(node.key_property.source.source_type) + for prop in node.properties: + if prop.source and prop.source.source_type: 
+ source_types.add(prop.source.source_type) + + for rel in self.relationships: + if ( + rel.key_property + and rel.key_property.source + and rel.key_property.source.source_type + ): + source_types.add(rel.key_property.source.source_type) + for prop in rel.properties: + if prop.source and prop.source.source_type: + source_types.add(prop.source.source_type) + + # Default to "local" if no source types found, or use the first one found + # In practice, all properties should have the same source type for a given data model + data_source_type = source_types.pop() if source_types else "local" + + table_schemas = [] + for table_name in sorted(table_names): # Sort for consistent output + # Generate field schemas based on node/relationship mappings + fields = [] + + # Collect fields from node mappings + for node_mapping in node_mappings: + if node_mapping["tableName"] == table_name: + for prop_mapping in node_mapping["propertyMappings"]: + field_name = prop_mapping["fieldName"] + # Find the property to get its type + prop_ref = prop_mapping["property"]["$ref"].replace("#", "") + prop_type = "string" # default + + # Search for the property in node labels + for node_label in node_labels: + for prop in node_label["properties"]: + if prop["$id"] == prop_ref: + prop_type = prop["type"]["type"] + break + + fields.append( + { + "name": field_name, + "sample": f"sample_{field_name}", + "recommendedType": {"type": prop_type}, + } + ) + + # Collect fields from relationship mappings + for rel_mapping in relationship_mappings: + if rel_mapping["tableName"] == table_name: + # Add from/to fields + from_field = rel_mapping["fromMapping"]["fieldName"] + to_field = rel_mapping["toMapping"]["fieldName"] + + # Add from field if not already present + if not any(f["name"] == from_field for f in fields): + fields.append( + { + "name": from_field, + "sample": f"sample_{from_field}", + "recommendedType": {"type": "string"}, + } + ) + + # Add to field if not already present + if not any(f["name"] == to_field for f in fields): + fields.append( + { + "name": to_field, + "sample": f"sample_{to_field}", + "recommendedType": {"type": "string"}, + } + ) + + # Add relationship property fields + for prop_mapping in rel_mapping["propertyMappings"]: + field_name = prop_mapping["fieldName"] + # Find the property to get its type + prop_ref = prop_mapping["property"]["$ref"].replace("#", "") + prop_type = "string" # default + + # Search for the property in relationship types + for rel_type in relationship_types: + for prop in rel_type["properties"]: + if prop["$id"] == prop_ref: + prop_type = prop["type"]["type"] + break + + # Add field if not already present + if not any(f["name"] == field_name for f in fields): + fields.append( + { + "name": field_name, + "sample": f"sample_{field_name}", + "recommendedType": {"type": prop_type}, + } + ) + + table_schemas.append({"name": table_name, "fields": fields}) + + stored_data_source_schema = { + "type": data_source_type, + "tableSchemas": table_schemas, + } + else: + stored_data_source_schema = aura_metadata.get( + "dataSourceSchema", {"type": "local", "tableSchemas": []} + ) + + # Reconstruct visualization nodes from node metadata and generate for new nodes + visualization_nodes = [] + for i, node in enumerate(self.nodes): + node_id = f"n:{i}" + + # Check if node has stored visualization position + if ( + "visualization" in node.metadata + and "position" in node.metadata["visualization"] + ): + position = node.metadata["visualization"]["position"] + else: + # Generate default position for new 
nodes + # Use a grid layout: 5 nodes per row, 200px spacing + row = i // 5 + col = i % 5 + position = {"x": col * 200.0, "y": row * 200.0} + + vis_node = {"id": node_id, "position": position} + visualization_nodes.append(vis_node) + + # Build complete structure + result = { + "version": version, + "visualisation": {"nodes": visualization_nodes}, + "dataModel": { + "version": datamodel_version, + "graphSchemaRepresentation": { + "version": "1.0.0", + "graphSchema": { + "nodeLabels": node_labels, + "relationshipTypes": relationship_types, + "nodeObjectTypes": node_object_types, + "relationshipObjectTypes": relationship_object_types, + "constraints": stored_constraints + if stored_constraints is not None + else constraints, + "indexes": stored_indexes + if stored_indexes is not None + else indexes, + }, + }, + "graphSchemaExtensionsRepresentation": { + "nodeKeyProperties": node_key_properties + }, + "graphMappingRepresentation": { + "dataSourceSchema": stored_data_source_schema, + "nodeMappings": node_mappings, + "relationshipMappings": relationship_mappings, + }, + "configurations": stored_configurations, + }, + } + + return result + + def to_aura_data_import_json_str(self) -> str: + """Convert the data model to an Aura Data Import JSON string.""" + return json.dumps(self.to_aura_data_import_dict(), indent=2) + def get_node_cypher_ingest_query_for_many_records(self, node_label: str) -> str: "Generate a Cypher query to ingest a list of Node records into a Neo4j database." node = self.nodes_dict[node_label] diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py index c836892..7e7da2d 100644 --- a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py @@ -51,9 +51,7 @@ def neo4j_data_ingest_process() -> str: return DATA_INGEST_PROCESS @mcp.tool() - def validate_node( - node: Node, return_validated: bool = False - ) -> bool | dict[str, Any]: + def validate_node(node: Node, return_validated: bool = False) -> bool | Node: "Validate a single node. Returns True if the node is valid, otherwise raises a ValueError. If return_validated is True, returns the validated node." logger.info("Validating a single node.") try: @@ -70,7 +68,7 @@ def validate_node( @mcp.tool() def validate_relationship( relationship: Relationship, return_validated: bool = False - ) -> bool | dict[str, Any]: + ) -> bool | Relationship: "Validate a single relationship. Returns True if the relationship is valid, otherwise raises a ValueError. If return_validated is True, returns the validated relationship." logger.info("Validating a single relationship.") try: @@ -89,7 +87,7 @@ def validate_relationship( @mcp.tool() def validate_data_model( data_model: DataModel, return_validated: bool = False - ) -> bool | dict[str, Any]: + ) -> bool | DataModel: "Validate the entire data model. Returns True if the data model is valid, otherwise raises a ValueError. If return_validated is True, returns the validated data model." logger.info("Validating the entire data model.") try: @@ -110,21 +108,37 @@ def load_from_arrows_json(arrows_data_model_dict: dict[str, Any]) -> DataModel: return DataModel.from_arrows(arrows_data_model_dict) @mcp.tool() - def export_to_arrows_json(data_model: DataModel) -> str: + def export_to_arrows_json_str(data_model: DataModel) -> str: "Export the data model to the Arrows web application format. Returns a JSON string. 
This should be presented to the user as an artifact if possible."
+        validated_data_model: DataModel = validate_data_model(data_model, True)
         logger.info("Exporting the data model to the Arrows web application format.")
-        return data_model.to_arrows_json_str()
+        return validated_data_model.to_arrows_json_str()
+
+    @mcp.tool()
+    def load_from_aura_data_import_json(
+        aura_data_import_dict: dict[str, Any],
+    ) -> DataModel:
+        "Load a data model from the Aura Data Import format. Returns the parsed data model."
+        logger.info("Loading a data model from the Aura Data Import format.")
+        return DataModel.from_aura_data_import(aura_data_import_dict)
+
+    @mcp.tool()
+    def export_to_aura_data_import_json_str(data_model: DataModel) -> str:
+        """
+        Export the data model to the Aura Data Import format.
+        If the data source information is known, it should be provided in the appropriate fields of the data model.
+        Returns a JSON string. This should be presented to the user as an artifact if possible.
+        """
+        validated_data_model: DataModel = validate_data_model(data_model, True)
+        logger.info("Exporting the data model to the Aura Data Import format.")
+        return validated_data_model.to_aura_data_import_json_str()

     @mcp.tool()
     def get_mermaid_config_str(data_model: DataModel) -> str:
         "Get the Mermaid configuration string for the data model. This may be visualized in Claude Desktop and other applications with Mermaid support."
+        validated_data_model: DataModel = validate_data_model(data_model, True)
         logger.info("Getting the Mermaid configuration string for the data model.")
-        try:
-            dm_validated = DataModel.model_validate(data_model, strict=True)
-        except ValidationError as e:
-            logger.error(f"Validation error: {e}")
-            raise ValueError(f"Validation error: {e}")
-        return dm_validated.get_mermaid_config_str()
+        return validated_data_model.get_mermaid_config_str()

     @mcp.tool()
     def get_node_cypher_ingest_query(
diff --git a/servers/mcp-neo4j-data-modeling/tests/integration/conftest.py b/servers/mcp-neo4j-data-modeling/tests/integration/conftest.py
deleted file mode 100644
index 4ebae19..0000000
--- a/servers/mcp-neo4j-data-modeling/tests/integration/conftest.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import os
-from typing import Any
-
-import pytest
-import pytest_asyncio
-from neo4j import AsyncGraphDatabase
-from testcontainers.neo4j import Neo4jContainer
-
-from mcp_neo4j_data_modeling.server import create_mcp_server
-
-neo4j = (
-    Neo4jContainer("neo4j:latest")
-    .with_env("NEO4J_apoc_export_file_enabled", "true")
-    .with_env("NEO4J_apoc_import_file_enabled", "true")
-    .with_env("NEO4J_apoc_import_file_use__neo4j__config", "true")
-    .with_env("NEO4J_PLUGINS", '["apoc"]')
-)
-
-
-@pytest.fixture(scope="module", autouse=True)
-def setup(request):
-    neo4j.start()
-
-    def remove_container():
-        neo4j.get_driver().close()
-        neo4j.stop()
-
-    request.addfinalizer(remove_container)
-    os.environ["NEO4J_URI"] = neo4j.get_connection_url()
-    os.environ["NEO4J_HOST"] = neo4j.get_container_host_ip()
-    os.environ["NEO4J_PORT"] = neo4j.get_exposed_port(7687)
-
-    yield neo4j
-
-
-@pytest_asyncio.fixture(scope="function")
-async def async_neo4j_driver(setup: Neo4jContainer):
-    driver = AsyncGraphDatabase.driver(
-        setup.get_connection_url(), auth=(setup.username, setup.password)
-    )
-    try:
-        yield driver
-    finally:
-        await driver.close()
-
-
-@pytest_asyncio.fixture(scope="function")
-async def mcp_server(async_neo4j_driver):
-    mcp = create_mcp_server(async_neo4j_driver, "neo4j")
-
-    return mcp
-
-
-@pytest.fixture(scope="function") -def init_data(setup: Neo4jContainer, clear_data: Any): - with setup.get_driver().session(database="neo4j") as session: - session.run("CREATE (a:Person {name: 'Alice', age: 30})") - session.run("CREATE (b:Person {name: 'Bob', age: 25})") - session.run("CREATE (c:Person {name: 'Charlie', age: 35})") - session.run( - "MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) CREATE (a)-[:FRIEND]->(b)" - ) - session.run( - "MATCH (b:Person {name: 'Bob'}), (c:Person {name: 'Charlie'}) CREATE (b)-[:FRIEND]->(c)" - ) - - -@pytest.fixture(scope="function") -def clear_data(setup: Neo4jContainer): - with setup.get_driver().session(database="neo4j") as session: - session.run("MATCH (n) DETACH DELETE n") diff --git a/servers/mcp-neo4j-data-modeling/tests/resources/neo4j_importer_model_2025-06-30.json b/servers/mcp-neo4j-data-modeling/tests/resources/neo4j_importer_model_2025-06-30.json new file mode 100644 index 0000000..bf390da --- /dev/null +++ b/servers/mcp-neo4j-data-modeling/tests/resources/neo4j_importer_model_2025-06-30.json @@ -0,0 +1,899 @@ +{ + "version": "2.3.1-beta.0", + "visualisation": { + "nodes": [ + { + "id": "n:0", + "position": { + "x": 0, + "y": 0 + } + }, + { + "id": "n:1", + "position": { + "x": 59.4453125, + "y": -93.5 + } + }, + { + "id": "n:2", + "position": { + "x": 68.11197916666667, + "y": 7.166666666666666 + } + }, + { + "id": "n:3", + "position": { + "x": -52.888020833333336, + "y": -94.83333333333333 + } + }, + { + "id": "n:4", + "position": { + "x": 42.778645833333336, + "y": 102.83333333333333 + } + } + ] + }, + "dataModel": { + "version": "2.3.1-beta.0", + "graphSchemaRepresentation": { + "version": "1.0.0", + "graphSchema": { + "nodeLabels": [ + { + "$id": "nl:0", + "token": "Country", + "properties": [ + { + "$id": "p:0_0", + "token": "countryId", + "type": { + "type": "integer" + }, + "nullable": false + }, + { + "$id": "p:0_1", + "token": "name", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_2", + "token": "iso3", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_3", + "token": "iso2", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_4", + "token": "numericCode", + "type": { + "type": "integer" + }, + "nullable": false + }, + { + "$id": "p:0_5", + "token": "phoneCode", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_6", + "token": "capital", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_10", + "token": "tld", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_11", + "token": "native", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_15", + "token": "latitude", + "type": { + "type": "float" + }, + "nullable": false + }, + { + "$id": "p:0_16", + "token": "longitude", + "type": { + "type": "float" + }, + "nullable": false + }, + { + "$id": "p:0_17", + "token": "emoji", + "type": { + "type": "string" + }, + "nullable": false + }, + { + "$id": "p:0_18", + "token": "emojiU", + "type": { + "type": "string" + }, + "nullable": false + } + ] + }, + { + "$id": "nl:1", + "token": "SubRegion", + "properties": [ + { + "$id": "p:3", + "token": "subregion", + "type": { + "type": "string" + }, + "nullable": true + } + ] + }, + { + "$id": "nl:2", + "token": "Region", + "properties": [ + { + "$id": "p:1", + "token": "region", + "type": { + "type": "string" + }, + "nullable": true + } + ] + }, + { + "$id": "nl:3", + "token": "TimeZones", 
+ "properties": [ + { + "$id": "p:2", + "token": "timezones", + "type": { + "type": "string" + }, + "nullable": true + } + ] + }, + { + "$id": "nl:4", + "token": "Currency", + "properties": [ + { + "$id": "p:4", + "token": "currency", + "type": { + "type": "string" + }, + "nullable": true + }, + { + "$id": "p:5", + "token": "currency_name", + "type": { + "type": "string" + }, + "nullable": true + }, + { + "$id": "p:6", + "token": "currency_symbol", + "type": { + "type": "string" + }, + "nullable": true + } + ] + } + ], + "relationshipTypes": [ + { + "$id": "rt:1", + "token": "IN_SUBREGION", + "properties": [] + }, + { + "$id": "rt:2", + "token": "IN_REGION", + "properties": [] + }, + { + "$id": "rt:3", + "token": "IN_TIMEZONE", + "properties": [] + }, + { + "$id": "rt:4", + "token": "USES_CURRENCY", + "properties": [] + } + ], + "nodeObjectTypes": [ + { + "$id": "n:0", + "labels": [ + { + "$ref": "#nl:0" + } + ] + }, + { + "$id": "n:1", + "labels": [ + { + "$ref": "#nl:1" + } + ] + }, + { + "$id": "n:2", + "labels": [ + { + "$ref": "#nl:2" + } + ] + }, + { + "$id": "n:3", + "labels": [ + { + "$ref": "#nl:3" + } + ] + }, + { + "$id": "n:4", + "labels": [ + { + "$ref": "#nl:4" + } + ] + } + ], + "relationshipObjectTypes": [ + { + "$id": "r:1", + "type": { + "$ref": "#rt:1" + }, + "from": { + "$ref": "#n:0" + }, + "to": { + "$ref": "#n:1" + } + }, + { + "$id": "r:2", + "type": { + "$ref": "#rt:2" + }, + "from": { + "$ref": "#n:1" + }, + "to": { + "$ref": "#n:2" + } + }, + { + "$id": "r:3", + "type": { + "$ref": "#rt:3" + }, + "from": { + "$ref": "#n:0" + }, + "to": { + "$ref": "#n:3" + } + }, + { + "$id": "r:4", + "type": { + "$ref": "#rt:4" + }, + "from": { + "$ref": "#n:0" + }, + "to": { + "$ref": "#n:4" + } + } + ], + "constraints": [ + { + "$id": "c:0", + "name": "countries.csv", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:0" + }, + "relationshipType": null, + "properties": [ + { + "$ref": "#p:0_0" + } + ] + }, + { + "$id": "c:1", + "name": "region_Region_uniq", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:2" + }, + "properties": [ + { + "$ref": "#p:1" + } + ] + }, + { + "$id": "c:2", + "name": "timezones_TimeZones_uniq", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:3" + }, + "properties": [ + { + "$ref": "#p:2" + } + ] + }, + { + "$id": "c:3", + "name": "subregion_SubRegion_uniq", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:1" + }, + "properties": [ + { + "$ref": "#p:3" + } + ] + }, + { + "$id": "c:4", + "name": "currency_Currency_uniq", + "constraintType": "uniqueness", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:4" + }, + "properties": [ + { + "$ref": "#p:4" + } + ] + } + ], + "indexes": [ + { + "$id": "i:0", + "name": "countries.csv", + "indexType": "default", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:0" + }, + "relationshipType": null, + "properties": [ + { + "$ref": "#p:0_0" + } + ] + }, + { + "$id": "i:1", + "name": "region_Region_uniq", + "indexType": "default", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:2" + }, + "properties": [ + { + "$ref": "#p:1" + } + ] + }, + { + "$id": "i:2", + "name": "timezones_TimeZones_uniq", + "indexType": "default", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:3" + }, + "properties": [ + { + "$ref": "#p:2" + } + ] + }, + { + "$id": "i:3", + "name": "subregion_SubRegion_uniq", + "indexType": "default", + "entityType": 
"node", + "nodeLabel": { + "$ref": "#nl:1" + }, + "properties": [ + { + "$ref": "#p:3" + } + ] + }, + { + "$id": "i:4", + "name": "currency_Currency_uniq", + "indexType": "default", + "entityType": "node", + "nodeLabel": { + "$ref": "#nl:4" + }, + "properties": [ + { + "$ref": "#p:4" + } + ] + } + ] + } + }, + "graphSchemaExtensionsRepresentation": { + "nodeKeyProperties": [ + { + "node": { + "$ref": "#n:0" + }, + "keyProperty": { + "$ref": "#p:0_0" + } + }, + { + "node": { + "$ref": "#n:2" + }, + "keyProperty": { + "$ref": "#p:1" + } + }, + { + "node": { + "$ref": "#n:3" + }, + "keyProperty": { + "$ref": "#p:2" + } + }, + { + "node": { + "$ref": "#n:1" + }, + "keyProperty": { + "$ref": "#p:3" + } + }, + { + "node": { + "$ref": "#n:4" + }, + "keyProperty": { + "$ref": "#p:4" + } + } + ] + }, + "graphMappingRepresentation": { + "dataSourceSchema": { + "type": "local", + "tableSchemas": [ + { + "name": "countries.csv", + "fields": [ + { + "name": "id", + "sample": "", + "recommendedType": { + "type": "integer" + } + }, + { + "name": "name", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "iso3", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "iso2", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "numeric_code", + "sample": "", + "recommendedType": { + "type": "integer" + } + }, + { + "name": "phone_code", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "capital", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "currency", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "currency_name", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "currency_symbol", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "tld", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "native", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "region", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "subregion", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "timezones", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "latitude", + "sample": "", + "recommendedType": { + "type": "float" + } + }, + { + "name": "longitude", + "sample": "", + "recommendedType": { + "type": "float" + } + }, + { + "name": "emoji", + "sample": "", + "recommendedType": { + "type": "string" + } + }, + { + "name": "emojiU", + "sample": "", + "recommendedType": { + "type": "string" + } + } + ], + "primaryKeys": [], + "foreignKeys": [], + "expanded": true + } + ] + }, + "nodeMappings": [ + { + "node": { + "$ref": "#n:0" + }, + "tableName": "countries.csv", + "propertyMappings": [ + { + "property": { + "$ref": "#p:0_0" + }, + "fieldName": "id" + }, + { + "property": { + "$ref": "#p:0_1" + }, + "fieldName": "name" + }, + { + "property": { + "$ref": "#p:0_2" + }, + "fieldName": "iso3" + }, + { + "property": { + "$ref": "#p:0_3" + }, + "fieldName": "iso2" + }, + { + "property": { + "$ref": "#p:0_4" + }, + "fieldName": "numeric_code" + }, + { + "property": { + "$ref": "#p:0_5" + }, + "fieldName": "phone_code" + }, + { + "property": { + "$ref": "#p:0_6" + }, + "fieldName": "capital" + }, + { + "property": { + "$ref": "#p:0_10" + }, + "fieldName": "tld" + }, + { + "property": { + "$ref": "#p:0_11" + }, + "fieldName": "native" + }, + { + 
"property": { + "$ref": "#p:0_15" + }, + "fieldName": "latitude" + }, + { + "property": { + "$ref": "#p:0_16" + }, + "fieldName": "longitude" + }, + { + "property": { + "$ref": "#p:0_17" + }, + "fieldName": "emoji" + }, + { + "property": { + "$ref": "#p:0_18" + }, + "fieldName": "emojiU" + } + ] + }, + { + "node": { + "$ref": "#n:1" + }, + "tableName": "countries.csv", + "propertyMappings": [ + { + "fieldName": "subregion", + "property": { + "$ref": "#p:3" + } + } + ] + }, + { + "node": { + "$ref": "#n:2" + }, + "tableName": "countries.csv", + "propertyMappings": [ + { + "fieldName": "region", + "property": { + "$ref": "#p:1" + } + } + ] + }, + { + "node": { + "$ref": "#n:3" + }, + "tableName": "countries.csv", + "propertyMappings": [ + { + "fieldName": "timezones", + "property": { + "$ref": "#p:2" + } + } + ] + }, + { + "node": { + "$ref": "#n:4" + }, + "tableName": "countries.csv", + "propertyMappings": [ + { + "fieldName": "currency", + "property": { + "$ref": "#p:4" + } + }, + { + "fieldName": "currency_name", + "property": { + "$ref": "#p:5" + } + }, + { + "fieldName": "currency_symbol", + "property": { + "$ref": "#p:6" + } + } + ] + } + ], + "relationshipMappings": [ + { + "relationship": { + "$ref": "#r:3" + }, + "tableName": "countries.csv", + "propertyMappings": [], + "fromMapping": { + "fieldName": "id" + }, + "toMapping": { + "fieldName": "timezones" + } + }, + { + "relationship": { + "$ref": "#r:1" + }, + "tableName": "countries.csv", + "propertyMappings": [], + "fromMapping": { + "fieldName": "id" + }, + "toMapping": { + "fieldName": "subregion" + } + }, + { + "relationship": { + "$ref": "#r:2" + }, + "tableName": "countries.csv", + "propertyMappings": [], + "fromMapping": { + "fieldName": "subregion" + }, + "toMapping": { + "fieldName": "region" + } + }, + { + "relationship": { + "$ref": "#r:4" + }, + "tableName": "countries.csv", + "propertyMappings": [], + "fromMapping": { + "fieldName": "id" + }, + "toMapping": { + "fieldName": "currency" + } + } + ] + }, + "configurations": { + "idsToIgnore": [] + } + } +} \ No newline at end of file diff --git a/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py b/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py new file mode 100644 index 0000000..fc9cac2 --- /dev/null +++ b/servers/mcp-neo4j-data-modeling/tests/unit/test_aura_data_import_conversion.py @@ -0,0 +1,978 @@ +"""Unit tests for Aura Data Import conversion methods.""" + +import json +from pathlib import Path +from typing import Any + +import pytest + +from mcp_neo4j_data_modeling.data_model import ( + DataModel, + Node, + Property, + PropertySource, + Relationship, +) + + +@pytest.fixture +def sample_aura_data_import_model(): + """Load the sample Aura Data Import model from the JSON file.""" + json_file = ( + Path(__file__).parent.parent + / "resources" + / "neo4j_importer_model_2025-06-30.json" + ) + with open(json_file, "r") as f: + return json.load(f) + + +@pytest.fixture +def sample_property_data(): + """Sample property data from the JSON file.""" + return { + "$id": "p:0_0", + "token": "countryId", + "type": {"type": "integer"}, + "nullable": False, + } + + +@pytest.fixture +def sample_source_mapping(): + """Sample source mapping for a property.""" + return {"tableName": "countries.csv", "fieldName": "id", "type": "local"} + + +@pytest.fixture +def sample_node_label(): + """Sample node label from the JSON file.""" + return { + "$id": "nl:1", + "token": "SubRegion", + "properties": [ + { + "$id": "p:3", + "token": 
"subregion", + "type": {"type": "string"}, + "nullable": True, + } + ], + } + + +@pytest.fixture +def sample_node_mapping(): + """Sample node mapping from the JSON file.""" + return { + "node": {"$ref": "#n:1"}, + "tableName": "countries.csv", + "propertyMappings": [{"fieldName": "subregion", "property": {"$ref": "#p:3"}}], + } + + +class TestPropertyConversion: + """Test Property conversion methods.""" + + def test_from_aura_data_import_string_property(self, sample_source_mapping): + """Test converting a string property from Aura Data Import format.""" + aura_property = { + "$id": "p:1", + "token": "name", + "type": {"type": "string"}, + "nullable": False, + } + + prop = Property.from_aura_data_import(aura_property, sample_source_mapping) + + assert prop.name == "name" + assert prop.type == "STRING" + assert prop.source.column_name == "id" + assert prop.source.table_name == "countries.csv" + assert prop.source.location == "local" + + def test_from_aura_data_import_integer_property(self, sample_source_mapping): + """Test converting an integer property from Aura Data Import format.""" + aura_property = { + "$id": "p:0_0", + "token": "countryId", + "type": {"type": "integer"}, + "nullable": False, + } + + prop = Property.from_aura_data_import(aura_property, sample_source_mapping) + + assert prop.name == "countryId" + assert prop.type == "INTEGER" + + def test_from_aura_data_import_float_property(self, sample_source_mapping): + """Test converting a float property from Aura Data Import format.""" + aura_property = { + "$id": "p:0_15", + "token": "latitude", + "type": {"type": "float"}, + "nullable": False, + } + + prop = Property.from_aura_data_import(aura_property, sample_source_mapping) + + assert prop.name == "latitude" + assert prop.type == "FLOAT" + + def test_from_aura_data_import_boolean_property(self, sample_source_mapping): + """Test converting a boolean property from Aura Data Import format.""" + aura_property = { + "$id": "p:7", + "token": "active", + "type": {"type": "boolean"}, + "nullable": True, + } + + prop = Property.from_aura_data_import(aura_property, sample_source_mapping) + + assert prop.name == "active" + assert prop.type == "BOOLEAN" + + def test_to_aura_data_import_key_property(self): + """Test converting a key property to Aura Data Import format.""" + prop = Property(name="id", type="INTEGER") + + result = prop.to_aura_data_import("p:0_0", is_key=True) + + assert result["$id"] == "p:0_0" + assert result["token"] == "id" + assert result["type"]["type"] == "integer" + assert not result["nullable"] # Key properties are not nullable + + def test_to_aura_data_import_non_key_property(self): + """Test converting a non-key property to Aura Data Import format.""" + prop = Property(name="name", type="STRING") + + result = prop.to_aura_data_import("p:0_1", is_key=False) + + assert result["$id"] == "p:0_1" + assert result["token"] == "name" + assert result["type"]["type"] == "string" + assert result["nullable"] # Non-key properties are nullable + + def test_to_aura_data_import_unknown_type_defaults_to_string(self): + """Test that unknown property types default to string.""" + prop = Property(name="custom", type="CUSTOM_TYPE") + + result = prop.to_aura_data_import("p:1", is_key=False) + + assert result["type"]["type"] == "string" + + +class TestNodeConversion: + """Test Node conversion methods.""" + + def test_from_aura_data_import_simple_node( + self, sample_node_label, sample_node_mapping + ): + """Test converting a simple node from Aura Data Import format.""" + node = 
Node.from_aura_data_import( + sample_node_label, + "subregion", # key property token + sample_node_mapping, + "local", # source_type + ) + + assert node.label == "SubRegion" + assert node.key_property.name == "subregion" + assert node.key_property.type == "STRING" + assert len(node.properties) == 0 # Only one property, which is the key + + def test_from_aura_data_import_complex_node(self, sample_aura_data_import_model): + """Test converting a complex node with multiple properties.""" + # Get the Country node from the sample data + country_node_label = sample_aura_data_import_model["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"]["nodeLabels"][0] + country_node_mapping = sample_aura_data_import_model["dataModel"][ + "graphMappingRepresentation" + ]["nodeMappings"][0] + + node = Node.from_aura_data_import( + country_node_label, + "countryId", # key property token + country_node_mapping, + "local", # source_type + ) + + assert node.label == "Country" + assert node.key_property.name == "countryId" + assert node.key_property.type == "INTEGER" + assert ( + len(node.properties) == 12 + ) # 13 total properties - 1 key = 12 non-key properties + + # Check some specific properties + property_names = [p.name for p in node.properties] + assert "name" in property_names + assert "iso3" in property_names + assert "latitude" in property_names + + def test_from_aura_data_import_missing_key_property_uses_first( + self, sample_node_label, sample_node_mapping + ): + """Test that when key property is not found, first property is used as key.""" + node = Node.from_aura_data_import( + sample_node_label, + "nonexistent_key", # This key doesn't exist + sample_node_mapping, + "local", # source_type + ) + + assert node.label == "SubRegion" + assert node.key_property.name == "subregion" # First property becomes key + assert len(node.properties) == 0 + + def test_to_aura_data_import_simple_node(self): + """Test converting a simple node to Aura Data Import format.""" + key_prop = Property(name="id", type="INTEGER") + other_prop = Property(name="name", type="STRING") + node = Node(label="TestNode", key_property=key_prop, properties=[other_prop]) + + node_label, key_property, constraint, index = node.to_aura_data_import( + "nl:0", "n:0", "p:0_0", "c:0", "i:0" + ) + + # Check node label + assert node_label["$id"] == "nl:0" + assert node_label["token"] == "TestNode" + assert len(node_label["properties"]) == 2 + + # Check key property is first and not nullable + assert node_label["properties"][0]["token"] == "id" + assert not node_label["properties"][0]["nullable"] + assert node_label["properties"][1]["token"] == "name" + assert node_label["properties"][1]["nullable"] + + # Check key property mapping + assert key_property["node"]["$ref"] == "#n:0" + assert key_property["keyProperty"]["$ref"] == "#p:0_0" + + # Check constraint + assert constraint["$id"] == "c:0" + assert constraint["name"] == "TestNode_constraint" + assert constraint["constraintType"] == "uniqueness" + assert constraint["entityType"] == "node" + assert constraint["nodeLabel"]["$ref"] == "#nl:0" + assert constraint["properties"][0]["$ref"] == "#p:0_0" + + # Check index + assert index["$id"] == "i:0" + assert index["name"] == "TestNode_index" + assert index["indexType"] == "default" + assert index["entityType"] == "node" + assert index["nodeLabel"]["$ref"] == "#nl:0" + assert index["properties"][0]["$ref"] == "#p:0_0" + + def test_node_mapping_property_not_found_raises_error(self, sample_node_label): + """Test that missing property in node mapping 
raises an error.""" + invalid_mapping = { + "node": {"$ref": "#n:1"}, + "tableName": "countries.csv", + "propertyMappings": [], # Empty mappings + } + + with pytest.raises(ValueError, match="Property p:3 not found in node mapping"): + Node.from_aura_data_import( + sample_node_label, "subregion", invalid_mapping, "local" + ) + + +class TestRelationshipConversion: + """Test Relationship conversion methods.""" + + def test_from_aura_data_import_simple_relationship(self): + """Test converting a simple relationship from Aura Data Import format.""" + rel_type = {"$id": "rt:1", "token": "IN_SUBREGION", "properties": []} + + rel_obj = { + "$id": "r:1", + "type": {"$ref": "#rt:1"}, + "from": {"$ref": "#n:0"}, + "to": {"$ref": "#n:1"}, + } + + node_id_to_label_map = {"#n:0": "Country", "#n:1": "SubRegion"} + + # Empty relationship mapping since there are no properties + rel_mapping = { + "relationship": {"$ref": "#r:1"}, + "tableName": "relationships.csv", + "propertyMappings": [], + } + + relationship = Relationship.from_aura_data_import( + rel_type, rel_obj, node_id_to_label_map, rel_mapping, "local" + ) + + assert relationship.type == "IN_SUBREGION" + assert relationship.start_node_label == "Country" + assert relationship.end_node_label == "SubRegion" + assert relationship.key_property is None + assert len(relationship.properties) == 0 + + def test_from_aura_data_import_relationship_with_properties(self): + """Test converting a relationship with properties.""" + rel_type = { + "$id": "rt:2", + "token": "CONNECTED_TO", + "properties": [ + { + "$id": "p:rel_1", + "token": "weight", + "type": {"type": "float"}, + "nullable": False, + }, + { + "$id": "p:rel_2", + "token": "since", + "type": {"type": "string"}, + "nullable": True, + }, + ], + } + + rel_obj = { + "$id": "r:2", + "type": {"$ref": "#rt:2"}, + "from": {"$ref": "#n:0"}, + "to": {"$ref": "#n:1"}, + } + + node_id_to_label_map = {"#n:0": "NodeA", "#n:1": "NodeB"} + + # Relationship mapping with properties + rel_mapping = { + "relationship": {"$ref": "#r:2"}, + "tableName": "relationships.csv", + "propertyMappings": [ + {"property": {"$ref": "#p:rel_1"}, "fieldName": "weight"}, + {"property": {"$ref": "#p:rel_2"}, "fieldName": "since"}, + ], + } + + relationship = Relationship.from_aura_data_import( + rel_type, rel_obj, node_id_to_label_map, rel_mapping, "local" + ) + + assert relationship.type == "CONNECTED_TO" + assert relationship.key_property is None # No automatic key property assignment + assert ( + len(relationship.properties) == 2 + ) # Both properties are regular properties + assert relationship.properties[0].name == "weight" + assert relationship.properties[1].name == "since" + + def test_to_aura_data_import_simple_relationship(self): + """Test converting a simple relationship to Aura Data Import format.""" + relationship = Relationship( + type="KNOWS", start_node_label="Person", end_node_label="Person" + ) + + rel_type, rel_obj, constraint, index = relationship.to_aura_data_import( + "rt:1", "r:1", "n:0", "n:1" + ) + + # Check relationship type + assert rel_type["$id"] == "rt:1" + assert rel_type["token"] == "KNOWS" + assert len(rel_type["properties"]) == 0 + + # Check relationship object + assert rel_obj["$id"] == "r:1" + assert rel_obj["type"]["$ref"] == "#rt:1" + assert rel_obj["from"]["$ref"] == "#n:0" + assert rel_obj["to"]["$ref"] == "#n:1" + + # Check that constraint and index are None (no key property) + assert constraint is None + assert index is None + + def test_to_aura_data_import_relationship_with_properties(self): + 
"""Test converting a relationship with properties to Aura Data Import format.""" + key_prop = Property(name="relationshipId", type="INTEGER") + other_prop = Property(name="strength", type="FLOAT") + + relationship = Relationship( + type="CONNECTED", + start_node_label="NodeA", + end_node_label="NodeB", + key_property=key_prop, + properties=[other_prop], + ) + + rel_type, rel_obj, constraint, index = relationship.to_aura_data_import( + "rt:2", "r:2", "n:0", "n:1", "c:5", "i:5" + ) + + # Check relationship type has properties + assert len(rel_type["properties"]) == 2 + assert rel_type["properties"][0]["token"] == "relationshipId" + assert not rel_type["properties"][0]["nullable"] # Key property + assert rel_type["properties"][1]["token"] == "strength" + assert rel_type["properties"][1]["nullable"] # Non-key property + + # Check constraint (should exist since relationship has key property) + assert constraint is not None + assert constraint["$id"] == "c:5" + assert constraint["name"] == "CONNECTED_constraint" + assert constraint["constraintType"] == "uniqueness" + assert constraint["entityType"] == "relationship" + assert constraint["relationshipType"]["$ref"] == "#rt:2" + + # Check index (should exist since relationship has key property) + assert index is not None + assert index["$id"] == "i:5" + assert index["name"] == "CONNECTED_index" + assert index["indexType"] == "default" + assert index["entityType"] == "relationship" + assert index["relationshipType"]["$ref"] == "#rt:2" + + def test_relationship_source_info_export(self): + """Test that relationship property source information is properly exported.""" + # Create nodes with source information + country_source = PropertySource( + column_name="country_id", + table_name="countries.csv", + location="local", + source_type="local", + ) + + country_key_prop = Property( + name="id", + type="INTEGER", + source=country_source, + description="Country identifier", + ) + + country_node = Node( + label="Country", key_property=country_key_prop, properties=[] + ) + + region_source = PropertySource( + column_name="region_name", + table_name="regions.csv", + location="local", + source_type="local", + ) + + region_key_prop = Property( + name="name", type="STRING", source=region_source, description="Region name" + ) + + region_node = Node(label="Region", key_property=region_key_prop, properties=[]) + + # Create relationship with property that has different source table + rel_prop_source = PropertySource( + column_name="connection_weight", + table_name="country_region_connections.csv", + location="local", + source_type="local", + ) + + rel_prop = Property( + name="weight", + type="FLOAT", + source=rel_prop_source, + description="Connection weight", + ) + + relationship = Relationship( + type="BELONGS_TO", + start_node_label="Country", + end_node_label="Region", + properties=[rel_prop], + ) + + # Create data model and export + data_model = DataModel( + nodes=[country_node, region_node], relationships=[relationship] + ) + aura_dict = data_model.to_aura_data_import_dict() + + # Verify that relationship uses its own table name, not the source node's table + rel_mappings = aura_dict["dataModel"]["graphMappingRepresentation"][ + "relationshipMappings" + ] + assert len(rel_mappings) == 1 + assert rel_mappings[0]["tableName"] == "country_region_connections.csv" + + # Verify that relationship property field name is correct + rel_prop_mappings = rel_mappings[0]["propertyMappings"] + assert len(rel_prop_mappings) == 1 + assert rel_prop_mappings[0]["fieldName"] == 
"connection_weight" + + # Verify that node mappings still use their own table names + node_mappings = aura_dict["dataModel"]["graphMappingRepresentation"][ + "nodeMappings" + ] + assert len(node_mappings) == 2 + assert node_mappings[0]["tableName"] == "countries.csv" + assert node_mappings[1]["tableName"] == "regions.csv" + + +class TestDataModelConversion: + """Test DataModel conversion methods.""" + + def test_from_aura_data_import_full_model(self, sample_aura_data_import_model): + """Test converting the full sample Aura Data Import model.""" + data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) + + # Check nodes + assert len(data_model.nodes) == 5 + node_labels = [n.label for n in data_model.nodes] + assert "Country" in node_labels + assert "SubRegion" in node_labels + assert "Region" in node_labels + assert "TimeZones" in node_labels + assert "Currency" in node_labels + + # Check relationships + assert len(data_model.relationships) == 4 + rel_types = [r.type for r in data_model.relationships] + assert "IN_SUBREGION" in rel_types + assert "IN_REGION" in rel_types + assert "IN_TIMEZONE" in rel_types + assert "USES_CURRENCY" in rel_types + + def test_from_aura_data_import_node_key_properties( + self, sample_aura_data_import_model + ): + """Test that node key properties are correctly identified.""" + data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) + + # Find specific nodes and check their key properties + country_node = next(n for n in data_model.nodes if n.label == "Country") + assert country_node.key_property.name == "countryId" + assert country_node.key_property.type == "INTEGER" + + region_node = next(n for n in data_model.nodes if n.label == "Region") + assert region_node.key_property.name == "region" + assert region_node.key_property.type == "STRING" + + def test_to_aura_data_import_dict_structure(self): + """Test the structure of the exported Aura Data Import dictionary.""" + # Create a simple data model + key_prop = Property(name="id", type="INTEGER") + node1 = Node(label="TestNode", key_property=key_prop) + + rel = Relationship( + type="TEST_REL", start_node_label="TestNode", end_node_label="TestNode" + ) + + data_model = DataModel(nodes=[node1], relationships=[rel]) + + result = data_model.to_aura_data_import_dict() + + # Check top-level structure + assert "version" in result + assert "visualisation" in result + assert "dataModel" in result + + # Check visualization structure + assert "nodes" in result["visualisation"] + assert len(result["visualisation"]["nodes"]) == 1 + + # Check data model structure + data_model_content = result["dataModel"] + assert "graphSchemaRepresentation" in data_model_content + assert "graphSchemaExtensionsRepresentation" in data_model_content + assert "graphMappingRepresentation" in data_model_content + assert "configurations" in data_model_content + + # Check graph schema + graph_schema = data_model_content["graphSchemaRepresentation"]["graphSchema"] + assert "nodeLabels" in graph_schema + assert "relationshipTypes" in graph_schema + assert "nodeObjectTypes" in graph_schema + assert "relationshipObjectTypes" in graph_schema + assert "constraints" in graph_schema + assert "indexes" in graph_schema + + def test_to_aura_data_import_dict_node_constraints_and_indexes(self): + """Test that constraints and indexes are properly generated.""" + key_prop = Property(name="userId", type="INTEGER") + node = Node(label="User", key_property=key_prop) + data_model = DataModel(nodes=[node]) + + result = 
data_model.to_aura_data_import_dict() + + graph_schema = result["dataModel"]["graphSchemaRepresentation"]["graphSchema"] + + # Check constraints + assert len(graph_schema["constraints"]) == 1 + constraint = graph_schema["constraints"][0] + assert constraint["name"] == "User_constraint" + assert constraint["constraintType"] == "uniqueness" + assert constraint["entityType"] == "node" + + # Check indexes + assert len(graph_schema["indexes"]) == 1 + index = graph_schema["indexes"][0] + assert index["name"] == "User_index" + assert index["indexType"] == "default" + assert index["entityType"] == "node" + + def test_round_trip_conversion_simple(self): + """Test that a simple model can be converted to Aura format and back.""" + # Create original model + key_prop = Property(name="id", type="STRING") + node = Node(label="TestNode", key_property=key_prop) + original_model = DataModel(nodes=[node]) + + # Convert to Aura format + aura_dict = original_model.to_aura_data_import_dict() + + # Convert back + converted_model = DataModel.from_aura_data_import(aura_dict) + + # Check that essential structure is preserved + assert len(converted_model.nodes) == 1 + assert converted_model.nodes[0].label == "TestNode" + assert converted_model.nodes[0].key_property.name == "id" + assert converted_model.nodes[0].key_property.type == "STRING" + + def test_round_trip_conversion_with_relationships(self): + """Test round-trip conversion with relationships.""" + # Create original model + key_prop1 = Property(name="id1", type="INTEGER") + key_prop2 = Property(name="id2", type="STRING") + node1 = Node(label="Node1", key_property=key_prop1) + node2 = Node(label="Node2", key_property=key_prop2) + + rel = Relationship( + type="CONNECTS", start_node_label="Node1", end_node_label="Node2" + ) + + original_model = DataModel(nodes=[node1, node2], relationships=[rel]) + + # Convert to Aura format and back + aura_dict = original_model.to_aura_data_import_dict() + converted_model = DataModel.from_aura_data_import(aura_dict) + + # Check nodes + assert len(converted_model.nodes) == 2 + node_labels = [n.label for n in converted_model.nodes] + assert "Node1" in node_labels + assert "Node2" in node_labels + + # Check relationships + assert len(converted_model.relationships) == 1 + assert converted_model.relationships[0].type == "CONNECTS" + assert converted_model.relationships[0].start_node_label == "Node1" + assert converted_model.relationships[0].end_node_label == "Node2" + + def test_json_serialization(self, sample_aura_data_import_model): + """Test that the converted model can be serialized to JSON.""" + data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) + json_str = data_model.to_aura_data_import_json_str() + + # Should be valid JSON + parsed = json.loads(json_str) + assert isinstance(parsed, dict) + assert "dataModel" in parsed + + def test_metadata_preservation_round_trip(self, sample_aura_data_import_model): + """Test that metadata (constraints, indexes, version, configurations) is preserved during round-trip conversion.""" + # Convert from Aura Data Import to our model + data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) + + # Verify metadata was stored + assert "aura_data_import" in data_model.metadata + aura_metadata = data_model.metadata["aura_data_import"] + + # Check that all expected metadata fields are present + assert "version" in aura_metadata + assert "dataModel_version" in aura_metadata + assert "constraints" in aura_metadata + assert "indexes" in aura_metadata + assert 
"configurations" in aura_metadata + assert "dataSourceSchema" in aura_metadata + + # Verify the stored values match the original + assert aura_metadata["version"] == sample_aura_data_import_model["version"] + assert ( + aura_metadata["dataModel_version"] + == sample_aura_data_import_model["dataModel"]["version"] + ) + assert ( + aura_metadata["constraints"] + == sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ]["constraints"] + ) + assert ( + aura_metadata["indexes"] + == sample_aura_data_import_model["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ]["indexes"] + ) + assert ( + aura_metadata["configurations"] + == sample_aura_data_import_model["dataModel"]["configurations"] + ) + + # Check that visualization data was stored in node metadata + original_vis_nodes = sample_aura_data_import_model["visualisation"]["nodes"] + for i, node in enumerate(data_model.nodes): + node_id = f"n:{i}" + original_vis_node = next( + (v for v in original_vis_nodes if v["id"] == node_id), None + ) + if original_vis_node: + assert "visualization" in node.metadata + assert ( + node.metadata["visualization"]["position"] + == original_vis_node["position"] + ) + + # Convert back to Aura Data Import + converted_back = data_model.to_aura_data_import_dict() + + # Verify that the metadata was restored + assert converted_back["version"] == sample_aura_data_import_model["version"] + assert ( + converted_back["dataModel"]["version"] + == sample_aura_data_import_model["dataModel"]["version"] + ) + assert ( + converted_back["dataModel"]["configurations"] + == sample_aura_data_import_model["dataModel"]["configurations"] + ) + + # Verify that visualization was reconstructed correctly + assert "visualisation" in converted_back + assert "nodes" in converted_back["visualisation"] + assert len(converted_back["visualisation"]["nodes"]) == len(data_model.nodes) + + # Check that positions were preserved for existing nodes + original_vis_nodes = sample_aura_data_import_model["visualisation"]["nodes"] + converted_vis_nodes = converted_back["visualisation"]["nodes"] + for original_vis_node in original_vis_nodes: + converted_vis_node = next( + (v for v in converted_vis_nodes if v["id"] == original_vis_node["id"]), + None, + ) + if converted_vis_node: + assert converted_vis_node["position"] == original_vis_node["position"] + + # Check that constraints and indexes were preserved + original_constraints = sample_aura_data_import_model["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"]["constraints"] + converted_constraints = converted_back["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"]["constraints"] + assert converted_constraints == original_constraints + + original_indexes = sample_aura_data_import_model["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"]["indexes"] + converted_indexes = converted_back["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ]["indexes"] + assert converted_indexes == original_indexes + + def test_export_without_metadata_uses_defaults(self): + """Test that exporting a DataModel without Aura metadata uses appropriate defaults.""" + # Create a simple DataModel from scratch (no metadata) + key_prop = Property(name="id", type="INTEGER") + node = Node(label="TestNode", key_property=key_prop) + data_model = DataModel(nodes=[node]) + + # Export to Aura Data Import format + aura_dict = data_model.to_aura_data_import_dict() + + # Verify default values are used + assert aura_dict["version"] == "2.3.1-beta.0" + assert 
aura_dict["dataModel"]["version"] == "2.3.1-beta.0" + assert aura_dict["dataModel"]["configurations"] == {"idsToIgnore": []} + + # Verify that table schemas are automatically generated (not empty) + data_source_schema = aura_dict["dataModel"]["graphMappingRepresentation"][ + "dataSourceSchema" + ] + assert data_source_schema["type"] == "local" + assert len(data_source_schema["tableSchemas"]) == 1 + assert data_source_schema["tableSchemas"][0]["name"] == "testnode.csv" + assert len(data_source_schema["tableSchemas"][0]["fields"]) == 1 + assert data_source_schema["tableSchemas"][0]["fields"][0]["name"] == "id" + + # Verify visualization nodes are generated + assert "visualisation" in aura_dict + assert "nodes" in aura_dict["visualisation"] + assert len(aura_dict["visualisation"]["nodes"]) == 1 + assert aura_dict["visualisation"]["nodes"][0]["id"] == "n:0" + + # Verify constraints and indexes are generated for the node + graph_schema = aura_dict["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ] + assert len(graph_schema["constraints"]) == 1 + assert len(graph_schema["indexes"]) == 1 + assert graph_schema["constraints"][0]["name"] == "TestNode_constraint" + assert graph_schema["indexes"][0]["name"] == "TestNode_index" + + def test_visualization_reconstruction_with_new_nodes( + self, sample_aura_data_import_model + ): + """Test that visualization is properly reconstructed when new nodes are added.""" + # Convert from Aura Data Import to our model + data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) + + # Add a new node that wasn't in the original data + new_key_prop = Property(name="newId", type="STRING") + new_node = Node(label="NewNode", key_property=new_key_prop) + data_model.add_node(new_node) + + # Convert back to Aura Data Import + converted_back = data_model.to_aura_data_import_dict() + + # Verify visualization includes all nodes (original + new) + vis_nodes = converted_back["visualisation"]["nodes"] + assert len(vis_nodes) == len(data_model.nodes) + + # Check that original nodes kept their positions + original_vis_nodes = sample_aura_data_import_model["visualisation"]["nodes"] + for original_vis_node in original_vis_nodes: + converted_vis_node = next( + (v for v in vis_nodes if v["id"] == original_vis_node["id"]), None + ) + if converted_vis_node: + assert converted_vis_node["position"] == original_vis_node["position"] + + # Check that new node got a default position + new_node_id = ( + f"n:{len(data_model.nodes) - 1}" # Last node should be the new one + ) + new_vis_node = next((v for v in vis_nodes if v["id"] == new_node_id), None) + assert new_vis_node is not None + assert "position" in new_vis_node + assert isinstance(new_vis_node["position"]["x"], float) + assert isinstance(new_vis_node["position"]["y"], float) + + +class TestEdgeCases: + """Test edge cases and error conditions.""" + + def test_empty_data_model_conversion(self): + """Test converting an empty data model.""" + empty_model = DataModel() + + aura_dict = empty_model.to_aura_data_import_dict() + + # Should have basic structure even when empty + assert "version" in aura_dict + assert "visualisation" in aura_dict + assert len(aura_dict["visualisation"]["nodes"]) == 0 + + graph_schema = aura_dict["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ] + assert len(graph_schema["nodeLabels"]) == 0 + assert len(graph_schema["relationshipTypes"]) == 0 + + def test_node_with_no_properties_mapping(self, sample_node_label): + """Test handling of node with missing property mappings.""" + 
empty_mapping = { + "node": {"$ref": "#n:1"}, + "tableName": "unknown", + "propertyMappings": [], + } + + # Should raise error when property is not found in mapping + with pytest.raises(ValueError): + Node.from_aura_data_import( + sample_node_label, "subregion", empty_mapping, "local" + ) + + def test_malformed_aura_data_missing_required_fields(self): + """Test handling of malformed Aura Data Import data.""" + malformed_data = { + "version": "2.3.1-beta.0", + # Missing visualisation and dataModel + } + + with pytest.raises(KeyError): + DataModel.from_aura_data_import(malformed_data) + + def test_property_type_edge_cases(self, sample_source_mapping): + """Test property type conversion edge cases.""" + # Test with unknown type + unknown_type_prop = { + "$id": "p:unknown", + "token": "unknown", + "type": {"type": "unknown_type"}, + "nullable": False, + } + + prop = Property.from_aura_data_import(unknown_type_prop, sample_source_mapping) + assert prop.type == "UNKNOWN_TYPE" # Should uppercase unknown types + + # Test conversion back + result = prop.to_aura_data_import("p:test", is_key=False) + assert result["type"]["type"] == "string" # Should default to string + + +def test_aura_data_import_round_trip_data_integrity( + sample_aura_data_import_model: dict[str, Any], +) -> None: + """Test that Aura Data Import model preserves essential data integrity through round-trip conversion.""" + # Load the model + data_model = DataModel.from_aura_data_import(sample_aura_data_import_model) + + # Convert back to Aura Data Import format + converted_back = data_model.to_aura_data_import_dict() + + # Check top-level structure + assert converted_back["version"] == sample_aura_data_import_model["version"] + assert ( + converted_back["dataModel"]["version"] + == sample_aura_data_import_model["dataModel"]["version"] + ) + + # Check that all nodes are preserved + original_node_labels = { + nl["token"] + for nl in sample_aura_data_import_model["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"]["nodeLabels"] + } + converted_node_labels = { + nl["token"] + for nl in converted_back["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ]["nodeLabels"] + } + assert original_node_labels == converted_node_labels + + # Check that all relationships are preserved + original_rel_types = { + rt["token"] + for rt in sample_aura_data_import_model["dataModel"][ + "graphSchemaRepresentation" + ]["graphSchema"]["relationshipTypes"] + } + converted_rel_types = { + rt["token"] + for rt in converted_back["dataModel"]["graphSchemaRepresentation"][ + "graphSchema" + ]["relationshipTypes"] + } + assert original_rel_types == converted_rel_types + + # Check that visualization nodes are preserved for all nodes + assert len(converted_back["visualisation"]["nodes"]) == len( + sample_aura_data_import_model["visualisation"]["nodes"] + ) + + # Check that metadata was preserved + assert ( + converted_back["dataModel"]["configurations"] + == sample_aura_data_import_model["dataModel"]["configurations"] + )