Skip to content

Commit fd0cf1d

Browse files
authored
Merge pull request #42 from linkml/ref-integ
2 parents e4aa4ba + 993845b commit fd0cf1d

30 files changed

+1123
-328
lines changed

docs/how-to/Check-Referential-Integrity.ipynb

Lines changed: 489 additions & 80 deletions
Large diffs are not rendered by default.

docs/how-to/Index-Bioinformatics-Databases.ipynb

Lines changed: 290 additions & 0 deletions
Large diffs are not rendered by default.

src/linkml_store/api/client.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
logger = logging.getLogger(__name__)
1313

1414

15-
1615
HANDLE_MAP = {
1716
"duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
1817
"sqlite": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
@@ -220,14 +219,14 @@ def attach_database(
220219
scheme, _ = handle.split(":", 1)
221220
if scheme not in HANDLE_MAP:
222221
raise ValueError(f"Unknown scheme: {scheme}")
223-
module_path, class_name = HANDLE_MAP[scheme].rsplit('.', 1)
222+
module_path, class_name = HANDLE_MAP[scheme].rsplit(".", 1)
224223
try:
225224
module = importlib.import_module(module_path)
226225
cls = getattr(module, class_name)
227226
except ImportError as e:
228227
raise ImportError(f"Failed to import {scheme} database. Make sure the correct extras are installed: {e}")
229228

230-
#cls = HANDLE_MAP[scheme]
229+
# cls = HANDLE_MAP[scheme]
231230
db = cls(handle=handle, recreate_if_exists=recreate_if_exists, **kwargs)
232231
if schema_view:
233232
db.set_schema_view(schema_view)

src/linkml_store/api/collection.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
211211
"""
212212
raise NotImplementedError
213213

214-
def index (
214+
def index(
215215
self,
216216
objs: Union[OBJECT, List[OBJECT]],
217217
index_name: Optional[str] = None,
@@ -231,10 +231,13 @@ def index (
231231
"""
232232
raise NotImplementedError
233233

234-
def upsert(self,
235-
objs: Union[OBJECT, List[OBJECT]],
236-
filter_fields: List[str],
237-
update_fields: Union[List[str], None] = None, **kwargs):
234+
def upsert(
235+
self,
236+
objs: Union[OBJECT, List[OBJECT]],
237+
filter_fields: List[str],
238+
update_fields: Union[List[str], None] = None,
239+
**kwargs,
240+
):
238241
"""
239242
Add one or more objects to the collection.
240243
@@ -455,10 +458,10 @@ def get_one(self, id: IDENTIFIER, **kwargs) -> Optional[OBJECT]:
455458
return None
456459

457460
def find(
458-
self,
459-
where: Optional[Any] = None,
460-
select_cols: Optional[List[str] ] = None,
461-
**kwargs,
461+
self,
462+
where: Optional[Any] = None,
463+
select_cols: Optional[List[str]] = None,
464+
**kwargs,
462465
) -> QueryResult:
463466
"""
464467
Find objects in the collection using a where query.
@@ -596,13 +599,15 @@ def search(
596599
assert ix_coll.size() > 0
597600
qr = ix_coll.find(where=where, limit=-1, **kwargs)
598601
index_col = ix.index_field
602+
599603
# TODO: optimize this for large indexes
600604
def row2array(row):
601605
v = row[index_col]
602606
if isinstance(v, str):
603607
# sqlite stores arrays as strings
604608
v = json.loads(v)
605609
return np.array(v, dtype=float)
610+
606611
vector_pairs = [(row, row2array(row)) for row in qr.rows]
607612
results = ix.search(query, vector_pairs, limit=limit, mmr_relevance_factor=mmr_relevance_factor, **kwargs)
608613
for r in results:
@@ -618,12 +623,12 @@ def row2array(row):
618623
return new_qr
619624

620625
def group_by(
621-
self,
622-
group_by_fields: List[str],
623-
inlined_field = "objects",
624-
agg_map: Optional[Dict[str, str]] = None,
625-
where: Optional[Dict] = None,
626-
**kwargs,
626+
self,
627+
group_by_fields: List[str],
628+
inlined_field="objects",
629+
agg_map: Optional[Dict[str, str]] = None,
630+
where: Optional[Dict] = None,
631+
**kwargs,
627632
) -> QueryResult:
628633
"""
629634
Group objects in the collection by a column.
@@ -650,14 +655,9 @@ def group_by(
650655
top_obj = {k: v for k, v in zip(pk_fields, pk)}
651656
top_obj[inlined_field] = objs
652657
results.append(top_obj)
653-
r = QueryResult(
654-
num_rows=len(results),
655-
rows=results
656-
)
658+
r = QueryResult(num_rows=len(results), rows=results)
657659
return r
658660

659-
660-
661661
@property
662662
def is_internal(self) -> bool:
663663
"""

src/linkml_store/api/database.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,31 @@ def induce_schema_view(self) -> SchemaView:
595595
sb.add_class(coll.target_class_name)
596596
return SchemaView(sb.schema)
597597

598-
def iter_validate_database(self, **kwargs) -> Iterator["ValidationResult"]:
598+
def validate_database(self, **kwargs) -> List["ValidationResult"]:
599+
"""
600+
Validate the contents of the database.
601+
602+
As `iter_validate_database`, but returns a list of validation results.
603+
604+
:param kwargs:
605+
:return:
606+
"""
607+
return list(self.iter_validate_database(**kwargs))
608+
609+
def validate_database(self, **kwargs) -> List["ValidationResult"]:
610+
"""
611+
Validate the contents of the database.
612+
613+
As `iter_validate_database`, but returns a list of validation results.
614+
615+
:param kwargs:
616+
:return:
617+
"""
618+
return list(self.iter_validate_database(**kwargs))
619+
620+
def iter_validate_database(
621+
self, ensure_referential_integrity: bool = None, **kwargs
622+
) -> Iterator["ValidationResult"]:
599623
"""
600624
Validate the contents of the database.
601625
@@ -635,12 +659,14 @@ def iter_validate_database(self, **kwargs) -> Iterator["ValidationResult"]:
635659
'capital' is a required property
636660
'continent' is a required property
637661
662+
:param ensure_referential_integrity: ensure referential integrity
638663
:param kwargs:
639664
:return: iterator over validation results
640665
"""
641666
for collection in self.list_collections():
642667
yield from collection.iter_validate_collection(**kwargs)
643-
if self.metadata.ensure_referential_integrity:
668+
if self.metadata.ensure_referential_integrity or ensure_referential_integrity:
669+
logger.info(f"Validating referential integrity on {self.alias}")
644670
yield from self._validate_referential_integrity(**kwargs)
645671

646672
def _validate_referential_integrity(self, **kwargs) -> Iterator["ValidationResult"]:
@@ -661,7 +687,9 @@ def _validate_referential_integrity(self, **kwargs) -> Iterator["ValidationResul
661687
induced_slots = sv.class_induced_slots(cd.name)
662688
slot_map = {s.name: s for s in induced_slots}
663689
# rmap = {s.name: s.range for s in induced_slots}
690+
# map slot ranges to a collection where that range is stored
664691
sr_to_coll = {s.name: cmap.get(s.range, []) for s in induced_slots if s.range}
692+
logger.debug(f"Validating referential integrity for {collection.target_class_name} // {sr_to_coll}")
665693
for obj in collection.find_iter():
666694
for k, v in obj.items():
667695
if k not in sr_to_coll:

src/linkml_store/api/stores/duckdb/duckdb_database.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,9 @@ def _table_exists(self, table: str) -> bool:
100100
meta_query = Query(
101101
from_table="sqlite_master",
102102
where_clause={
103-
#"type": "table",
103+
# "type": "table",
104104
"name": table,
105-
}
105+
},
106106
)
107107
else:
108108
if table.startswith("information_schema"):
@@ -112,7 +112,7 @@ def _table_exists(self, table: str) -> bool:
112112
where_clause={
113113
"table_type": "BASE TABLE",
114114
"table_name": table,
115-
}
115+
},
116116
)
117117

118118
qr = self.query(meta_query)

src/linkml_store/api/stores/filesystem/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
Handles have the form:
55
66
- ``file:<path>`` for a local file
7-
"""
7+
"""
88

99
from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
1010
from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase

src/linkml_store/api/stores/mongodb/mongodb_collection.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,14 @@ def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
4141
del obj["_id"]
4242
self._post_insert_hook(objs)
4343

44-
45-
def index(self,
46-
objs: Union[OBJECT, List[OBJECT]],
47-
index_name: Optional[str] = None,
48-
replace: bool = False,
49-
unique: bool = False,
50-
**kwargs):
44+
def index(
45+
self,
46+
objs: Union[OBJECT, List[OBJECT]],
47+
index_name: Optional[str] = None,
48+
replace: bool = False,
49+
unique: bool = False,
50+
**kwargs,
51+
):
5152
"""
5253
Create indexes on the collection.
5354
@@ -86,11 +87,13 @@ def index(self,
8687
else:
8788
logging.debug(f"Index already exists for field {obj}, skipping creation.")
8889

89-
def upsert(self,
90-
objs: Union[OBJECT, List[OBJECT]],
91-
filter_fields: List[str],
92-
update_fields: Optional[List[str]] = None,
93-
**kwargs):
90+
def upsert(
91+
self,
92+
objs: Union[OBJECT, List[OBJECT]],
93+
filter_fields: List[str],
94+
update_fields: Optional[List[str]] = None,
95+
**kwargs,
96+
):
9497
"""
9598
Upsert one or more documents into the MongoDB collection.
9699

src/linkml_store/api/stores/mongodb/mongodb_database.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ def _db_name(self) -> str:
4242
parsed_url = urlparse(self.handle)
4343
path_parts = parsed_url.path.lstrip("/").split("?")[0].split("/")
4444
db_name = path_parts[0] if path_parts else "default"
45+
if not db_name:
46+
db_name = self.alias
4547
else:
4648
db_name = "default"
4749
return db_name

src/linkml_store/api/stores/solr/solr_collection.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,11 @@ def query(self, query: Query, **kwargs) -> QueryResult:
6363

6464
def query_facets(
6565
self,
66-
where: Optional[Dict] = None,
67-
facet_columns: List[str] = None,
68-
facet_limit=DEFAULT_FACET_LIMIT,
69-
facet_min_count: int = 1,
70-
**kwargs
66+
where: Optional[Dict] = None,
67+
facet_columns: List[str] = None,
68+
facet_limit=DEFAULT_FACET_LIMIT,
69+
facet_min_count: int = 1,
70+
**kwargs,
7171
) -> Dict[str, Dict[str, int]]:
7272
solr_query = self._build_solr_query(where)
7373
solr_query["facet"] = "true"

0 commit comments

Comments
 (0)