diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py
index 461972cf..226e64dd 100644
--- a/datajoint/autopopulate.py
+++ b/datajoint/autopopulate.py
@@ -100,7 +100,7 @@ def make(self, key):
         1. Fetch data from tables above in the dependency hierarchy, restricted by the given key.
         2. Compute secondary attributes based on the fetched data.
-        3. Insert the new tuples into the current table.
+        3. Insert the new tuple(s) into the current table.

         The method can be implemented either as:
         (a) Regular method: All three steps are performed in a single database transaction.
@@ -146,7 +146,8 @@ def make(self, key):
         ):  # user must implement `make`
             raise NotImplementedError(
-                "Subclasses of AutoPopulate must implement the method `make` or (`make_fetch` + `make_compute` + `make_insert`)"
+                "Subclasses of AutoPopulate must implement the method `make` "
+                "or (`make_fetch` + `make_compute` + `make_insert`)"
             )

         # User has implemented `_fetch`, `_compute`, and `_insert` methods instead
@@ -265,9 +266,8 @@ def populate(
            self.connection.schemas[self.target.database].jobs if reserve_jobs else None
        )

-        # define and set up signal handler for SIGTERM:
        if reserve_jobs:
-
+            # Define a signal handler for SIGTERM
            def handler(signum, frame):
                logger.info("Populate terminated by SIGTERM")
                raise SystemExit("SIGTERM received")
diff --git a/datajoint/blob.py b/datajoint/blob.py
index 82e1c3d1..63978968 100644
--- a/datajoint/blob.py
+++ b/datajoint/blob.py
@@ -140,7 +140,7 @@ def read_blob(self, n_bytes=None):
                 "S": self.read_struct,  # matlab struct array
                 "C": self.read_cell_array,  # matlab cell array
                 # basic data types
-                "\xFF": self.read_none,  # None
+                "\xff": self.read_none,  # None
                 "\x01": self.read_tuple,  # a Sequence (e.g. tuple)
                 "\x02": self.read_list,  # a MutableSequence (e.g. list)
                 "\x03": self.read_set,  # a Set
@@ -401,7 +401,7 @@ def read_none(self):

     @staticmethod
     def pack_none():
-        return b"\xFF"
+        return b"\xff"

     def read_tuple(self):
         return tuple(
diff --git a/datajoint/condition.py b/datajoint/condition.py
index 7fbe0c7b..96cfbb6e 100644
--- a/datajoint/condition.py
+++ b/datajoint/condition.py
@@ -1,4 +1,4 @@
-""" methods for generating SQL WHERE clauses from datajoint restriction conditions """
+"""methods for generating SQL WHERE clauses from datajoint restriction conditions"""

 import collections
 import datetime
diff --git a/datajoint/preview.py b/datajoint/preview.py
index 77557043..564c92a0 100644
--- a/datajoint/preview.py
+++ b/datajoint/preview.py
@@ -1,4 +1,4 @@
-""" methods for generating previews of query expression results in python command line and Jupyter """
+"""methods for generating previews of query expression results in python command line and Jupyter"""

 from .settings import config
diff --git a/datajoint/table.py b/datajoint/table.py
index ff51170e..7e3e0c3a 100644
--- a/datajoint/table.py
+++ b/datajoint/table.py
@@ -137,7 +137,7 @@ def alter(self, prompt=True, context=None):
         sql, external_stores = alter(self.definition, old_definition, context)
         if not sql:
             if prompt:
-                logger.warn("Nothing to alter.")
+                logger.warning("Nothing to alter.")
         else:
             sql = "ALTER TABLE {tab}\n\t".format(
                 tab=self.full_table_name
@@ -520,7 +520,13 @@ def cascade(table):
            try:
                delete_count = table.delete_quick(get_count=True)
            except IntegrityError as error:
-                match = foreign_key_error_regexp.match(error.args[0]).groupdict()
+                match = foreign_key_error_regexp.match(error.args[0])
+                if match is None:
+                    raise DataJointError(
+                        "Cascading deletes failed because the error message is missing foreign key information."
+                        "Make sure you have REFERENCES privilege to all dependent tables."
+                    ) from None
+                match = match.groupdict()
                # if schema name missing, use table
                if "`.`" not in match["child"]:
                    match["child"] = "{}.{}".format(
@@ -643,7 +649,7 @@ def cascade(table):
        # Confirm and commit
        if delete_count == 0:
            if safemode:
-                logger.warn("Nothing to delete.")
+                logger.warning("Nothing to delete.")
            if transaction:
                self.connection.cancel_transaction()
        elif not transaction:
@@ -653,12 +659,12 @@ def cascade(table):
            if transaction:
                self.connection.commit_transaction()
            if safemode:
-                logger.info("Deletes committed.")
+                logger.info("Delete committed.")
        else:
            if transaction:
                self.connection.cancel_transaction()
            if safemode:
-                logger.warn("Deletes cancelled")
+                logger.warning("Delete cancelled")
        return delete_count

    def drop_quick(self):
@@ -726,11 +732,6 @@ def size_on_disk(self):
        ).fetchone()
        return ret["Data_length"] + ret["Index_length"]

-    def show_definition(self):
-        raise AttributeError(
-            "show_definition is deprecated. Use the describe method instead."
-        )
-
    def describe(self, context=None, printout=False):
        """
        :return: the definition string for the query using DataJoint DDL.
diff --git a/datajoint/utils.py b/datajoint/utils.py
index 1d89d527..c3453668 100644
--- a/datajoint/utils.py
+++ b/datajoint/utils.py
@@ -147,3 +147,5 @@ def parse_sql(filepath):
        if line.endswith(delimiter):
            yield " ".join(statement)
            statement = []
+    if statement:
+        yield " ".join(statement)
diff --git a/docs/src/concepts/data-model.md b/docs/src/concepts/data-model.md
index 14528fe0..90460361 100644
--- a/docs/src/concepts/data-model.md
+++ b/docs/src/concepts/data-model.md
@@ -54,7 +54,7 @@ columns (often called attributes).
 A collection of base relations with their attributes, domain constraints, uniqueness
 constraints, and referential constraints is called a schema.

-**Domain constraints:**
+**Domain constraints:**
   Each attribute (column) in a table is associated with a specific attribute domain (or
   datatype, a set of possible values), ensuring that the data entered is valid.
   Attribute domains may not include relations, which keeps the data model
@@ -68,13 +68,13 @@ columns (often called attributes).
 One key in a relation is designated as the primary key used for referencing its elements.

 **Referential constraints:**
-  Associations among data are established by means of referential constraints with the
+  Associations among data are established by means of referential constraints with the
   help of foreign keys.
   A referential constraint on relation A referencing relation B allows only those entities
   in A whose foreign key attributes match the key attributes of an entity in B.

 **Declarative queries:**
-  Data queries are formulated through declarative, as opposed to imperative,
+  Data queries are formulated through declarative, as opposed to imperative,
   specifications of sought results.
   This means that query expressions convey the logic for the result rather than the
   procedure for obtaining it.
@@ -106,7 +106,7 @@ clarity, efficiency, workflow management, and precise and flexible data queries.
 By enforcing entity normalization, simplifying dependency declarations,
 offering a rich query algebra, and visualizing relationships through schema diagrams,
 DataJoint makes relational database programming
-more intuitive and robust for complex data pipelines.
+more intuitive and robust for complex data pipelines.

 The model has emerged over a decade of continuous development of complex data
 pipelines for neuroscience experiments ([Yatsenko et al.,
@@ -123,7 +123,7 @@ DataJoint comprises:
 + a schema [definition](../design/tables/declare.md) language
 + a data [manipulation](../manipulation/index.md) language
 + a data [query](../query/principles.md) language
-+ a [diagramming](../design/diagrams.md) notation for visualizing relationships between
++ a [diagramming](../design/diagrams.md) notation for visualizing relationships between
   modeled entities

 The key refinement of DataJoint over other relational data models and their
diff --git a/docs/src/concepts/teamwork.md b/docs/src/concepts/teamwork.md
index 4cccea9f..a0a782dd 100644
--- a/docs/src/concepts/teamwork.md
+++ b/docs/src/concepts/teamwork.md
@@ -60,33 +60,33 @@ division of labor among team members, leading to greater efficiency and better s
 ### Scientists

 Design and conduct experiments, collecting data.
-They interact with the data pipeline through graphical user interfaces designed by
+They interact with the data pipeline through graphical user interfaces designed by
 others.
 They understand what analysis is used to test their hypotheses.

 ### Data scientists

-Have the domain expertise and select and implement the processing and analysis
+Have the domain expertise and select and implement the processing and analysis
 methods for experimental data.
-Data scientists are in charge of defining and managing the data pipeline using
-DataJoint's data model, but they may not know the details of the underlying
+Data scientists are in charge of defining and managing the data pipeline using
+DataJoint's data model, but they may not know the details of the underlying
 architecture.
-They interact with the pipeline using client programming interfaces directly from
+They interact with the pipeline using client programming interfaces directly from
 languages such as MATLAB and Python.
-The bulk of this manual is written for working data scientists, except for System
+The bulk of this manual is written for working data scientists, except for System
 Administration.

 ### Data engineers

 Work with the data scientists to support the data pipeline.
-They rely on their understanding of the DataJoint data model to configure and
-administer the required IT resources such as database servers, data storage
+They rely on their understanding of the DataJoint data model to configure and
+administer the required IT resources such as database servers, data storage
 servers, networks, cloud instances, [Globus](https://globus.org) endpoints, etc.
-Data engineers can provide general solutions such as web hosting, data publishing,
+Data engineers can provide general solutions such as web hosting, data publishing,
 interfaces, exports and imports.
-The System Administration section of this tutorial contains materials helpful in
+The System Administration section of this tutorial contains materials helpful in
 accomplishing these tasks.

 DataJoint is designed to delineate a clean boundary between **data science** and **data
diff --git a/docs/src/design/integrity.md b/docs/src/design/integrity.md
index 299a2a45..cb712275 100644
--- a/docs/src/design/integrity.md
+++ b/docs/src/design/integrity.md
@@ -1,7 +1,7 @@
 # Data Integrity

-The term **data integrity** describes guarantees made by the data management process
-that prevent errors and corruption in data due to technical failures and human errors
+The term **data integrity** describes guarantees made by the data management process
+that prevent errors and corruption in data due to technical failures and human errors
 arising in the course of continuous use by multiple agents.

 DataJoint pipelines respect the following forms of data integrity: **entity integrity**,
 **referential integrity**, and **group integrity** as described in more
diff --git a/docs/src/design/tables/customtype.md b/docs/src/design/tables/customtype.md
index 823dd987..aad194ff 100644
--- a/docs/src/design/tables/customtype.md
+++ b/docs/src/design/tables/customtype.md
@@ -49,9 +49,9 @@ attribute type in a datajoint table class:
 import datajoint as dj

 class GraphAdapter(dj.AttributeAdapter):
-
+
     attribute_type = 'longblob'  # this is how the attribute will be declared
-
+
     def put(self, obj):
         # convert the nx.Graph object into an edge list
         assert isinstance(obj, nx.Graph)
@@ -60,7 +60,7 @@ class GraphAdapter(dj.AttributeAdapter):
     def get(self, value):
         # convert edge list back into an nx.Graph
         return nx.Graph(value)
-
+

 # instantiate for use as a datajoint type
 graph = GraphAdapter()
@@ -75,6 +75,6 @@ class Connectivity(dj.Manual):
     definition = """
     conn_id : int
     ---
-    conn_graph = null : <graph>  # a networkx.Graph object
+    conn_graph = null : <graph>  # a networkx.Graph object
     """
 ```
diff --git a/docs/src/design/tables/indexes.md b/docs/src/design/tables/indexes.md
index fcd1b570..9d8148c3 100644
--- a/docs/src/design/tables/indexes.md
+++ b/docs/src/design/tables/indexes.md
@@ -62,7 +62,7 @@ Let’s now imagine that rats in a lab are identified by the combination of `lab
 @schema
 class Rat(dj.Manual):
     definition = """
-    lab_name : char(16)
+    lab_name : char(16)
     rat_id : int unsigned   # lab-specific ID
     ---
     date_of_birth = null : date
@@ -86,7 +86,7 @@ To speed up searches by the `rat_id` and `date_of_birth`, we can explicit indexe
 @schema
 class Rat2(dj.Manual):
     definition = """
-    lab_name : char(16)
+    lab_name : char(16)
     rat_id : int unsigned   # lab-specific ID
     ---
     date_of_birth = null : date
diff --git a/docs/src/faq.md b/docs/src/faq.md
index 1de69bb3..c4c82d01 100644
--- a/docs/src/faq.md
+++ b/docs/src/faq.md
@@ -7,13 +7,13 @@ It is common to enter data during experiments using a graphical user interface.
 1. The [DataJoint platform](https://works.datajoint.com) platform is a web-based,
    end-to-end platform to host and execute data pipelines.
-2. [DataJoint LabBook](https://github.com/datajoint/datajoint-labbook) is an open
+2. [DataJoint LabBook](https://github.com/datajoint/datajoint-labbook) is an open
    source project for data entry but is no longer actively maintained.

 ## Does DataJoint support other programming languages?

 DataJoint [Python](https://docs.datajoint.com/core/datajoint-python/) is the most
-up-to-date version and all future development will focus on the Python API. The
+up-to-date version and all future development will focus on the Python API. The
 [Matlab](https://datajoint.com/docs/core/datajoint-matlab/) API was actively developed
 through 2023. Previous projects implemented some DataJoint features in
 [Julia](https://github.com/BrainCOGS/neuronex_workshop_2018/tree/julia/julia) and
@@ -93,7 +93,7 @@ The entry of metadata can be manual, or it can be an automated part of data acqu
 into the database).

 Depending on their size and contents, raw data files can be stored in a number of ways.
-In the simplest and most common scenario, raw data continues to be stored in either a
+In the simplest and most common scenario, raw data continues to be stored in either a
 local filesystem or in the cloud as collections of files and folders.
 The paths to these files are entered in the database (again, either manually or by
 automated processes).
@@ -101,8 +101,8 @@ This is the point at which the notion of a **data pipeline** begins.
 Below these "manual tables" that contain metadata and file paths are a series of tables
 that load raw data from these files, process it in some way, and insert derived or
 summarized data directly into the database.
-For example, in an imaging application, the very large raw `.TIFF` stacks would reside on
-the filesystem, but the extracted fluorescent trace timeseries for each cell in the
+For example, in an imaging application, the very large raw `.TIFF` stacks would reside on
+the filesystem, but the extracted fluorescent trace timeseries for each cell in the
 image would be stored as a numerical array directly in the database.
 Or the raw video used for animal tracking might be stored in a standard video format on
 the filesystem, but the computed X/Y positions of the animal would be stored in the
@@ -164,7 +164,7 @@ This brings us to the final important question:

 ## How do I get my data out?

-This is the fun part. See [queries](query/operators.md) for details of the DataJoint
+This is the fun part. See [queries](query/operators.md) for details of the DataJoint
 query language directly from Python.

 ## Interfaces
diff --git a/docs/src/internal/transpilation.md b/docs/src/internal/transpilation.md
index b263c752..b8d81d42 100644
--- a/docs/src/internal/transpilation.md
+++ b/docs/src/internal/transpilation.md
@@ -59,7 +59,7 @@ The input object is treated as a subquery in the following cases:
 1. A restriction is applied that uses alias attributes in the heading.
 2. A projection uses an alias attribute to create a new alias attribute.
 3. A join is performed on an alias attribute.
-4. An Aggregation is used a restriction.
+4. An Aggregation is used a restriction.

 An error arises if
diff --git a/docs/src/manipulation/transactions.md b/docs/src/manipulation/transactions.md
index c7d6951a..58b9a316 100644
--- a/docs/src/manipulation/transactions.md
+++ b/docs/src/manipulation/transactions.md
@@ -6,7 +6,7 @@ interrupting the sequence of such operations halfway would leave the data in an
 state.
 While the sequence is in progress, other processes accessing the database will not see
 the partial results until the transaction is complete.
-The sequence may include [data queries](../query/principles.md) and
+The sequence may include [data queries](../query/principles.md) and
 [manipulations](index.md).
 In such cases, the sequence of operations may be enclosed in a transaction.
diff --git a/docs/src/publish-data.md b/docs/src/publish-data.md
index d766f49d..3ec2d721 100644
--- a/docs/src/publish-data.md
+++ b/docs/src/publish-data.md
@@ -27,8 +27,8 @@ The code and the data can be found at [https://github.com/sinzlab/Sinz2018_NIPS]

 ## Exporting into a collection of files

-Another option for publishing and archiving data is to export the data from the
+Another option for publishing and archiving data is to export the data from the
 DataJoint pipeline into a collection of files.
-DataJoint provides features for exporting and importing sections of the pipeline.
-Several ongoing projects are implementing the capability to export from DataJoint
+DataJoint provides features for exporting and importing sections of the pipeline.
+Several ongoing projects are implementing the capability to export from DataJoint
 pipelines into [Neurodata Without Borders](https://www.nwb.org/) files.
diff --git a/docs/src/quick-start.md b/docs/src/quick-start.md
index f3309c06..a7f25565 100644
--- a/docs/src/quick-start.md
+++ b/docs/src/quick-start.md
@@ -5,7 +5,7 @@ The easiest way to get started is through the
 [DataJoint Tutorials](https://github.com/datajoint/datajoint-tutorials).
 These tutorials are configured to run using
 [GitHub Codespaces](https://github.com/features/codespaces)
-where the full environment including the database is already set up.
+where the full environment including the database is already set up.

 Advanced users can install DataJoint locally.
 Please see the installation instructions below.
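
The updated `NotImplementedError` message in `autopopulate.py` names the tripartite alternative to a regular `make` (`make_fetch` + `make_compute` + `make_insert`). A minimal sketch of a computed table written in that style follows; the schema name, the `Image` and `FilteredImage` tables, and the placeholder computation are hypothetical, and the exact method signatures are an assumption rather than something this patch establishes.

```python
import datajoint as dj

schema = dj.Schema("demo_pipeline")  # hypothetical schema name


@schema
class Image(dj.Manual):  # hypothetical upstream table
    definition = """
    image_id : int
    ---
    image : longblob
    """


@schema
class FilteredImage(dj.Computed):  # hypothetical computed table
    definition = """
    -> Image
    ---
    filtered_image : longblob
    """

    # Tripartite alternative to a regular `make(self, key)`; signatures are assumed.
    def make_fetch(self, key):
        # 1. Fetch data from tables above in the hierarchy, restricted by the given key.
        return ((Image & key).fetch1("image"),)

    def make_compute(self, key, image):
        # 2. Compute secondary attributes based on the fetched data.
        return (image * 2,)  # placeholder computation

    def make_insert(self, key, filtered_image):
        # 3. Insert the new tuple(s) into the current table.
        self.insert1(dict(key, filtered_image=filtered_image))
```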
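
The new `match is None` guard in `table.py` replaces an opaque `AttributeError` (calling `.groupdict()` on `None`) with an explicit `DataJointError` when the MySQL `IntegrityError` text carries no foreign-key details, which the added message attributes to a missing `REFERENCES` privilege on dependent tables. A rough sketch of what calling code might observe, assuming a hypothetical `Session` table with downstream dependents:

```python
import datajoint as dj

try:
    # `Session` is a hypothetical manual table with dependent (downstream) tables.
    (Session() & "session_id = 1").delete()
except dj.DataJointError as err:
    # With this patch, a missing-privilege failure surfaces here with an explanatory
    # message instead of an AttributeError raised deep inside the cascade.
    print(err)
```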
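
The two lines appended to `parse_sql` in `datajoint/utils.py` make the generator emit a final statement even when the file does not end with the delimiter. A small sketch of the intended behavior, assuming a hypothetical `script.sql` whose last statement has no trailing `;`:

```python
from datajoint.utils import parse_sql

# Hypothetical script.sql contents:
#   CREATE TABLE t (id int);
#   INSERT INTO t VALUES (1)   -- no terminating delimiter on the last statement
statements = list(parse_sql("script.sql"))

# Before this change the unterminated final statement was silently dropped;
# with the trailing `if statement: yield " ".join(statement)` it is included.
assert len(statements) == 2
```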