diff --git a/.gitignore b/.gitignore index 0a1f914..22d820f 100644 --- a/.gitignore +++ b/.gitignore @@ -149,4 +149,11 @@ dmypy.json # Files generated during package build and CMD check, unnecessary for the package atlasapprox.Rcheck/ .Rhistory -.RData \ No newline at end of file +.RData + +# sphinx gallery src +docs/source/_build +docs/source/python/gallery + +# OS system file +**/.DS_Store \ No newline at end of file diff --git a/docs/gallery/python/quickstart.py b/docs/gallery/python/0_quickstart.py similarity index 75% rename from docs/gallery/python/quickstart.py rename to docs/gallery/python/0_quickstart.py index 9bbefa5..19fd3dd 100644 --- a/docs/gallery/python/quickstart.py +++ b/docs/gallery/python/0_quickstart.py @@ -2,7 +2,8 @@ Quickstart ========== -This example shows a quick and easy example of how to use atlasapprox to get and plot the expression of some gees in a specific organ of a specific organism. +This example shows a quick and easy example of how to use atlasapprox to get and plot the expression of some genes in a +specific organ of a specific organism. """ import matplotlib.pyplot as plt @@ -19,3 +20,5 @@ fig, ax = plt.subplots(figsize=(7, 4)) sns.heatmap(expression, ax=ax) fig.tight_layout() + +# sphinx_gallery_thumbnail_path = '_static/quickstart.png' \ No newline at end of file diff --git a/docs/gallery/python/1_beginner_guide.py b/docs/gallery/python/1_beginner_guide.py new file mode 100644 index 0000000..08c2d38 --- /dev/null +++ b/docs/gallery/python/1_beginner_guide.py @@ -0,0 +1,100 @@ +""" +.. _beginner-guide: + +Beginner guide +============== + +The `atlasapprox `_ API provides access to approximated +cell atlas data from 30 species, including both animals and plants. You can explore data from species such as *Homo sapiens* (humans), *Mus musculus* (mice), *Arabidopsis thaliana* (thale cress), and *Zea mays* (corn). 
+This guide walks you through installing the package, setting up the API, and running basic queries with simple examples. +""" + +# %% +# Setting up a virtual environment (optional) +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# It's recommended to use a virtual environment to manage dependencies. Run the following command: +# +# ``python -m venv venv`` +# +# Activate the environment using the appropriate command for your operating system: +# +# For macOS/Linux: +# ``source venv/bin/activate`` +# +# For Windows: +# ``venv\Scripts\activate`` + +# %% +# Installing required packages +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# Install the ``atlasapprox`` package along with libraries needed for data visualization using *pip*: +# +# ``pip install atlasapprox matplotlib seaborn numpy pandas`` +# +# These packages will be used throughout the tutorials. + +# %% +# Then, import required libraries and instantiate the ``API`` object: + +import atlasapprox + +api = atlasapprox.API() + +# %% +# Querying available data +# ^^^^^^^^^^^^^^^^^^^^^^^ +# Explore available organisms, organs, and cell types using the following methods: + +# %% + +# List available organisms +available_organisms = api.organisms() +print(available_organisms) + +# %% + +# List available organs for humans +available_organs = api.organs(organism="h_sapiens") +print(available_organs) + +# %% + +# List available cell types in the human lung +available_celltypes = api.celltypes(organism="h_sapiens", organ="lung") +print(available_celltypes) + + +# %% +# Exploring average gene expression +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# The ``average`` method retrieves gene expression levels for selected genes in a specific organ of an organism. 
+# +# The following example shows how to examine the average expression of five genes +# (*COL13A1*, *COL14A1*, *TGFBI*, *PDGFRA*, *GZMA*) in the human lung: + +avg_gene_expr_lung = api.average( + organism = "h_sapiens", + organ = "lung", + features = ["COL13A1", "COL14A1", "TGFBI", "PDGFRA", "GZMA"], + measurement_type = "gene_expression" +) + +# Display the result +avg_gene_expr_lung + +# %% +# Understanding the output +# ------------------------ +# The ``average`` method returns a **pandas.DataFrame** where: +# +# - Rows represent genes. +# - Columns represent cell types. +# - Values show average gene expression in counts per ten thousand (cptt). + +# %% +# Conclusion +# ^^^^^^^^^^ +# This guide covers setup and basic data querying. For more detailed +# information, refer to the official `documentation `_. + +# sphinx_gallery_thumbnail_path = '_static/beginner_guide.png' \ No newline at end of file diff --git a/docs/gallery/python/2_average_expression.py b/docs/gallery/python/2_average_expression.py new file mode 100644 index 0000000..85419da --- /dev/null +++ b/docs/gallery/python/2_average_expression.py @@ -0,0 +1,299 @@ +""" +.. _average_expression: + +Exploring average gene expression +================================= + +This tutorial demonstrates how to explore average gene expression across cell +types using the `atlasapprox `_ +API, with examples based on human data. + +If you haven't already completed the installation and API setup, please refer to +the :ref:`beginner-guide` tutorial before continuing. 
+""" + +# %% +# Contents +# ^^^^^^^^ +# - `Querying average gene expression for a single organ `__ +# - `Querying average gene expression for multiple organs `__ +# - `Exploring genes with similar expression patterns `__ +# - `Finding cell-type-specific markers in an organ `__ + +# %% +# Importing required libraries +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Import the necessary libraries and instantiate the ``API`` object: + +import atlasapprox +import matplotlib.pyplot as plt +import seaborn as sns +import numpy as np + +api = atlasapprox.API() + +# %% +# .. _average-expression: +# +# Querying average gene expression for a single organ +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# The ``average`` method retrieves the average gene expression of selected genes +# across cell types within a specific organ. Let's explore this using four genes +# (*PRDM1*, *PTPRC*, *ACTB*, *GAPDH*) in the human lung: + +avg_gene_expr_lung = api.average( + organism='h_sapiens', + organ='lung', + features=['PRDM1', 'PTPRC', 'ACTB', 'GAPDH'], + measurement_type='gene_expression' +) + +# Display the result +avg_gene_expr_lung + +# %% +# Understanding the output +# ------------------------ +# The ``average`` method returns a **pandas.DataFrame** where: +# +# - Each row represents a gene. +# - Each column corresponds to a cell type. +# - Values show average gene expression in counts per ten thousand (cptt). +# +# *ACTB* consistently exhibits higher expression across all cell types than the +# other genes, while *PRDM1* exhibits very low expression overall. +# +# Interpreting large sets of numerical data can be challenging, but visualizing +# it graphically highlights differences and improves understanding. + +# %% +# Visualizing the data +# -------------------- +# To visualize the average expression data of queried genes, a heatmap is +# recommended. Python's visualization libraries `Seaborn `_ +# and `Matplotlib `_ provide powerful tools for creating +# heatmaps. 
Run the following code to create one with custom labels: + +fig, ax = plt.subplots(figsize=(7, 5)) +heatmap = sns.heatmap(avg_gene_expr_lung, ax=ax) + +# Customize labels +plt.title('Average gene expression across cell types in the human lung') +plt.xlabel('Cell type') +plt.ylabel('Gene') + +# Display heatmap +fig.tight_layout() + +# %% +# The color gradient in the heatmap makes it easier to compare expression levels +# across cell types than reading raw values in a **pandas.DataFrame**. This +# visualization maps expression values to colors—brighter shades indicate higher +# expression, while darker shades represent lower levels. The heatmap clearly +# shows that *ACTB* exhibits high expression across all cell types, whereas +# *PRDM1* shows consistently low expression. + +# %% +# .. _multi-organs: +# +# Querying average gene expression for multiple organs +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# This example shows the average gene expression of four genes across three +# human organs: blood, lung, and liver. The `atlasapprox `_ +# API does not support querying multiple organs simultaneously, so we use a +# for-loop instead. + +# Define the target organs. +organ_list = ['blood', 'lung', 'liver'] + +# Iterate through the organ list +for organ in organ_list: + avg_gene_expr = api.average( + organism='h_sapiens', + organ=organ, + features=['PRDM1', 'PTPRC', 'ACTB', 'GAPDH'], + ) + + fig, ax = plt.subplots(figsize=(7, 5)) + heatmap = sns.heatmap(avg_gene_expr, ax=ax) + + # Customize labels + plt.title(f'Average gene expression across cell types in the human {organ}') + plt.xlabel("Cell type") + plt.ylabel("Gene") + + fig.tight_layout() + +# %% +# These three heatmaps show that *ACTB* consistently exhibits high expression +# across the selected organs. This reflects the typical high and stable +# expression of housekeeping genes, essential for basic cellular functions +# across various organs and cell types. 
+# +# Adapt this code to explore gene expression with the `atlasapprox `_ +# API using your genes and organs of interest. + +# %% +# .. _similar-features: +# +# Exploring genes with similar expression patterns +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# Building on the analysis of average gene expression across organs, this +# section introduces the ``similar_features`` method to explore related gene +# expression, using *ACTB* as an example gene of interest. +# +# This method helps answer questions like "Which genes exhibit similar +# expression patterns to a chosen gene in this organ?" and retrieves the top 10 +# similar genes, as shown below: + +similar_features = api.similar_features( + organism='h_sapiens', + organ='lung', + feature='ACTB', + method='correlation', + number=10 +) + +# Display the result +similar_features + +# %% +# The ``similar_features`` method returns a **pandas.Series** with gene names as +# the index and their Pearson correlation coefficients relative to the gene of +# interest (e.g., *ACTB*) as values. +# These coefficients, which range from -1 (perfect negative correlation) to 1 +# (perfect positive correlation), indicate how similar each gene's expression +# pattern is to that of the target gene. +# +# For example, *LASP1* has the highest similarity to *ACTB* (0.483309), +# suggesting potential co-regulation across cell types in the lung. + +# %% +# Additionally, other than correlation, methods like euclidean and +# manhattan are also available. Please refer to the `atlasapprox `_ +# API documentation for details. + +# %% +# Visualise the expression profile of these genes +# ----------------------------------------------- +# To visualize the expression profiles of these genes, use a heatmap by +# extracting gene names from ``similar_features.index`` and including the gene +# of interest (e.g., *ACTB*) for comparison. 
+# +# Pass these as the ``features`` parameter to the ``average`` method, then plot +# the results: + +# Combine ACTB and its similar features into a single list for better comparison +gene_list = ['ACTB'] + list(similar_features.index) + +# Get average gene expression +avg_similar_features = api.average( + organism='h_sapiens', + organ='lung', + features=gene_list +) + +# Display the heatmap +fig, ax = plt.subplots(figsize=(8, 6)) +heatmap = sns.heatmap(avg_similar_features,ax=ax) + +# Customize labels +plt.title(f'Average gene expression across ACTB and its similar features') +plt.xlabel("Cell type") +plt.ylabel("Gene") + +fig.tight_layout() + +# %% +# .. _marker-genes: +# +# Finding cell-type-specific markers in an organ +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# So far, the focus has been on known genes or those similar to a gene of interest. +# But what if no specific gene comes to mind? +# One way to begin is by exploring marker genes — genes that are highly and +# specifically expressed in certain cell types — and then query their average +# expression. +# +# For example, here's how to retrieve the top 10 marker genes for neutrophils in +# the human lung: + +markers_in_lung_neutrophil = api.markers( + organism='h_sapiens', + organ='lung', + cell_type='neutrophil', + number=10 +) + +markers_in_lung_neutrophil + +# %% +# Visualize expression profiles using a heatmap +# --------------------------------------------- + +avg_gene_expr_markers = api.average( + organism='h_sapiens', + organ='lung', + features=markers_in_lung_neutrophil +) + +fig, ax = plt.subplots(figsize=(8, 6)) +heatmap = sns.heatmap(avg_gene_expr_markers, ax=ax) + +# Set labels +plt.title("Average expression of marker genes in lung neutrophil") +plt.xlabel("Cell type") +plt.ylabel("Gene") + +fig.tight_layout() + +# %% +# The heatmap displays average expression levels of ten neutrophil marker genes +# across cell types. 
Most areas appear dark, indicating low expression (0-20 cptt) +# in non-neutrophil cells, highlighting these genes' specificity. + +# %% +# Applying log transformation +# --------------------------- +# The dark regions in the heatmap suggest a wide range of expression values, +# where low values (e.g., 0-20 cptt) are overshadowed by much higher ones. +# Applying a logarithmic transformation compresses this dynamic range and makes +# subtle differences more visible. +# +# The code below applies this transformation, using a pseudo-count to avoid +# issues with zero values: + +avg_gene_expr_markers_log = np.log(avg_gene_expr_markers + 1) + +fig, ax = plt.subplots(figsize=(8, 6)) +heatmap = sns.heatmap(avg_gene_expr_markers_log, ax=ax) + +# Set labels +plt.title("Log-transformed expression of markers in lung neutrophils") +plt.xlabel("Cell type") +plt.ylabel("Gene") + +fig.tight_layout() + +# %% +# The log-transformed heatmap reveals cell type-specific expression patterns for +# neutrophil marker genes. +# +# *CXCR2*, *FCGR3B*, and *VNN2* exhibit especially high expression (bright +# colors) in neutrophils, confirming their specificity. *G0S2* also shows +# elevated expression in neutrophils, with moderate levels observed in monocytes, +# dendritic cells, alveolar fibroblasts, and vascular smooth muscle — possibly +# indicating a role in related immune or tissue functions. +# Darker regions in other cell types suggest minimal or no expression, further +# emphasizing the neutrophil specificity of these markers. + +# %% +# Conclusion +# ^^^^^^^^^^ +# This tutorial demonstrated how to retrieve average gene expression using +# atlasapprox API functions and visualize the data using Python libraries such +# as `Seaborn `_ and `Matplotlib `_. +# For further details, consult the `official documentation `_. 
+ +# sphinx_gallery_thumbnail_path = '_static/average_expression.png' \ No newline at end of file diff --git a/docs/gallery/python/3_cell_type.py b/docs/gallery/python/3_cell_type.py new file mode 100644 index 0000000..28b8020 --- /dev/null +++ b/docs/gallery/python/3_cell_type.py @@ -0,0 +1,232 @@ +""" +.. _cell_types: + +Exploring cell types +==================== + +Understanding how cell types are distributed across organs is a key aspect of analyzing single-cell data. +This tutorial covers how to retrieve cell type distributions, visualize abundance across tissues, and identify dominant cell populations in specific organs using the `atlasapprox API `_. + +Examples are based on human data for clarity, but the same approach applies to other species in the atlasapprox API. +Before getting started, make sure you've completed the setup in the :ref:`beginner-guide`. + +""" + +# %% +# Contents +# ^^^^^^^^ +# - `Retrieving cell type distribution across organs `__ +# - `Zooming into a specific organ `__ +# - `Identifying marker genes for a cell type `__ + + +# %% +# Importing required libraries +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Import the necessary libraries and instantiate the ``API`` object: + +import atlasapprox +import matplotlib.pyplot as plt +import seaborn as sns + +api = atlasapprox.API() + +# %% +# .. _retrieve-distributions: +# +# Retrieving cell type distribution across organs +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# To obtain the distribution of cell types in the human cell atlas, use the ``celltypexorgan`` method. +# This provides a comprehensive overview of cell abundances and helps identify the most prevalent cell types across different tissues. 
+ +# Query cell types across human organs +human_celltypes = api.celltypexorgan( + organism="h_sapiens", + measurement_type="gene_expression" +) + +# Display the cell type distribution matrix +human_celltypes + +# %% +# Understanding the output +# ------------------------ +# The ``celltypexorgan`` method returns a **pandas.DataFrame** where: +# +# - Each row represents a unique cell type. +# - Each column represents an organ. +# - The values represent counts of sampled cells for that cell type in each organ (a value of 0 means the cell type was not detected). +# +# For example, in the lung, there are 1,307 T cells and 12,160 macrophages, while hepatocytes have a count of 0, +# indicating they were not detected. +# +# At first glance, some cell types such as macrophages, T cells, and B cells are found across multiple tissues, while others +# like Schwann cells and thymocytes are much rarer. However, looking at these numbers alone can make it hard to spot trends +# across many organs and cell types, especially in a large dataset like this one. + +# %% +# Visualizing the data +# -------------------- +# Visualization reveals patterns hidden by raw numbers. Organs vary in sampled cell totals, so raw counts can be misleading—those with more samples may appear to have higher cell type numbers, even if the type is rare. +# Proportions normalize the data to reflect true distribution within organs. Using macrophages as an example, their proportion is visualized to assess their role as key immune cells across tissues. 
+ +# Compute proportion of macrophages in each organ +proportions = (human_celltypes.loc["macrophage"] / human_celltypes.sum(axis=0)) * 100 + +# Plot bar chart +proportions.sort_values(ascending=False).plot(kind='bar') +plt.xlabel('Organ') +plt.ylabel('Percentage(%)') +plt.title('Proportion of macrophage across organs') + +# Display bar chart +plt.tight_layout() + +# %% +# The bar chart indicates macrophages are highly abundant in the lung, comprising +# nearly 35% of sampled cells. This suggests the lung is a primary site for +# macrophage activity, while other organs like liver and bladder show moderate +# proportions, and many, such as pancreas and colon, exhibit minimal presence. +# The variation highlights the diverse roles macrophages play across the body's tissues. + +# %% +# Comparing cell type proportions +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# To explore further, one can focus on the proportions of specific cell types +# across organs and compare multiple types to reveal differences in their distributions. +# For example, consider macrophages alongside T cells—both are immune cells, but +# their roles and abundances vary across the body. + +# Select cell types to compare +cell_types = ["T", "macrophage"] + +# Calculate percentage of each cell type in each organ +proportions = (human_celltypes.loc[cell_types] / human_celltypes.sum(axis=0)) * 100 + +# Plot bars for each cell type +proportions.T.plot(kind="bar") +plt.xlabel("Organ") +plt.ylabel("Percentage(%)") +plt.title("Proportion of macrophage and T cells across organs") + +plt.tight_layout() + +# %% +# The plot shows how macrophages and T cells are spread across different organs. +# For example, the lung and liver have more macrophages, while the gut, lymph +# nodes, and skin have more T cells. +# The atlasapprox API enables further investigation, and to explore other +# cell types or species of interest, the code can be adapted by changing the parameters. + +# %% +# .. 
_specific-organ: +# +# Zooming into a specific organ +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# After examining cell type distributions across multiple organs, the next step is to focus on a specific organ +# to better understand its tissue-specific cell composition. +# +# This example selects the lung and identifies the most abundant cell types beyond macrophages. +# Only cell types with non-zero proportions are included in the plot to simplify the visualization. + +# Plot lung cell type proportions +lung_pct = (human_celltypes["lung"] / human_celltypes["lung"].sum() * 100).sort_values(ascending=False) +lung_pct[lung_pct > 0].plot(kind="bar") + +plt.title("Cell type proportions in lung") +plt.xlabel("Cell type") +plt.ylabel("Percentage(%)") +plt.tight_layout() + +# %% +# The bar chart shows that macrophages represent the largest proportion (~34%) +# of lung cells, followed by AT2 cells (~25%) and monocytes (~7%). In contrast, +# rare cell types such as NK cells, lymphatic endothelial cells, and ionocytes +# account for less than 2%. + +# %% +# .. _markers: +# +# Identifying marker genes for a cell type within an organ +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# To further characterize a specific cell type within a tissue, marker genes can +# be retrieved using the ``markers`` method. 
+# +# The following example identifies the top 10 marker genes for macrophages in the lung: + +human_lung_macrophage_markers = api.markers( + organism="h_sapiens", + organ="lung", + cell_type="macrophage", + number=10, + measurement_type='gene_expression' +) + +human_lung_macrophage_markers + +# %% +# To analyze how these marker genes are expressed across different lung cell +# types, use the ``average`` method: + +human_lung_macrophage_markers_exp = api.average( + organism="h_sapiens", + organ="lung", + features=human_lung_macrophage_markers, + measurement_type='gene_expression' +) + +human_lung_macrophage_markers_exp + +# %% +# Understanding the output +# ------------------------ +# The ``average`` function returns a **pandas.DataFrame** where: +# +# - Each row represents a gene. +# - Each column corresponds to a cell type. +# - The values represent average expression in counts per ten thousand (cptt). +# +# Marker gene expression is generally higher in macrophages than in other lung +# cell types. For example, *MARCO* shows an average expression of 12.84 cptt in +# macrophages, compared to just 0.12 in neutrophils and 0.18 in B cells. This +# confirms *MARCO* as a highly specific marker for macrophages in the lung. + +# %% +# Visualizing marker gene expression with a heatmap +# ------------------------------------------------- +# While the table provides a detailed quantitative view, visualizing the +# expression data as a heatmap can help highlight global patterns and make +# comparisons across cell types more intuitive. 
+ +# Create the heatmap +fig, ax = plt.subplots(figsize=(7, 5)) +heatmap = sns.heatmap(human_lung_macrophage_markers_exp, ax=ax) + +# Set labels +plt.title("Average expression of marker genes in lung macrophage") +plt.xlabel("Cell type") +plt.ylabel("Gene") + +fig.tight_layout() + +# %% +# The heatmap displays the average expression of selected marker genes across +# lung cell types, with color intensity representing expression levels in counts +# per ten thousand (cptt). Darker colors indicate lower expression, while +# lighter shades (e.g., pink and white) reflect higher expression. Genes such as +# *MARCO* and *MSR1* exhibit strong, macrophage-specific expression, confirming +# their relevance as marker genes for this cell type. + +# %% +# Conclusion +# ^^^^^^^^^^ +# This tutorial introduced how to explore cell types, their distribution, and +# marker genes using a single species' atlas. The human lung served as an +# example, but the same approach can be applied to any other species or cell +# type available in the approximated cell atlases. Try adjusting the cell_type +# and organism parameters to explore new biological insights. +# +# For more detailed information, please refer to the `official documentation `_. + +# sphinx_gallery_thumbnail_path = '_static/cell_type.png' \ No newline at end of file diff --git a/docs/gallery/python/4_markers.py b/docs/gallery/python/4_markers.py new file mode 100644 index 0000000..c3ebe3d --- /dev/null +++ b/docs/gallery/python/4_markers.py @@ -0,0 +1,184 @@ +""" +.. _explore_markers: + +Exploring marker genes +====================== +Marker genes are essential for defining cell identity and distinguishing one +cell type from another. This tutorial demonstrates how to retrieve marker genes +for a specific cell type, analyze their expression and detection across tissues, +and visualize their specificity using dot plots—all using the +`atlasapprox API `_. 
+ +Examples are based on human data for clarity, but the same approach applies to +other species in the atlasapprox API. Before getting started, make sure you've +completed the setup in the :ref:`beginner-guide`. +""" + +# %% +# Contents +# ^^^^^^^^ +# +# - `Find marker genes for a specific cell type in an organ (e.g., T cells in the human lung) `__ +# - `Check expression levels and detection fractions of marker genes across cell types `__ +# - `Querying protein sequences of marker genes `__ + + +# %% +# Importing required libraries +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Import the necessary libraries and instantiate the ``API`` object: + +import atlasapprox +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd + +api = atlasapprox.API() + + +# %% +# .. _marker: +# +# Finding marker genes for a cell type +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Marker genes help identify cell types by exhibiting enriched or exclusive +# expression. Use the ``markers`` method to retrieve the top 10 marker genes for +# T cells in the human lung. + +human_lung_T_markers = api.markers( + organism='h_sapiens', + organ='lung', + cell_type='T', + number=10, + measurement_type='gene_expression') + +human_lung_T_markers + +# %% +# .. _expression-fraction: +# +# Analyzing marker gene expression and fraction +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Use the ``average`` method to examine how these marker genes are expressed +# across different lung cell types. + +human_lung_T_markers_exp = api.average( + organism='h_sapiens', + organ='lung', + features=human_lung_T_markers, + measurement_type='gene_expression') + +human_lung_T_markers_exp + +# %% +# Understanding the output +# ------------------------ +# +# This method returns a **pandas.DataFrame** where: +# +# - Each row represents a gene. +# - Each column corresponds to a cell type. +# - Values indicate the average gene expression, measured in counts per ten thousand (cptt). 
+ +# %% +# For example, *CD3D* shows an expression of 8.62 cptt in T cells, but only 0.06 +# in neutrophils and 0.045 in basophils, highlighting its specificity to T cells +# in the lung. + +# %% +# To explore how broadly these genes are expressed, use the ``fraction_detected`` method: + +human_lung_fraction = api.fraction_detected( + organism='h_sapiens', + organ='lung', + features=human_lung_T_markers, + measurement_type='gene_expression') + +human_lung_fraction + +# %% +# This output shows the fraction of cells expressing each gene, ranging from 0 +# to 1. A value of 1 means the gene is expressed in 100% of the cells of that +# type, while 0 means none do. For example, *CD3E* has a detection rate of 0.828 +# in T cells (82.8% of T cells express it), but only 0.0057 in neutrophils. +# *CD3D* is expressed in 86.8% of T cells and only 1.1% of neutrophils. These +# patterns reinforce the specificity of these genes as T cell markers. + +# %% +# Dot plot visualization +# ---------------------- + +# Reshape and prepare data +data = pd.melt( + human_lung_T_markers_exp.reset_index(), + id_vars='index', + var_name='Cell types', + value_name='Expression').rename(columns={'index': 'Genes'}) + +data['Fraction'] = pd.melt( + human_lung_fraction.reset_index(), + id_vars='index')['value'].clip(0, 1) + +# Plot dot plot +plt.figure(figsize=(9, 4)) +sns.scatterplot( + data=data, + x='Cell types', + y='Genes', + size='Fraction', + hue='Expression', + sizes=(30, 200)) + +plt.xticks(rotation=90) +plt.legend(bbox_to_anchor=(1, 1)) + +# %% +# In this visualization, each dot corresponds to a gene-cell type pair. Both the +# size and color of the dot convey biological meaning: The dot size represents +# the fraction of cells within the cell type that express the gene, while the +# color indicates the average expression level. Larger, darker dots therefore +# represent genes that are both highly expressed and broadly detected. 
+# +# For instance, *CD3E* and *CD3D* stand out in T cells as large, dark circles, +# indicating strong expression and widespread detection. In contrast, the same +# genes appear as small, pale dots in other cell types, emphasizing their +# specificity to T cells. + + +# %% +# .. _sequence: +# +# Querying protein sequences of marker genes +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Marker genes can also be analyzed at the protein sequence level to support +# tasks such as structural analysis, or similarity searches (e.g., BLASTp). To +# retrieve these amino acid sequences efficiently, utilize the ``sequences`` +# method as demonstrated below. + +sequence = api.sequences( + organism='h_sapiens', + features=human_lung_T_markers, + measurement_type='gene_expression') + +# Display retrieved sequences in a readable format +print(f"Sequence type: {sequence['type']}") + +for gene, seq in zip(human_lung_T_markers, sequence['sequences']): + print(f"\n{gene}:\n{seq}") + +# %% +# Conclusion +# ^^^^^^^^^^ +# +# This tutorial demonstrated how to retrieve marker genes for a specific cell +# type, assess their expression levels and detection frequency, visualize their +# specificity using dot plots, and extract protein sequences for further analysis. +# +# For more detailed information, please refer to the +# `official documentation `_. 
+ +# sphinx_gallery_thumbnail_path = '_static/markers.png' \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt index 28fe6b5..2d1f671 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,3 +5,4 @@ matplotlib seaborn pandas requests +sphinx-new-tab-link \ No newline at end of file diff --git a/docs/source/_static/average_expression.png b/docs/source/_static/average_expression.png new file mode 100644 index 0000000..3368d4f Binary files /dev/null and b/docs/source/_static/average_expression.png differ diff --git a/docs/source/_static/beginner_guide.png b/docs/source/_static/beginner_guide.png new file mode 100644 index 0000000..e7809a2 Binary files /dev/null and b/docs/source/_static/beginner_guide.png differ diff --git a/docs/source/_static/cell_type.png b/docs/source/_static/cell_type.png new file mode 100644 index 0000000..9a6f463 Binary files /dev/null and b/docs/source/_static/cell_type.png differ diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css new file mode 100644 index 0000000..f410649 --- /dev/null +++ b/docs/source/_static/css/custom.css @@ -0,0 +1,220 @@ +/* Improve code block readability */ +pre { + background-color: #f8f9fa; + border: 1px solid #ddd; + border-radius: 5px; + padding: 15px; + font-size: 0.9em; + line-height: 1.5; + overflow-x: auto; /* Add horizontal scroll for long code lines */ +} + +/* Style tables */ +/* Ensure all tables have borders, padding, and alignment */ +table { + border-collapse: collapse !important; /* Ensure borders collapse into a single line */ + max-width: 100% !important; /* Maintain max width */ + max-height: 400px !important; /* Maintain max height */ + overflow-x: auto !important; /* Enable horizontal scrolling */ + overflow-y: auto !important; /* Enable vertical scrolling */ + display: block !important; /* Ensure table behaves as a block */ + white-space: nowrap; +} + +table th, table td { + border: 1px solid #ddd !important; /* 
Force border on all sides */ + padding: 6px; /* Unified padding */ + text-align: left; + font-size: 14px; /* Ensures text remains readable */ + text-overflow: initial !important; + overflow: visible !important; + white-space: nowrap !important; +} + +/* Improve code block readability */ +pre { + background-color: #f8f9fa; + border: 1px solid #ddd; + border-radius: 5px; + padding: 15px; + font-size: 0.9em; + line-height: 1.5; + overflow-x: auto; /* Add horizontal scroll for long code lines */ +} + +/* Style tables */ +/* Ensure all tables have borders, padding, and alignment */ +table { + border-collapse: collapse !important; /* Ensure borders collapse into a single line */ + max-width: 100% !important; /* Maintain max width */ + max-height: 400px !important; /* Maintain max height */ + overflow-x: auto !important; /* Enable horizontal scrolling */ + overflow-y: auto !important; /* Enable vertical scrolling */ + display: block !important; /* Ensure table behaves as a block */ +} + +table th, table td { + border: 1px solid #ddd !important; /* Force border on all sides */ + padding: 6px; /* Unified padding */ + text-align: left; + font-size: 14px; /* Ensures text remains readable */ + text-overflow: initial !important; +} + +/* Style table headers */ +table th { + background-color: #f1f1f1; + color: #333; + font-weight: bold; +} + +/* Alternate row coloring for better readability */ +table tr:nth-child(even) { + background-color: #f9f9f9; +} + +/* Specific styling for Sphinx gallery script output tables */ +.sphx-glr-script-out table { + border-collapse: collapse !important; /* Reinforce border collapsing */ + max-width: 100% !important; + max-height: 400px !important; + overflow-x: auto !important; + overflow-y: auto !important; + display: block !important; +} + +.sphx-glr-script-out table th, +.sphx-glr-script-out table td { + border: 1px solid #ddd !important; /* Ensure borders are applied */ + padding: 6px !important; + text-align: left !important; + font-size: 
14px !important; +} + +.sphx-glr-script-out img { + max-width: 100% !important; /* Ensure image fits container */ + height: auto !important; /* Maintain aspect ratio */ +} + +.sphx-glr-script-out { + overflow-x: auto !important; /* Allow horizontal scrolling if needed */ + max-height: none !important; /* Remove height limit */ +} + +/* Improve headings */ +h1, h2, h3, h4, h5, h6 { + color: #2c3e50; + margin-top: 1.5em; + margin-bottom: 0.5em; +} + +/* Adjust body text for better readability */ +body { + line-height: 1.7; + color: #333; +} + +/* Style external links */ +a.reference.external::after { + content: " ↗"; + color: #2980B9; + font-size: 0.8em; +} + +.wy-table-responsive { + margin-bottom: 24px; + max-width: 100%; + overflow: visible; +} + +/* Style each gallery thumbnail as a card */ +.sphx-glr-thumbcontainer { + flex-direction: column; + align-items: center; + justify-content: center; + text-align: center; + background: white; + border-radius: 10px; + box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1); + padding: 15px; + margin: 10px; + width: 150px; /* Adjust width */ + height: 150px; /* Fixed height; overflow stays visible so long titles are not clipped */ + overflow: visible !important; /* Prevent clipping of the title */ + line-height: 1.2em; + white-space: normal !important; + display: inline-block; + transition: transform 0.2s ease-in-out, box-shadow 0.2s ease-in-out; +} + +/* Hover effect */ +.sphx-glr-thumbcontainer:hover { + transform: translateY(-5px); + box-shadow: 0px 6px 12px rgba(0, 0, 0, 0.15); +} + +/* Style the text inside the thumbnail */ +.sphx-glr-thumbnail-title { + font-size: 14px; + font-weight: bold; + color: #333; + margin-top: 10px; + white-space: normal !important; /* Allow text to wrap */ + overflow: visible !important; /* Prevent clipping */ + text-overflow: initial !important; /* Disable ellipsis */ + width: 100%; /* Allow the title to use the full width of the container */ + line-height: 1.2em; /* Improve readability for wrapped text */ +} + +/* Style the images
inside the thumbnails */ +.sphx-glr-thumbcontainer img { + max-width: 100%; + max-height: 130px; + border-radius: 5px; + object-fit: contain; +} + +/* Remove link underline and inherit text color inside thumbnails */ +.sphx-glr-thumbcontainer a { + text-decoration: none; + color: inherit; +} + +/* Ensure the container of thumbnails uses grid or flexbox properly */ +.sphx-glr-thumbnails { + display: flex; + flex-wrap: wrap; /* Allow wrapping to prevent overlap */ + justify-content: center; /* Center thumbnails in the row */ + gap: 20px; /* Increase spacing between thumbnails */ + align-items: flex-start; /* Align items to the top */ +} + +.sphx-glr-thumbcontainer[tooltip]:hover::before, +.sphx-glr-thumbcontainer[tooltip]:hover::after { + display: none; +} + +div.sphx-glr-download-link-note { + display: none; +} + +h2 { + font-size: 1.5em; + font-weight: bold; +} + +h3 { + font-size: 1.2em; + font-weight: bold; +} + +/* Hide download buttons */ +div.sphx-glr-download, +div.sphx-glr-footer { + display: none !important; +} + +/* Hide running time */ +p.sphx-glr-script-out { + display: none !important; +} \ No newline at end of file diff --git a/docs/source/_static/data.png b/docs/source/_static/data.png new file mode 100644 index 0000000..8cfaf03 Binary files /dev/null and b/docs/source/_static/data.png differ diff --git a/docs/source/_static/markers.png b/docs/source/_static/markers.png new file mode 100644 index 0000000..d9e0bef Binary files /dev/null and b/docs/source/_static/markers.png differ diff --git a/docs/source/_static/quickstart.png b/docs/source/_static/quickstart.png new file mode 100644 index 0000000..1d2695a Binary files /dev/null and b/docs/source/_static/quickstart.png differ diff --git a/docs/source/conf.py b/docs/source/conf.py index 6206c55..4761cef 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -2,8 +2,9 @@ # # For the full list of built-in configuration values, see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html - +#
Configuration file for the Sphinx documentation builder. # Early import forfeits the credit warning for atlasapprox + import os os.environ["ATLASAPPROX_HIDECREDITS"] = "yes" @@ -24,23 +25,38 @@ "sphinx_gallery.gen_gallery", ] sphinx_tabs_disable_tab_closing = True +show_source = True templates_path = ["_templates"] exclude_patterns = [] # -- Options for sphinx-gallery ---------------------------------------------- + +def file_name_sort_key(example): + return os.path.basename(example) + sphinx_gallery_conf = { - "filename_pattern": "/.*.py", - "examples_dirs": [ - "../gallery/python", - ], - "gallery_dirs": [ - "python/gallery", - ], + "filename_pattern": r".*\.py", + "examples_dirs": ["../gallery/python"], + "gallery_dirs": ["python/gallery"], + 'within_subsection_order': file_name_sort_key, + "remove_config_comments": True, # hides config-style comments like thumbnail_path + 'download_all_examples': False, # hide the download-all-examples zip links + 'show_memory': False, # do not report memory usage + 'show_signature': False, + 'show_api_usage': False, + 'min_reported_time': float('inf'), # hide running time } # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = "sphinx_rtd_theme" -html_static_path = ["_static"] +html_static_path = ['_static'] +html_css_files = ['css/custom.css'] + +extensions += ["sphinx_new_tab_link"] + +# optional tweaks +new_tab_link_show_external_link_icon = False # set to True to add an icon after each external link +new_tab_link_enable_referrer = True # keep the HTTP referrer if you need it \ No newline at end of file