Cell Annotation
In [ ]:
Copied!
# import libraries
import numpy as np
import pandas as pd
import scanpy as sc
import plotly.express as px
# import libraries
import numpy as np
import pandas as pd
import scanpy as sc
import plotly.express as px
In [ ]:
Copied!
# Function to annotate cells
def annotate_cells(
    data,
    method="custom_gene_list",
    gene_list=None,
    annotation_map=None,
    external_tool="CIBERSORT",
    database_results=None,
    expression_threshold=1,
):
    """
    Annotates cells based on single-cell gene expression data.

    Parameters:
    - data: AnnData object, or a pandas DataFrame with one row per cell and
      one column per gene. The object is modified in place.
    - method: "custom_gene_list" or "external_database".
    - gene_list: List of marker genes in priority order (required if
      method="custom_gene_list"). Genes absent from the data are skipped.
    - annotation_map: Dictionary mapping genes to cell types (required if
      method="custom_gene_list"). A matching gene without an entry is
      labelled "Unknown".
    - external_tool: Name of the external database/tool (e.g., "CIBERSORT",
      "ImmuneCellAI"). Only used in the progress message.
    - database_results: Results from an external database; its "cell_type"
      entry is stored as the annotations (required if
      method="external_database").
    - expression_threshold: A marker counts as expressed in a cell when its
      value is strictly greater than this number (default 1).

    Returns:
    - data: The same object, with a new column "annotations" (in ``data.obs``
      for AnnData, directly on the frame for a DataFrame).

    Raises:
    - ValueError: if a required argument for the chosen method is missing, or
      if ``method`` is not one of the two supported values.
    """
    is_frame = isinstance(data, pd.DataFrame)

    def marker_expression(gene):
        # Return the expression of one gene across all cells as a flat array.
        if is_frame:
            return data[gene].to_numpy()
        values = data[:, gene].X
        if hasattr(values, "toarray"):
            # Sparse matrices must be densified before element-wise use.
            values = values.toarray()
        return np.asarray(values).reshape(-1)

    if method == "custom_gene_list":
        if gene_list is None or annotation_map is None:
            raise ValueError("A gene list and annotation map must be provided for the 'custom_gene_list' method.")
        print("Annotating cells using custom gene list...")

        known_genes = data.columns if is_frame else data.var_names
        n_cells = data.shape[0]
        labels = np.full(n_cells, "Unannotated", dtype=object)
        # Cells whose annotation has already been decided by an earlier
        # (higher-priority) marker; later markers must not overwrite them.
        resolved = np.zeros(n_cells, dtype=bool)

        for gene in gene_list:
            if gene not in known_genes:
                continue
            expressed = marker_expression(gene) > expression_threshold
            label = annotation_map.get(gene, "Unknown")
            if label:
                # A falsy label leaves the cell "Unannotated" while still
                # marking it as resolved below.
                labels[expressed & ~resolved] = label
            resolved |= expressed
            if resolved.all():
                break

        annotations = labels.tolist()
    elif method == "external_database":
        if database_results is None:
            raise ValueError("Database results must be provided for the 'external_database' method.")
        print(f"Annotating cells using {external_tool} results...")
        annotations = database_results["cell_type"]
    else:
        raise ValueError("Invalid method. Choose either 'custom_gene_list' or 'external_database'.")

    if is_frame:
        data["annotations"] = annotations
    else:
        data.obs["annotations"] = annotations
    return data
# Function to annotate cells
def annotate_cells(
    data,
    method="custom_gene_list",
    gene_list=None,
    annotation_map=None,
    external_tool="CIBERSORT",
    database_results=None,
    expression_threshold=1,
):
    """
    Annotates cells based on single-cell gene expression data.

    Parameters:
    - data: AnnData object, or a pandas DataFrame with one row per cell and
      one column per gene. The object is modified in place.
    - method: "custom_gene_list" or "external_database".
    - gene_list: List of marker genes in priority order (required if
      method="custom_gene_list"). Genes absent from the data are skipped.
    - annotation_map: Dictionary mapping genes to cell types (required if
      method="custom_gene_list"). A matching gene without an entry is
      labelled "Unknown".
    - external_tool: Name of the external database/tool (e.g., "CIBERSORT",
      "ImmuneCellAI"). Only used in the progress message.
    - database_results: Results from an external database; its "cell_type"
      entry is stored as the annotations (required if
      method="external_database").
    - expression_threshold: A marker counts as expressed in a cell when its
      value is strictly greater than this number (default 1).

    Returns:
    - data: The same object, with a new column "annotations" (in ``data.obs``
      for AnnData, directly on the frame for a DataFrame).

    Raises:
    - ValueError: if a required argument for the chosen method is missing, or
      if ``method`` is not one of the two supported values.
    """
    is_frame = isinstance(data, pd.DataFrame)

    def marker_expression(gene):
        # Return the expression of one gene across all cells as a flat array.
        if is_frame:
            return data[gene].to_numpy()
        values = data[:, gene].X
        if hasattr(values, "toarray"):
            # Sparse matrices must be densified before element-wise use.
            values = values.toarray()
        return np.asarray(values).reshape(-1)

    if method == "custom_gene_list":
        if gene_list is None or annotation_map is None:
            raise ValueError("A gene list and annotation map must be provided for the 'custom_gene_list' method.")
        print("Annotating cells using custom gene list...")

        known_genes = data.columns if is_frame else data.var_names
        n_cells = data.shape[0]
        labels = np.full(n_cells, "Unannotated", dtype=object)
        # Cells whose annotation has already been decided by an earlier
        # (higher-priority) marker; later markers must not overwrite them.
        resolved = np.zeros(n_cells, dtype=bool)

        for gene in gene_list:
            if gene not in known_genes:
                continue
            expressed = marker_expression(gene) > expression_threshold
            label = annotation_map.get(gene, "Unknown")
            if label:
                # A falsy label leaves the cell "Unannotated" while still
                # marking it as resolved below.
                labels[expressed & ~resolved] = label
            resolved |= expressed
            if resolved.all():
                break

        annotations = labels.tolist()
    elif method == "external_database":
        if database_results is None:
            raise ValueError("Database results must be provided for the 'external_database' method.")
        print(f"Annotating cells using {external_tool} results...")
        annotations = database_results["cell_type"]
    else:
        raise ValueError("Invalid method. Choose either 'custom_gene_list' or 'external_database'.")

    if is_frame:
        data["annotations"] = annotations
    else:
        data.obs["annotations"] = annotations
    return data
In [ ]:
Copied!
# UMAP Visualization Function
def visualize_annotations_on_umap(data, annotations_column="annotations", n_pcs=50):
    """
    Visualizes annotations on UMAP.

    Parameters:
    - data: AnnData object holding the annotations in ``data.obs``. If
      ``data.obsm["X_umap"]`` is missing, PCA, neighbors and UMAP are computed
      and stored on ``data`` in place.
    - annotations_column: Name of the ``data.obs`` column containing cell
      annotations.
    - n_pcs: Number of principal components to use for UMAP computation.
      Capped at ``min(n_cells, n_genes) - 1`` so small datasets do not fail.

    Returns:
    - fig: Plotly figure with annotated UMAP.

    Raises:
    - KeyError: if ``annotations_column`` is not a column of ``data.obs``.
    """
    # Fail before the expensive embedding step rather than after it.
    if annotations_column not in data.obs:
        raise KeyError(
            f"Column '{annotations_column}' not found in data.obs; annotate the cells first."
        )

    # Ensure UMAP coordinates exist
    if "X_umap" not in data.obsm.keys():
        print("UMAP coordinates not found. Computing UMAP...")

        # PCA cannot return as many components as the smaller data dimension,
        # so clamp the request instead of letting the solver raise.
        n_comps = min(n_pcs, min(data.shape) - 1)
        print(f"Reducing dimensionality to {n_comps} principal components...")
        sc.pp.pca(data, n_comps=n_comps)

        # Compute neighbors and UMAP on the PCA-reduced representation
        sc.pp.neighbors(data, use_rep="X_pca")
        sc.tl.umap(data)

    # Only the first two embedding dimensions are plotted; a precomputed
    # embedding may carry more than two components.
    coordinates = np.asarray(data.obsm["X_umap"])[:, :2]
    umap_df = pd.DataFrame(coordinates, columns=["UMAP1", "UMAP2"])
    umap_df[annotations_column] = data.obs[annotations_column].values

    # Create Plotly UMAP visualization
    fig = px.scatter(
        umap_df,
        x="UMAP1",
        y="UMAP2",
        color=annotations_column,
        title="UMAP with Cell Annotations",
        labels={annotations_column: "Cell Type"},
    )
    fig.update_traces(marker=dict(size=5, opacity=0.8))
    return fig
# UMAP Visualization Function
def visualize_annotations_on_umap(data, annotations_column="annotations", n_pcs=50):
    """
    Visualizes annotations on UMAP.

    Parameters:
    - data: AnnData object holding the annotations in ``data.obs``. If
      ``data.obsm["X_umap"]`` is missing, PCA, neighbors and UMAP are computed
      and stored on ``data`` in place.
    - annotations_column: Name of the ``data.obs`` column containing cell
      annotations.
    - n_pcs: Number of principal components to use for UMAP computation.
      Capped at ``min(n_cells, n_genes) - 1`` so small datasets do not fail.

    Returns:
    - fig: Plotly figure with annotated UMAP.

    Raises:
    - KeyError: if ``annotations_column`` is not a column of ``data.obs``.
    """
    # Fail before the expensive embedding step rather than after it.
    if annotations_column not in data.obs:
        raise KeyError(
            f"Column '{annotations_column}' not found in data.obs; annotate the cells first."
        )

    # Ensure UMAP coordinates exist
    if "X_umap" not in data.obsm.keys():
        print("UMAP coordinates not found. Computing UMAP...")

        # PCA cannot return as many components as the smaller data dimension,
        # so clamp the request instead of letting the solver raise.
        n_comps = min(n_pcs, min(data.shape) - 1)
        print(f"Reducing dimensionality to {n_comps} principal components...")
        sc.pp.pca(data, n_comps=n_comps)

        # Compute neighbors and UMAP on the PCA-reduced representation
        sc.pp.neighbors(data, use_rep="X_pca")
        sc.tl.umap(data)

    # Only the first two embedding dimensions are plotted; a precomputed
    # embedding may carry more than two components.
    coordinates = np.asarray(data.obsm["X_umap"])[:, :2]
    umap_df = pd.DataFrame(coordinates, columns=["UMAP1", "UMAP2"])
    umap_df[annotations_column] = data.obs[annotations_column].values

    # Create Plotly UMAP visualization
    fig = px.scatter(
        umap_df,
        x="UMAP1",
        y="UMAP2",
        color=annotations_column,
        title="UMAP with Cell Annotations",
        labels={annotations_column: "Cell Type"},
    )
    fig.update_traces(marker=dict(size=5, opacity=0.8))
    return fig
In [ ]:
Copied!
# Example Usage
if __name__ == "__main__":
    # Fetch the pbmc3k example dataset (AnnData format), then apply
    # total-count normalization (target sum 1e4) followed by log1p.
    adata = sc.datasets.pbmc3k()
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)

    # Marker gene -> cell type lookup for T-cells, B-cells and Monocytes.
    annotation_map = {"CD3D": "T cells", "CD79A": "B cells", "LYZ": "Monocytes"}
    # The marker list is the lookup's keys, in insertion order.
    example_gene_list = list(annotation_map)

    # Label every cell from the marker genes.
    adata = annotate_cells(
        adata,
        method="custom_gene_list",
        gene_list=example_gene_list,
        annotation_map=annotation_map,
    )

    # Draw the annotated UMAP and display it.
    fig = visualize_annotations_on_umap(
        adata,
        annotations_column="annotations",
        n_pcs=50,
    )
    fig.show()
# Example Usage
if __name__ == "__main__":
    # Fetch the pbmc3k example dataset (AnnData format), then apply
    # total-count normalization (target sum 1e4) followed by log1p.
    adata = sc.datasets.pbmc3k()
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)

    # Marker gene -> cell type lookup for T-cells, B-cells and Monocytes.
    annotation_map = {"CD3D": "T cells", "CD79A": "B cells", "LYZ": "Monocytes"}
    # The marker list is the lookup's keys, in insertion order.
    example_gene_list = list(annotation_map)

    # Label every cell from the marker genes.
    adata = annotate_cells(
        adata,
        method="custom_gene_list",
        gene_list=example_gene_list,
        annotation_map=annotation_map,
    )

    # Draw the annotated UMAP and display it.
    fig = visualize_annotations_on_umap(
        adata,
        annotations_column="annotations",
        n_pcs=50,
    )
    fig.show()
Annotating cells using custom gene list... UMAP coordinates not found. Computing UMAP... Reducing dimensionality to 50 principal components...
/Users/jaydeepbhat/Documents/Hackathon/2024_AI_Agent/scripts/talk2cells/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm