In this document, we include code for reproducing figures in the paper.
Figure 1c
library(data.table)
library(ggplot2)
library(egg)
library(ggsci)
library(ggrepel)
library(tidyverse)
library(cowplot)
library(ggthemes)
library(dplyr)
library(forcats)
blank_theme <- theme_minimal()+
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank(),
plot.title=element_text(size=14, face="bold")
)
meta = fread('./meta/meta_all_gene.csv')
df = table(meta$tech)
df = data.table(tech = names(df), value = as.numeric(df))
df2 <- df %>%
mutate(csum = rev(cumsum(rev(value))),
pos = value/2 + lead(csum, 1),
pos = if_else(is.na(pos), value/2, pos))
k1 = ggplot(df, aes(x = "" , y = value, fill = fct_inorder(tech))) +
geom_col(width = 1, color = 1) +
coord_polar(theta = "y") +
scale_fill_brewer(palette = "Pastel1") +
geom_label_repel(data = df2,
aes(y = pos, label = value),
size = 10, nudge_x = 1, show.legend = FALSE) +
guides(fill = guide_legend(title = "Technology")) +
theme_void()+theme(legend.position = "right",legend.title = element_text(size=25),legend.text = element_text(size=25))
df = data.table(group = c('ST','Visium','VisiumHD'), value = c(60145,2336306,1896744))
df2 <- df %>%
mutate(csum = rev(cumsum(rev(value))),
pos = value/2 + lead(csum, 1),
pos = if_else(is.na(pos), value/2, pos))
k2 = ggplot(df, aes(x = "" , y = value, fill = fct_inorder(group))) +
geom_col(width = 1, color = 1) +
coord_polar(theta = "y") +
scale_fill_brewer(palette = "Pastel1") +
geom_label_repel(data = df2,
aes(y = pos, label = value),
size = 10, nudge_x = 1, show.legend = FALSE) +
guides(fill = guide_legend(title = "Tech (spot)")) +
theme_void()+theme(legend.position = "right",legend.title = element_text(size=25),legend.text = element_text(size=25))
species_temp = meta$species
species_temp[! species_temp %in% c('human','mouse','human & mouse')]='other'
df = table(species_temp)
df = data.table(group = names(df), value = as.numeric(df))
df2 <- df %>%
mutate(csum = rev(cumsum(rev(value))),
pos = value/2 + lead(csum, 1),
pos = if_else(is.na(pos), value/2, pos))
k3 = ggplot(df, aes(x = "" , y = value, fill = fct_inorder(group))) +
geom_col(width = 1, color = 1) +
coord_polar(theta = "y") +
scale_fill_brewer(palette = "Pastel1") +
geom_label_repel(data = df2,
aes(y = pos, label = value),
size = 10, nudge_x = 1, show.legend = FALSE) +
guides(fill = guide_legend(title = "Species")) +
theme_void()+theme(legend.position = "right",legend.title = element_text(size=25),legend.text = element_text(size=25))
tissue = meta$tissue
tissue[! tissue %in% names(sort(-table(meta$tissue)))[1:10]]='other'
df = table(tissue)
df = data.table(group = names(df), value = as.numeric(df))
df2 <- df %>%
mutate(csum = rev(cumsum(rev(value))),
pos = value/2 + lead(csum, 1),
pos = if_else(is.na(pos), value/2, pos))
k4 = ggplot(df, aes(x = "" , y = value, fill = fct_inorder(group))) +
geom_col(width = 1, color = 1) +
coord_polar(theta = "y") +
scale_fill_tableau(palette = "Tableau 20") +
geom_label_repel(data = df2,
aes(y = pos, label = value),
size = 10, nudge_x = 1, show.legend = FALSE) +
guides(fill = guide_legend(title = "Tissue")) +
theme_void()+theme(legend.position = "right",legend.title = element_text(size=25),legend.text = element_text(size=25))
k = plot_grid(k1,k2,k3,k4,nrow=1)
Figure 3
Code to generate tSNE embedding:
import torch
import clip
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import sys
from sklearn.manifold import TSNE
import glob
now = datetime.now()
anno_path_list = glob.glob(f'./annotation/Human_Brain_Maynard*')
#anno_path_list = glob.glob('/proj/yunligrp/users/jiawen/st_data_paper/figure/table1/anno_forlp/*')
index = int(sys.argv[1])
model_list = glob.glob(f'./embedding/withanno/finetune_*_Human_Brain_Maynard_02082021*')
anno_name = anno_path_list[index]
anno_name_save = anno_name.replace('./annotation/','').replace('_anno.csv','')
for model_path in model_list:
model_name = model_path.replace('./embedding/withanno/','').replace('_withanno.csv','')
data = pd.read_csv(model_path,index_col=0,header=0)
anno = pd.read_csv(anno_name,header=0,index_col=0)
overlap_index = list(set(data.index.tolist()).intersection(set(anno.index.tolist())))
data_anno = data.loc[overlap_index,:]
anno = anno.loc[overlap_index,:]
tsne = TSNE(n_components=2, random_state=0)
data_tsne = tsne.fit_transform(data_anno.to_numpy())
data_tsne_pd = pd.DataFrame(data_tsne,index=data_anno.index)
data_tsne_pd.to_csv(f'./figure3/tsne_byanno/{model_name}_{anno_name_save}_tsne.csv')
Code for plotting:
# r/4.3.1
library(spatialLIBD)
library(jpeg)
library(data.table)
library(ggplot2)
library(egg)
library(ggsci)
library(ggrepel)
library(tidyverse)
library(cowplot)
library(ggthemes)
library(tidyr)
lp_result = fread('linear_probing_result32.csv')
lp_result$model_name = gsub('finetune_','finetune ',lp_result$model_name,fixed=TRUE)
lp_result$model_name = gsub('Human_Brain_Maynard_02082021_Visium','spatialLIBD',lp_result$model_name,fixed=TRUE)
lp_result$model_name = gsub('human_brain','human-brain',lp_result$model_name,fixed=TRUE)
lp_result$model_name = gsub('human_breast','human-breast',lp_result$model_name,fixed=TRUE)
lp_result$model_name = gsub('mouse_brain','mouse-brain',lp_result$model_name,fixed=TRUE)
lp_result$model_name = gsub('overlap_hvg','overlap-hvg',lp_result$model_name,fixed=TRUE)
lp_result = lp_result %>% mutate(model_name_raw=model_name) %>% separate(model_name, into = c("model", "training_data",'geneset'), sep = "_")
lp_result_zero = fread('linear_probing_zero_result.csv') %>% rename(model_name_raw=model_name)
lp_result_zero$training_data='zero-shot'
lp_result_zero$model=lp_result_zero$model_name_raw
lp_result_zero$geneset='zero-shot'
lp_result_all = rbind(lp_result,lp_result_zero,fill=TRUE)
lp_result_all$data_name_first20 = substr(lp_result_all$data_name,1,20)
lp_result_all$model_name_raw = factor(lp_result_all$model_name_raw,levels=c(sort(unique(lp_result$model_name_raw)),sort(unique(lp_result_zero$model_name_raw))))
lp_result_all = lp_result_all %>% filter(data_name=='Human_Brain_Maynard_02082021_Visium',training_data %in% c('spatialLIBD','zero-shot'))
lp_result_all$model_name_new = gsub('_spatialLIBD_',' ',lp_result_all$model_name_raw,fixed=TRUE)
lp_result_all$model_name_new = gsub('uni','UNI',lp_result_all$model_name_new,fixed=TRUE)
lp_result_all$model_name_new = factor(lp_result_all$model_name_new,levels=c('CLIP','finetune CLIP hvg','finetune CLIP overlap-hvg','finetune CLIP pca','PLIP','finetune PLIP hvg','finetune PLIP overlap-hvg','UNI'))
lp_result_all = lp_result_all %>% filter(geneset!='pca')
color_palette = c("#1f77b4","#729ECE","#aec7e8","#F28E2B","#FFBE7D","#FFDD71",'#9467BD')
p1=ggplot(lp_result_all) +
geom_bar( aes(x=model_name_new, y=mean_wf1,fill=model_name_new), stat="identity", alpha=1) +
geom_errorbar( aes(x=model_name_new, ymin=mean_wf1-std_wf1, ymax=mean_wf1+std_wf1), colour="orange", alpha=0.9, size=0.4,width = 0.5)+
#facet_grid(~data_name_first20)+
theme_article()+
scale_fill_manual(values=color_palette,name='Model name')+
ylab('Mean F1')+
xlab(NULL)+
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())#,strip.text.x = element_text(size = 5))
# embedding
dlpfc_model_tsne = function(model_name,model_type='',gene_type='',show_label=FALSE){
# generated tsne embedding using image embedding
path = './figure3/tsne_byanno/'
slides = c('151675')
anno_all = c()
tsne_all = c()
for(slide in slides){
tsne = fread(paste0(path,'/',model_name,'_','Human_Brain_Maynard_02082021_Visium_',slide,'_tsne.csv'),header=TRUE)
colnames(tsne) = c('V1','tSNE1','tSNE2')
anno = fread(paste0('./annotation/Human_Brain_Maynard_02082021_Visium_',slide,'_anno.csv'),header=TRUE)
anno$slide = slide
anno_all = rbind(anno_all,anno)
tsne_all = rbind(tsne_all,tsne)
}
anno_all = anno_all %>% left_join(tsne_all)
if(show_label){
label_text_color='black'
label_alpha=1
legend_position='right'
}else{
label_text_color='white'
label_alpha=0
legend_position='none'
}
model_name = gsub('_',' ',model_name)
k = ggplot(anno_all) +
geom_point(aes(x=tSNE1,y=tSNE2,color=V2),size=0.5,alpha=0.6) +
scale_color_tableau('Tableau 20',direction=-1,name='Brain\nlayer')+
ylab(NULL)+
xlab(paste0(model_type,' ',gene_type))+
theme_article()+
guides(colour = guide_legend(override.aes = list(size=5,alpha=label_alpha)))+
theme(plot.title = element_text(hjust = 0.5),axis.title.x = element_text(size=10),
legend.position=legend_position,
axis.text.x = element_blank(),axis.text.y = element_blank(),axis.ticks.x = element_blank(),axis.ticks.y = element_blank())
return(k)
}
k1=dlpfc_model_tsne('CLIP',show_label=TRUE)
legend = cowplot::get_legend(dlpfc_model_tsne('CLIP',show_label=TRUE))
k1=dlpfc_model_tsne('CLIP','CLIP')
k2=dlpfc_model_tsne('finetune_CLIP_Human_Brain_Maynard_02082021_Visium_hvg','finetune CLIP','hvg')
k3=dlpfc_model_tsne('finetune_CLIP_Human_Brain_Maynard_02082021_Visium_overlap_hvg','finetune CLIP','overlap-hvg')
k5=dlpfc_model_tsne('PLIP','PLIP')
k6=dlpfc_model_tsne('finetune_PLIP_Human_Brain_Maynard_02082021_Visium_hvg','finetune PLIP','hvg')
k7=dlpfc_model_tsne('finetune_PLIP_Human_Brain_Maynard_02082021_Visium_overlap_hvg','finetune PLIP','overlap-hvg')
k8=dlpfc_model_tsne('uni','UNI')
p2 = plot_grid(k1,k2,k3,k5,k6,k7,k8,legend,nrow=1,rel_widths=c(rep(1,7),0.4))
color_palette = c("#1f77b4","#729ECE","#aec7e8","#F28E2B","#FFBE7D","#FFDD71",'#9467BD')
sil_score = fread('silhouette_score_32.csv') %>%
rename(model_name=model) %>%
mutate(model_name = gsub('finetune_','finetune ',model_name,fixed=TRUE)) %>%
mutate(model_name = gsub('_Human_Brain_Maynard_02082021_Visium_',' ',model_name,fixed=TRUE)) %>%
mutate(model_name = gsub('overlap_hvg','overlap-hvg',model_name,fixed=TRUE)) %>%
mutate(model_name = gsub('uni','UNI',model_name,fixed=TRUE)) %>%
filter(model_name != 'finetune CLIP pca') %>%
mutate(model_name_new = factor(model_name,levels=c('CLIP','finetune CLIP hvg','finetune CLIP overlap-hvg','PLIP','finetune PLIP hvg','finetune PLIP overlap-hvg','UNI'))) %>%
mutate(slide = gsub('Human_Brain_Maynard_02082021_Visium_','',slide,fixed=TRUE))%>%
rename('Calinski-Harabasz'=`Calinski-Harabasz_score`,'Davies-Bouldin'=`Davies-Bouldin_score`,'Silhouette'=silhouette_score)
sil_score = melt(sil_score,id.vars=c('model_name','slide','model_name_new'))
p3=ggplot(sil_score) +
geom_boxplot( aes(x=model_name_new, y=value,color=model_name_new), alpha=1,lwd=1) +
geom_point( aes(x=model_name_new, y=value,fill=slide),shape=21,size=1.5,alpha=0.9)+
facet_wrap(~variable,scales='free_y')+
theme_article()+
scale_color_manual(values=color_palette,name='Model name')+
scale_fill_igv(name='Dataset')+
ylab(NULL)+
xlab(NULL)+
guides(fill=guide_legend(ncol=2))+
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())
library(tiff)
img = readTIFF("./Visium/image/Human_Brain_Maynard_02082021_Visium_151675.png")
grob <- grid::rasterGrob(img, width = grid::unit(1, "npc"), height = grid::unit(1, "npc"))
anno1 = fread('./annotation/Human_Brain_Maynard_02082021_Visium_151675_anno.csv')
coord1 = fread('./Visium/coord/Human_Brain_Maynard_02082021_Visium_151675_coord.csv')
coord1 = coord1 %>% left_join(anno1)
anno_brain = ggplot(coord1) +
geom_spatial(
data = tibble::tibble(grob = list(grob)),
aes(grob = grob),
x = 0.5,
y = 0.5
)+
geom_point(aes(x=xaxis,y=yaxis,color=V2),shape=21,size=0.15,alpha=0.7)+
scale_color_tableau('Tableau 20',direction=-1)+
xlim(0, ncol(img)) +
ylim(nrow(img), 0) +
theme_article()+
coord_fixed(expand = FALSE)+
theme(axis.text = element_blank(),axis.title = element_blank(),axis.ticks = element_blank(),legend.position = "none")
legend2 = cowplot::get_legend(p3)
f_top = plot_grid(p1+facet_grid(~'Mean F1')+theme(legend.position='none',strip.text = element_text(size=10))+ylab(NULL),
p3+theme(legend.position='none',strip.text = element_text(size=10)),
plot_grid(cowplot::get_legend(p1),legend2[3],ncol=1),
anno_brain,
labels=c('a','b','','c'),nrow=1,rel_widths=c(0.2,0.8,0.3,0.5))
blank = ggplot()+theme_void()
k = plot_grid(f_top,
plot_grid(blank,p2+theme(axis.title.x = element_text(size=10)),
nrow=1,rel_widths=c(0.015,1)),
nrow=2,labels=c('','d'),rel_heights=c(1,1))
ggsave('figure3.png',k,bg='white',width=13,height=5,dpi=300)
Figure 4
import re
import sys
from typing import List,Dict
import numpy as np
import pandas as pd
from PIL import Image, ImageOps
from PIL import ImageEnhance
from skimage.segmentation import clear_border
from skimage import measure, color, io
from torchvision import transforms
import pandas as pd
import torch as t
from torch.utils.data import Dataset
from scipy.stats import rankdata
import numpy as np
from torchvision import transforms
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image, ImageOps
import sys
import os
import requests
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import numpy as np
from torchvision import transforms
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image, ImageOps
from skimage.feature import peak_local_max
from skimage.segmentation import watershed
from scipy import ndimage
import glob
import requests
import scanpy as sc
Image.MAX_IMAGE_PIXELS = 3080000000000000000000000000000000000
def plot_image(dataset_name,slide,tech,data_size,ax,point_size=10):
path = './'
temp_image = Image.open(f'{path}/{tech}/image/{dataset_name}{slide}.png')
temp_image = temp_image.convert('RGB')
coord = pd.read_csv(f'{path}/{tech}/coord/{dataset_name}{slide}_coord.csv',index_col=0,header=0)
anno = pd.read_csv(f'./annotation/{dataset_name}{slide}_anno.csv',index_col=0,header=0)
anno = anno.loc[~anno.index.duplicated(keep='first'),:]
coord = pd.concat([coord,anno],axis=1)
coord.index = range(len(coord.index))
ax.imshow(temp_image)
sns.scatterplot(x=coord.xaxis,y=coord.yaxis,hue=coord.iloc[:,3].tolist(),ax=ax,s=point_size)
#sns.move_legend(ax, "center right",bbox_to_anchor=(1.2, 0.5),frameon=False)
ax.axis('off')
ax.set_title(f'{dataset_name} (n={data_size})')
ax.legend(loc="lower right",markerscale =20/point_size)
plt.clf()
fig,axs = plt.subplots(3,3,figsize=(25,25))
plot_image('Human_Brain_Maynard_02082021_Visium','_151676','Visium',12,axs[0][0],5)
plot_image('Human_Breast_Andersson_10142021_ST','_G2','ST',8,axs[0][1],50)
plot_image('Human_Breast_10X_06232020_Visium_Block_A_Section_1','','Visium',1,axs[0][2],10)
plot_image('Human_Breast_10X_06092021_Visium','','Visium',1,axs[1][0],10)
plot_image('GSE193460','_GSM5808054','Visium',4,axs[1][1],10)
plot_image('Human_Prostate_Erickson_08102022_Visium','_Patient_1_H1_2','Visium',7,axs[1][2],10)
plot_image('GSE175540','_GSM5924030','Visium',23,axs[2][0],10)
plot_image('GSE213688','_GSM6592049','Visium',14,axs[2][1],10)
plot_image('Mouse_Brain_10X_06232020_Visium_Sagittal_Anterior_Section_1','','Visium',1,axs[2][2],10)
axs[2][2].legend().set_visible(False)
plt.tight_layout()
PREVIOUSNuclear segmentation