热点新闻
封装函数-用R包Seurat跑单套数据
2023-07-05 07:15  浏览:185  搜索引擎搜索“手机展会网”
温馨提示:信息一旦丢失不一定找得到,请务必收藏信息以备急用!本站所有信息均是注册会员发布如遇到侵权请联系文章中的联系方式或客服删除!
联系我时,请说明是在手机展会网看到的信息,谢谢。
展会发布 展会网站大全 报名观展合作 软文发布

呜呜最近发现我工作效率低的一个原因就是重复性工作没有流程化,一气之下,把seurat分析单套数据的流程封装了起来,步骤包含数据质控、数据标准化、聚类以及初步的细胞类型鉴定。细胞类型鉴定是用每个cluster的top marker来标注的。之后再更新整合多套数据的流程,希望与君分享

1. 用到的所有函数放在了SeuratWrapperFunction.R中了

这个需要用source()函数导入到下面封装好的代码中的

### Time: 20221025 ### Author: zhengyiyi ## load function library(Seurat) library(SingleCellExperiment) library(scater) library(MAST) library(ggplot2) library(patchwork) library(RColorBrewer) library(Seurat) library(openxlsx) library("grid") library("ggplotify") library(magrittr) library(dplyr) library(ggsignif) library("ggplot2") library(cowplot) library(openxlsx) CreateFirstFilterSeuratObject <- function(exp_count, pr_name, Or_ident, min_cell, min_gene, mt_pattern,rb_pattern){ # CreateSeuratObject : filter gene and cell by min.cells(Include features detected in at least this many cells) and min.features(Include cells where at least this many features are detected.) exp_count.seurat <- CreateSeuratObject(exp_count, project = pr_name, min.cells = min_cell, min.features = min_gene) exp_count.seurat@meta.data$orig.ident <- as.factor(Or_ident) exp_count.seurat@active.ident <- exp_count.seurat$orig.ident ## Check the mt gene and rp gene exp_count.seurat[["percent.mt"]] <- PercentageFeatureSet(object = exp_count.seurat, pattern = mt_pattern) exp_count.seurat[["percent.rb"]] <- PercentageFeatureSet(object = exp_count.seurat, pattern = rb_pattern) print(levels(exp_count.seurat@active.ident)) print(table(exp_count.seurat$orig.ident)) print(head(x = exp_count.seurat@meta.data, 5)) print(dim(exp_count.seurat@meta.data)) return(exp_count.seurat) } SeuratQC <- function(exp_count.seurat, name_pre){ folder_name="1.QC" if (file.exists(folder_name)){ print("1.QC existed.") } else{ dir.create(folder_name) } file_name=paste(name_pre,"Violinplot.pdf",sep="_") reads.drop <- isOutlier(as.numeric(exp_count.seurat$nCount_RNA), nmads = 3, type = "lower") feature.drop <- isOutlier(as.numeric(exp_count.seurat$nFeature_RNA), nmads = 3, type = "lower") qc.mito2 <- isOutlier(exp_count.seurat$percent.mt, nmads = 3, type="higher") qc.ribo2 <- isOutlier(exp_count.seurat$percent.rb, nmads = 3, type="higher") ### 保留下那些质量差的细胞,看看这些细胞的情况 low_q_cells <- (reads.drop | feature.drop | qc.mito2 | qc.ribo2) exp_count.seurat$lowquality_cells <- "High_qualitycells" exp_count.seurat$lowquality_cells[low_q_cells] <- "Low_qualitycells" plot1=FeatureScatter(object = exp_count.seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "lowquality_cells") plot2=FeatureScatter(object = exp_count.seurat, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "lowquality_cells") plot3=FeatureScatter(object = exp_count.seurat, feature1 = "nCount_RNA", feature2 = "percent.rb", group.by = "lowquality_cells") pdf(file = file_name, height = 10, width = 20) print(VlnPlot(object = exp_count.seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 2, pt.size=0)) opar <- par(no.readonly = TRUE) par(mfrow=c(2,2)) print(hist(exp_count.seurat$nCount_RNA, breaks = 100, main = "Reads Count Distribution", col = "grey80", xlab = "library size", ylab = "frequency", cex.lab = 1.4, cex.axis = 1.4)) print(abline(v = 10000, col = "red", lwd = 2, lty = 2)) print(hist(exp_count.seurat$nFeature_RNA, breaks = 100, main = "Gene Count Distribution", col = "grey80", xlab = "total mRNA", ylab = "frequency", cex.lab = 1.4, cex.axis = 1.4)) print(abline(v = 1500, col = "red", lwd = 2, lty = 2)) print(hist(exp_count.seurat$percent.mt, breaks = 10, main = "Mitochondrial Percentage Distribution", col = "grey80", xlab = "total mRNA", ylab = "frequency", cex.lab = 1.4, cex.axis = 1.4)) print(abline(v = 20, col = "red", lwd = 2, lty = 2)) print(hist(exp_count.seurat$percent.rb, breaks = 10, main = "Ribosome Percentage Distribution", col = "grey80", xlab = "total mRNA", ylab = "frequency", cex.lab = 1.4, cex.axis = 1.4)) print(abline(v = 20, col = "red", lwd = 2, lty = 2)) par(opar) print(plot1 + plot2 + plot3) dev.off() #copy files to 1.QC result_files <- c(file_name) file.copy(result_files, folder_name ,overwrite = T) file.remove(result_files) print("based on the Violin plot, you can filter the cell by the nFeature_RNA, nCount_RNA and/or percent.mt rb") } SubsetSeuratData <- function(exp_count.seurat, min_gene, max_gene, min_ncountRNA, max_ncountRNA, max_mt_percent, max_rb_percent){ exp_count.seurat <- subset(exp_count.seurat, subset = nCount_RNA < max_ncountRNA & nCount_RNA > min_ncountRNA & nFeature_RNA < max_gene & nFeature_RNA > min_gene & percent.mt < max_mt_percent & percent.rb < max_rb_percent ) # QC qc_pdf_name <- paste("filtered_by_gene", min_gene, "UMI", min_ncountRNA, "percent.mt", max_mt_percent, "percent.rb", max_rb_percent, sep="_") SeuratQC(exp_count.seurat, qc_pdf_name) dim(exp_count.seurat) print(levels(exp_count.seurat$orig.ident)) return(exp_count.seurat) } SelectPCsByLogNormalize <- function(exp_count.seurat, file_prefix, scale_factor, nfeatures, npc_plot){ folder_name="2.RunPCA" if (file.exists(folder_name)){ print("2.RunPCA existed.") } else{ dir.create(folder_name) } DefaultAssay(exp_count.seurat) <- "RNA" # Normalize Find VariableGene and Scale Data exp_count.seurat <- NormalizeData(exp_count.seurat, normalization.method = "LogNormalize", scale.factor = scale_factor) exp_count.seurat <- FindVariableFeatures(exp_count.seurat, selection.method = "vst", nfeatures = nfeatures) exp_count.seurat <- ScaleData(exp_count.seurat, vars.to.regress = c("percent.rb","percent.mt")) # RunPCA exp_count.seurat <- RunPCA(exp_count.seurat, pc.genes = exp_count.seurat@var.genes, npcs = npc_plot, verbose = FALSE,) # Select PCs:1:30pcs selected_pcs_name <- paste0(file_prefix, "_Selected_PCs.pdf") pdf(file = selected_pcs_name, height = 8, width = 8) print(ElbowPlot(object = exp_count.seurat, ndims = npc_plot, reduction = "pca")) dev.off() # Plot pca_plot_name <- paste0(file_prefix, "_PCA_plot",".pdf") pdf(pca_plot_name,8,8) print(DimPlot(exp_count.seurat, reduction="pca", label = TRUE, pt.size=0.5, group.by="ident")) dev.off() #Copy files to 2.RunPCA result_files <- c(selected_pcs_name,pca_plot_name) file.copy(result_files, folder_name ,overwrite = T)#拷贝文件 file.remove(result_files) #移除拷贝完的文件 return(exp_count.seurat) } SelectPCsBySCTransform <- function(exp_count.seurat, file_prefix, npc_plot){ folder_name="2.RunPCA" if (file.exists(folder_name)){ print("2.RunPCA existed.") } else{ dir.create(folder_name) } # Normalize Find VariableGene and Scale Data exp_count.seurat <- SCTransform(exp_count.seurat, vars.to.regress = c("percent.rb","percent.mt")) # RunPCA exp_count.seurat <- RunPCA(exp_count.seurat, verbose = FALSE, npcs = npc_plot) # Select PCs:1:30pcs selected_pcs_name <- paste0(file_prefix, "_Selected_PCs.pdf") pdf(file = selected_pcs_name, height = 8, width = 8) print(ElbowPlot(object = exp_count.seurat, ndims = npc_plot, reduction = "pca")) dev.off() # Plot pca_plot_name <- paste0(file_prefix, "_PCA_plot",".pdf") pdf(pca_plot_name, 8, 8) print(DimPlot(exp_count.seurat,reduction="pca", label = TRUE, pt.size= 0.5,group.by="ident",shape.by="orig.ident")) dev.off() #Copy files to 2.RunPCA result_files <- c(selected_pcs_name,pca_plot_name) file.copy(result_files, folder_name ,overwrite = T)#拷贝文件 file.remove(result_files) #移除拷贝完的文件 return(exp_count.seurat) } # RunUMAP and RunTSNE and Then find cluster PlotCluster <- function(exp_count.seurat, file_prefix, npc_used, k_param, resolution_number){ folder_name="3.PlotCluster" if (file.exists(folder_name)){ print("3.PlotCluster existed.") } else{ dir.create(folder_name) } # FinderNeighbors exp_count.seurat <- FindNeighbors(exp_count.seurat, dims = 1:npc_used, verbose = FALSE, k.param = k_param) exp_count.seurat <- FindClusters(exp_count.seurat, verbose = FALSE, resolution = resolution_number) # RunTSNE and RunUMAP exp_count.seurat <- RunTSNE(exp_count.seurat, dims = 1:npc_used, verbose = FALSE, check_duplicates = FALSE) exp_count.seurat <- RunUMAP(exp_count.seurat, dims = 1:npc_used, verbose = FALSE) # RenameIdents from zero to one levels_define <- as.numeric(levels(exp_count.seurat)) new.cluster.ids <- levels_define + 1 names(new.cluster.ids) <- levels(exp_count.seurat) exp_count.seurat <- RenameIdents(exp_count.seurat, new.cluster.ids) combined_cluster_plotname <- paste0(file_prefix, "_combined_cluster_resolution_",resolution_number,".pdf") pdf(combined_cluster_plotname,7,7) print(DimPlot(exp_count.seurat,reduction="umap",label = TRUE, pt.size = 0.5, label.size = 4.5, repel=TRUE, group.by="ident")) print(DimPlot(exp_count.seurat,reduction="tsne", label = TRUE, pt.size = 0.5, label.size = 4.5, repel=TRUE, group.by="ident")) dev.off() split_clustering_plot_name <- paste0(file_prefix, "_split_clustering_resolution_", resolution_number, ".pdf") pdf(split_clustering_plot_name,14,7) print(DimPlot(exp_count.seurat,reduction="umap",label = TRUE, pt.size = 0.5, label.size = 4.5, group.by="ident", repel=TRUE, split.by="orig.ident")) print(DimPlot(exp_count.seurat,reduction="tsne", label = TRUE, pt.size = 0.5, label.size = 4.5, group.by="ident", repel=TRUE, split.by="orig.ident")) dev.off() # Copy files to 2.Cluster file.copy(combined_cluster_plotname, folder_name ,overwrite = T) file.remove(combined_cluster_plotname) file.copy(split_clustering_plot_name, folder_name ,overwrite = T) file.remove(split_clustering_plot_name) return(exp_count.seurat) } # Find markers for each cluster FindmarkerForCluster <- function(exp_count.seurat, file_prefix, min.pct, logfc.threshold, p_val_adj, mt_rb_pattern){ folder_name="4.MarkersInCluster" if (file.exists(folder_name)){ print("4.MarkersInCluster file existed.") } else{ dir.create(folder_name) } cluster.markers <- FindAllMarkers(object = exp_count.seurat, only.pos = TRUE, min.pct = min.pct, logfc.threshold = logfc.threshold) index <- cluster.markers$p_val_adj < p_val_adj cluster.markers <- cluster.markers[index,] # remove mt and rb gene index <- grep(mt_rb_pattern, cluster.markers$gene) if (length(index) > 0){ cluster.markers <- cluster.markers[-index, ] } save_name <- paste0(file_prefix,"_MarkersInClusters.csv") write.csv(cluster.markers,save_name) file.copy(save_name, folder_name,overwrite = T) file.remove(save_name) return(cluster.markers) } # Top markers for each cluster TopMarkersInCluster <- function(cluster.markers, file_prefix, top_num){ library(dplyr) folder_name="4.MarkersInCluster" if (file.exists(folder_name)){ print("4.MarkersInCluster file existed.") } else{ dir.create(folder_name) } #将readsCountSM.markers传给group_by,group_by按cluster 排序,再将结果传给top_n,top_n按avg_logFC排序,显示每个类中的前两个 top_marker <- cluster.markers %>% group_by(cluster) %>% top_n(n = top_num, wt = avg_log2FC) file_name=paste(file_prefix, "_top_marker", top_num,".csv",sep="") write.csv(top_marker, file =file_name) file.copy(file_name, folder_name,overwrite = T) return(top_marker) file.remove(file_name) } # Rename each cluster with top2 markers MapTop2MarkerEachCluster <- function(exp_count.seurat, cluster.markers, file_prefix){ library(dplyr) library(plyr) exp_count.seurat$seurat_clusters <- exp_count.seurat@active.ident top2 <- cluster.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC) top2 <- top2 %>% group_by(cluster) %>% dplyr::mutate(markers = paste0(gene, collapse = "/")) %>% dplyr::slice(1) marker.names <- top2$markers current.cluster.ids <- as.character(1:(length(unique(Idents(exp_count.seurat))))) new.cluster.ids <- marker.names Idents(exp_count.seurat) <- plyr::mapvalues(Idents(exp_count.seurat), from = current.cluster.ids, to = new.cluster.ids) return(exp_count.seurat) }

2.DataQualityOverviewSingle.R: 数据质控的函数是放在了这个脚本

使用的话需要把source()那行代码改成自己的路径
这边有个特别注意的点就是这个脚本里的第一行
“#!/home/zhengjh/miniconda3/envs/r403/bin/Rscript”
之前我是把脚本放环境变量里了,每次source环境的时候总是就不能直接使用,必须全路径,只要把这个Rscript所在位置换成你用的conda环境中的Rscript就可以了。另外需要chmod +x DataQualityOverviewSingle.R 这个代码,才可以执行

#!/home/zhengjh/miniconda3/envs/r403/bin/Rscript # parameter library(optparse) library(getopt) option_list <- list( make_option(c("-o", "--output"), type = "character", default = FALSE, action = "store", help = "This is the output directory." ), make_option(c("-t", "--Type"), type = "character", default = "10X", action = "store", help = "This is type of exp matrix, default is 10X, the other is .csv which row is gene, column is cell." ), make_option(c("--datapath"), type = "character", default = FALSE, action = "store", help = "This is the path of exp matrix." ), make_option(c("--project_name"), type = "character", default = FALSE, action = "store", help = "This is project name built in seurat object and the pre_fix of files generated by this function." ), make_option(c("--mt_pattern"), type = "character", default = "^mt-", action = "store", help = "This is mitochondria pattern used to calculate its percentage, default is the ^mt-." ), make_option(c("--rb_pattern"), type = "character", default = "^Rpl|^Rps", action = "store", help = "This is ribosome pattern used to calculate its percentage, default is ^Rpl|^Rps" ), make_option(c("--min_cell"), type = "integer", default = 3, action = "store", help = "This is min cells number to filter, default is 3." ), make_option(c("--min_gene"), type = "integer", default = 0, action = "store", help = "This is min gene number to filter, default is 0." ) ) # -help opt = parse_args(OptionParser(option_list = option_list, usage = "This script is general overview data quality of single cell exp matrix!")) print(opt) #### step0.load data #### source("/home/zhengjh/scripts/seurat/function/SeuratWrapperFunction.R") print("Step0: Load Data") ##### step00. load functions #### # read in the functions timestart<-Sys.time() # set the output path cat("Current working dir: ", opt$output) cat("\n") setwd(opt$output) # load ref object #### step01. load exp matrix #### print("Step0: Load exp matrix data") if(opt$Type=="10X"){ print("Read exp matrix from 10X cellranger output") exp_count <- Read10X(data.dir = opt$datapath) print("The rownum and colnum of exp_count is ") print(dim(exp_count)) }else{ print("Read exp matrix from csv file") exp_count <- read.csv(opt$datapath, header = T, row.names = 1) print("The rownum and colnum of normal exp is ") print(dim(exp_count)) } #### step1: Create_seurat_filter_cell ###### print("Step1: Creat Seuart Object with filtering lower genes and lower cells") Or_ident <- rep(opt$project_name, ncol(exp_count)) exp_count.seurat <- CreateFirstFilterSeuratObject(exp_count, opt$project_name, Or_ident, opt$min_cell, opt$min_gene, opt$mt_pattern, opt$rb_pattern) print("The row_num and col_num of the first filtered matrix:") print(dim(exp_count.seurat)) print("The cellnumber of samples:") print(table(exp_count.seurat$orig.ident)) name_pre <- paste0("filter_gene_expressed_", opt$min_cell, "cells", opt$min_gene, "genes") #### step2: Overview of the data quality ###### print("Step2: Overview of the data quality") SeuratQC(exp_count.seurat, name_pre) rds_name <- paste0(opt$project_name, "_dataquality_overview_seurat.rds") saveRDS(exp_count.seurat, file = rds_name) print("Happy~Finished!!!") timeend<-Sys.time() runningtime<-timeend-timestart print(runningtime)

3.LogNormalizeClusteringSingle.R脚本: LogNormalize数据并聚类

#!/home/zhengjh/miniconda3/envs/r403/bin/Rscript # parameter library(optparse) library(getopt) option_list <- list( make_option(c("-o", "--output"), type = "character", default = FALSE, action = "store", help = "This is the output directory." ), make_option(c( "--seuratobject"), type = "character", default = "10X", action = "store", help = "This is absoulte path of seuratobject." ), make_option(c("--min_gene"), type = "integer", default = 50, action = "store", help = "This is min gene number to filter, default is 50." ), make_option(c("--max_gene"), type = "integer", default = 10000, action = "store", help = "This is max gene number to filter, default is 10000." ), make_option(c("--min_ncountRNA"), type = "integer", default = 500, action = "store", help = "This is min ncountRNA to filter, default is 5000." ), make_option(c("--max_ncountRNA"), type = "integer", default = 100000, action = "store", help = "This is min ncountRNA to filter, default is 100000." ), make_option(c("--max_mt_percent"), type = "integer", default = 60, action = "store", help = "This is min mitochondria percentage to filter, default is 60." ), make_option(c("--max_rb_percent"), type = "integer", default = 60, action = "store", help = "This is min ribosome percentage to filter, default is 60." ), make_option(c("--file_prefix"), type = "character", action = "store", help = "This is number of file prefix to used in the following analysis." ), make_option(c("--scale_factor"), type = "integer", default = 10000, action = "store", help = "This is scale factor used in Seurat NormalizeData function, default is 10000." ), make_option(c("--nfeatures"), type = "integer", default = 2000, action = "store", help = "This is number of features used in Seurat FindVariableFeatures function, default is 2000." ), make_option(c("--npc_plot"), type = "integer", default = 50, action = "store", help = "This is number of principles to plot, default is 50." ), make_option(c("--npc_used"), type = "integer", default = 20, action = "store", help = "This is number of principles to use in RunTSNE and RunUMAP function, default is 20." ), make_option(c("--k_parameter"), type = "integer", default = 20, action = "store", help = "This is k parameter used in FindNeighbors function which will influence the clusering number, default is 20." ), make_option(c("--resolution_number"), type = "double", default = 0.5, action = "store", help = "This is resolution number used in FindClusters function , default is 0.5" ), make_option(c("--min_pct"), type = "double", default = 0.25, action = "store", help = "This is min.pct used in FindAllMarkers function , default is 0.25" ), make_option(c("--logfc_threshold"), type = "double", default = 0.25, action = "store", help = "This is logfc.threshold used in FindClusters function , default is 0.25" ), make_option(c("--p_val_adj"), type = "double", default = 0.05, action = "store", help = "This is p_val_adj used to filter marker , default is 0.05" ), make_option(c("--top_num"), type = "integer", default = 10, action = "store", help = "This is top num marker of each cluster, default is 10" ), make_option(c("--mt_rb_pattern"), type = "character", default = "^mt-|^Rpl-|^Rps-", action = "store", help = "This is mt_rb_pattern used to filter mt/rb genes found in cluster markers, default is ^mt-|^Rpl-|^Rps-" ) ) # -help opt = parse_args(OptionParser(option_list = option_list, usage = "This script is general processing of single cell exp matrix!")) print(opt) source("/home/zhengjh/scripts/seurat/function/SeuratWrapperFunction.R") ###### step0. subset seurat object #### timestart<-Sys.time() # set the output path setwd(opt$output) exp_count.seurat <- readRDS(opt$seuratobject) print("Step0: Subset Seurat Object") cat(opt$min_gene, "< gene number <", opt$max_gene, "\n") cat(opt$min_ncountRNA, "< ncountRNA <", opt$max_ncountRNA, "\n") cat("max_mt_percent ", opt$max_mt_percent, "\n") cat("max_rb_percent ", opt$max_rb_percent, "\n") exp_count.seurat <- SubsetSeuratData(exp_count.seurat = exp_count.seurat, min_gene = opt$min_gene, max_gene = opt$max_gene, min_ncountRNA = opt$min_ncountRNA, max_ncountRNA = opt$max_ncountRNA, max_mt_percent = opt$max_mt_percent, max_rb_percent = opt$max_rb_percent) #### step1. normalize data and run pca #### print("Step1: LogNormalize Data and Run PCA") cat("scale factor used in NormalizeData function is ", opt$scale_factor, "\n") cat("nfeatures used in Seurat FindVariableFeatures function is ", opt$nfeatures, "\n") cat("npc_plot used in ElbowPlot is ", opt$npc_plot, "\n") exp_count.seurat <- SelectPCsByLogNormalize(exp_count.seurat = exp_count.seurat, file_prefix = opt$file_prefix, scale_factor = opt$scale_factor, nfeatures = opt$nfeatures, npc_plot = opt$npc_plot) #### step2. find clusters #### print("Step2: FindNeighbors and clusters and then run tsne and umap reduction") cat("k_param used in Seurat FindNeighbors function is ", opt$k_param, "\n") cat("resolution_number used in Seurat FindClusters function is ", opt$resolution_number, "\n") cat("npc_used used in RunTSNE and RUNUMAP is ", opt$npc_used, "\n") exp_count.seurat <- PlotCluster(exp_count.seurat = exp_count.seurat, file_prefix = opt$file_prefix, npc_used = opt$npc_used, k_param = opt$k_param, resolution_number = opt$resolution_number) #### step3. find top markers ##### print("Step3: find top markers and then map cluster with top2 marke") cat("min.pct used in Seurat FindAllMarkers function is ", opt$min.pct, "\n") cat("logfc.threshold used in Seurat FindAllMarkers function is ", opt$logfc.threshold, "\n") cat("p_val_adj used to filter markers is ", opt$p_val_adj, "\n") cluster.markers <- FindmarkerForCluster(exp_count.seurat = exp_count.seurat, file_prefix = opt$file_prefix, min.pct = opt$min_pct, logfc.threshold = opt$logfc_threshold, p_val_adj = opt$p_val_adj, mt_rb_pattern = opt$mt_rb_pattern) TopMarker <- TopMarkersInCluster(cluster.markers = cluster.markers, file_prefix = opt$file_prefix, top_num = opt$top_num) # Top markers heatmap P_heatmap_top_marker <- DoHeatmap(exp_count.seurat, features = TopMarker$gene) filename <- paste0(opt$file_prefix, "_Heatmap_plot_top_markers.pdf") folder_name <- "4.MarkersInCluster" pdf(filename, 30, 15) print(P_heatmap_top_marker) dev.off() file.copy(filename, folder_name, overwrite = T) #copy files file.remove(filename) #### step4. map top2 marker to each cluster ##### print("step4: map top2 marker to each cluster") exp_count.seurat <- MapTop2MarkerEachCluster(exp_count.seurat = exp_count.seurat, cluster.markers = cluster.markers, file_prefix = opt$file_prefix) #### step5. save the clustering plots aftering assign top2 markers ### print("step5. save the clustering plots aftering assign top2 markers") p1 <- DimPlot(exp_count.seurat, reduction = "umap",label = TRUE, pt.size = 0.5, label.size = 4.5, repel = TRUE, group.by = "ident") + NoLegend() p1 <- p1 + labs(title="Clustering UMAP Reduction") p1 <- p1 + theme(plot.title = element_text(hjust = 0.5)) p2 <- DimPlot(exp_count.seurat, reduction = "umap",label = TRUE, pt.size = 0.5, label.size = 4.5, repel = TRUE, group.by = "ident") p2 <- p2 + labs(title="Clustering TSNE Reduction") p2 <- p2 + theme(plot.title = element_text(hjust = 0.5)) plots <- plot_grid(p1, p2, ncol=2,rel_widths = c(1.7, 2.2)) cluster_pdfname <- paste0(opt$file_prefix, "_Clustering_TSNE_UMAP.pdf") folder_name <- "3.PlotCluster" pdf(cluster_pdfname, width = 18, height = 6) print(plots) dev.off() file.copy(cluster_pdfname, folder_name, overwrite = T)#拷贝文件 file.remove(cluster_pdfname) rds_name <- paste0(opt$file_prefix, "_lognormalize_clustering_seurat.rds.rds") saveRDS(exp_count.seurat, file = rds_name) print("Happy~Finished!!") timeend<-Sys.time() runningtime<-timeend-timestart print(runningtime)

4. SCTransformClusteringSingle.R 脚本: SCTtransform数据并聚类

#!/home/zhengjh/miniconda3/envs/r403/bin/Rscript # parameter library(optparse) library(getopt) option_list <- list( make_option(c("-o", "--output"), type = "character", default = FALSE, action = "store", help = "This is the output directory." ), make_option(c( "--seuratobject"), type = "character", default = "10X", action = "store", help = "This is absoulte path of seuratobject." ), make_option(c("--min_gene"), type = "integer", default = 50, action = "store", help = "This is min gene number to filter, default is 50." ), make_option(c("--max_gene"), type = "integer", default = 10000, action = "store", help = "This is max gene number to filter, default is 10000." ), make_option(c("--min_ncountRNA"), type = "integer", default = 500, action = "store", help = "This is min ncountRNA to filter, default is 5000." ), make_option(c("--max_ncountRNA"), type = "integer", default = 100000, action = "store", help = "This is min ncountRNA to filter, default is 100000." ), make_option(c("--max_mt_percent"), type = "integer", default = 60, action = "store", help = "This is min mitochondria percentage to filter, default is 60." ), make_option(c("--max_rb_percent"), type = "integer", default = 60, action = "store", help = "This is min ribosome percentage to filter, default is 60." ), make_option(c("--file_prefix"), type = "character", action = "store", help = "This is number of file prefix to used in the following analysis." ), make_option(c("--npc_plot"), type = "integer", default = 50, action = "store", help = "This is number of principles to plot, default is 50." ), make_option(c("--npc_used"), type = "integer", default = 20, action = "store", help = "This is number of principles to use in RunTSNE and RunUMAP function, default is 20." ), make_option(c("--k_parameter"), type = "integer", default = 20, action = "store", help = "This is k parameter used in FindNeighbors function which will influence the clusering number, default is 20." ), make_option(c("--resolution_number"), type = "double", default = 0.5, action = "store", help = "This is resolution number used in FindClusters function , default is 0.5" ), make_option(c("--min_pct"), type = "double", default = 0.25, action = "store", help = "This is min.pct used in FindAllMarkers function , default is 0.25" ), make_option(c("--logfc_threshold"), type = "double", default = 0.25, action = "store", help = "This is logfc.threshold used in FindClusters function , default is 0.25" ), make_option(c("--p_val_adj"), type = "double", default = 0.05, action = "store", help = "This is p_val_adj used to filter marker , default is 0.05" ), make_option(c("--top_num"), type = "integer", default = 10, action = "store", help = "This is top num marker of each cluster, default is 10" ), make_option(c("--mt_rb_pattern"), type = "character", default = "^mt-|^Rpl-|^Rps-", action = "store", help = "This is mt_rb_pattern used to filter mt/rb genes found in cluster markers, default is ^mt-|^Rpl-|^Rps-" ) ) # -help opt = parse_args(OptionParser(option_list = option_list, usage = "This script is general processing of single cell exp matrix!")) print(opt) source("/home/zhengjh/scripts/seurat/function/SeuratWrapperFunction.R") ###### step0. subset seurat object #### timestart<-Sys.time() # set the output path setwd(opt$output) exp_count.seurat <- readRDS(opt$seuratobject) print("Step0: Subset Seurat Object") cat(opt$min_gene, "< gene number <", opt$max_gene, "\n") cat(opt$min_ncountRNA, "< ncountRNA <", opt$max_ncountRNA, "\n") cat("max_mt_percent ", opt$max_mt_percent, "\n") cat("max_rb_percent ", opt$max_rb_percent, "\n") exp_count.seurat <- SubsetSeuratData(exp_count.seurat = exp_count.seurat, min_gene = opt$min_gene, max_gene = opt$max_gene, min_ncountRNA = opt$min_ncountRNA, max_ncountRNA = opt$max_ncountRNA, max_mt_percent = opt$max_mt_percent, max_rb_percent = opt$max_rb_percent) #### step1. normalize data and run pca #### print("Step1: SCTtransform Data and Run PCA") cat("scale factor used in NormalizeData function is ", opt$scale_factor, "\n") cat("nfeatures used in Seurat FindVariableFeatures function is ", opt$nfeatures, "\n") cat("npc_plot used in ElbowPlot is ", opt$npc_plot, "\n") exp_count.seurat <- SelectPCsBySCTransform(exp_count.seurat = exp_count.seurat, file_prefix = opt$file_prefix, npc_plot = opt$npc_plot) #### step2. find clusters #### print("Step2: FindNeighbors and clusters and then run tsne and umap reduction") cat("k_param used in Seurat FindNeighbors function is ", opt$k_param, "\n") cat("resolution_number used in Seurat FindClusters function is ", opt$resolution_number, "\n") cat("npc_used used in RunTSNE and RUNUMAP is ", opt$npc_used, "\n") exp_count.seurat <- PlotCluster(exp_count.seurat = exp_count.seurat, file_prefix = opt$file_prefix, npc_used = opt$npc_used, k_param = opt$k_param, resolution_number = opt$resolution_number) #### step3. find top markers ##### print("Step3: find top markers and then map cluster with top2 marke") cat("min.pct used in Seurat FindAllMarkers function is ", opt$min.pct, "\n") cat("logfc.threshold used in Seurat FindAllMarkers function is ", opt$logfc.threshold, "\n") cat("p_val_adj used to filter markers is ", opt$p_val_adj, "\n") cluster.markers <- FindmarkerForCluster(exp_count.seurat = exp_count.seurat, file_prefix = opt$file_prefix, min.pct = opt$min_pct, logfc.threshold = opt$logfc_threshold, p_val_adj = opt$p_val_adj, mt_rb_pattern = opt$mt_rb_pattern) TopMarker <- TopMarkersInCluster(cluster.markers = cluster.markers, file_prefix = opt$file_prefix, top_num = opt$top_num) # Top markers heatmap P_heatmap_top_marker <- DoHeatmap(exp_count.seurat, features = TopMarker$gene) filename <- paste0(opt$file_prefix, "_Heatmap_plot_top_markers.pdf") folder_name <- "4.MarkersInCluster" pdf(filename, 30, 15) print(P_heatmap_top_marker) dev.off() file.copy(filename, folder_name, overwrite = T)# copy files file.remove(filename) #### step4. map top2 marker to each cluster ##### print("step4: map top2 marker to each cluster") exp_count.seurat <- MapTop2MarkerEachCluster(exp_count.seurat = exp_count.seurat, cluster.markers = cluster.markers, file_prefix = opt$file_prefix) #### step5. save the clustering plots aftering assign top2 markers ### print("step5. save the clustering plots aftering assign top2 markers") p1 <- DimPlot(exp_count.seurat, reduction = "umap",label = TRUE, pt.size = 0.5, label.size = 4.5, repel = TRUE, group.by = "ident") + NoLegend() p1 <- p1 + labs(title="Clustering UMAP Reduction") p1 <- p1 + theme(plot.title = element_text(hjust = 0.5)) p2 <- DimPlot(exp_count.seurat, reduction = "umap",label = TRUE, pt.size = 0.5, label.size = 4.5, repel = TRUE, group.by = "ident") p2 <- p2 + labs(title="Clustering TSNE Reduction") p2 <- p2 + theme(plot.title = element_text(hjust = 0.5)) plots <- plot_grid(p1, p2, ncol=2,rel_widths = c(1.7, 2.2)) cluster_pdfname <- paste0(opt$file_prefix, "_Clustering_TSNE_UMAP.pdf") folder_name <- "3.PlotCluster" pdf(cluster_pdfname, width = 18, height = 6) print(plots) dev.off() file.copy(cluster_pdfname, folder_name, overwrite = T)#拷贝文件 file.remove(cluster_pdfname) rds_name <- paste0(opt$file_prefix, "_sctransform_clustering_seurat.rds.rds") saveRDS(exp_count.seurat, file = rds_name) print("Happy~Finished!!") timeend<-Sys.time() runningtime<-timeend-timestart print(runningtime)

5.附上使用说明

1) 把SeuratWrapperFunction.R模块下面的代码复制到文件SeuratWrapperFunction.R,并存储
2) 把DataQualityOverviewSingle.R模块下面的代码复制到文件DataQualityOverviewSingle.R,并存储
3) 把LogNormalizeClusteringSingle.R模块下面的代码复制到文件LogNormalizeClusteringSingle.R,并存储
4) 把SCTransformClusteringSingle.R模块下面的代码复制到文件SCTransformClusteringSingle.R,并存储
5) 可执行权限

chmod +x DataQualityOverviewSingle.R chmod +x LogNormalizeClusteringSingle.R chmod +x SCTransformClusteringSingle.R

6)使用

## 质量控制 Rscript ~/scripts/seurat/pipeline/DataQualityOverviewSingle.R -o /home/zhengjh/other/PinealGland/Results/Zebrafish -t 10X --datapath /home/zhengjh/other/PinealGland/Data/Zebrafish_GSE123778_RAW/GSM3511192_scSeq1 \ --project_name "zebrafish_scseq1" --mt_pattern "^mt-" --rb_pattern "^rpl|^rps" --min_cell 3 --min_gene 100 ## LogNormalize Rscript ~/scripts/seurat/pipeline/LogNormalizeClusteringSingle.R -o ~/other/PinealGland/Results/Zebrafish --seuratobject ~/other/PinealGland/Results/Zebrafish/zebrafish_scseq1_dataquality_overview_seurat.rds \ --min_gene 200 --max_gene 6500 --min_ncountRNA 200 --max_ncountRNA 75000 \ --max_mt_percent 20 --max_rb_percent 20 --file_prefix zebrafish_scseq1_logtransform \ --scale_factor 10000 --nfeatures 2000 --npc_plot 50 --npc_used 30 \ --k_parameter 20 --resolution_number 0.5 --min_pct 0.25 --logfc_threshold 0.25 \ --p_val_adj 0.05 --top_num 10 --mt_rb_pattern "^mt-|rpl|rps" ## SCTransform Rscript ~/scripts/seurat/pipeline/SCTransformClusteringSingle.R -o ~/other/PinealGland/Results/Zebrafish --seuratobject ~/other/PinealGland/Results/Zebrafish/zebrafish_scseq1_dataquality_overview_seurat.rds --min_gene 200 --max_gene 6500 --min_ncountRNA 200 --max_ncountRNA 75000 \ --max_mt_percent 20 --max_rb_percent 20 --file_prefix zebrafish_scseq1_scttransform \ --npc_plot 50 --npc_used 30 \ --k_parameter 20 --resolution_number 0.5 --min_pct 0.25 --logfc_threshold 0.25 \ --p_val_adj 0.05 --top_num 10 --mt_rb_pattern "^mt-|rpl|rps"

7) 结果展示



结果展示

最后放个图片
祝君开心快乐 健康!
也希望自己一直保持学习 不郁闷呜呜u





image.png

发布人:4f51****    IP:101.229.16.***     举报/删稿
展会推荐
让朕来说2句
评论
收藏
点赞
转发