Newer
Older
# packages
library(mixOmics)
library(data.table)
library(rtracklayer)
library(tximport)
library(stringr)
library(limma)
library(edgeR)
library(ggplot2)
library(patchwork)
library(ComplexHeatmap)
library(circlize)
library(purrr)
library(UpSetR)
library(svglite)
library(dendextend)
library(jsonlite)
library(ggbeeswarm)
library(seriation)
library(httr)
library(EnhancedVolcano)
library(magick)
library(rsvg)
library(statmod)
setwd("P:/Projet/rosepig_stomach")
metadata <- fread("metadata_both.txt")
gtf <- import(con = "Sus_scrofa.Sscrofa11.1.102.gtf", format = "GFF")
tx2gene <- fread("tx2gene.tsv")
files <- ifelse(
file.path("salmon" , metadata$nfcore_id, "quant.sf"),
file.path("rnaseq_duodenum", "salmon", metadata$nfcore_id, "quant.sf")
)
txi <- tximport(files, type = "salmon", tx2gene = tx2gene, countsFromAbundance = "lengthScaledTPM") # browseVignettes("tximport")
dge <- DGEList(txi$counts)
keep <- filterByExpr(dge, min.count = 4, min.total.count = 8 )
# summary(keep)
# Mode FALSE TRUE
# logical 16958 14822
dge <- dge[keep, ]
dge <- calcNormFactors(dge)
)
design <- model.matrix(~ 0 + met)
contMatrix <- makeContrasts(
FvsF = ((metfed.HRFI.duodenum + metfed.HRFI.stomach + metfed.LRFI.duodenum + metfed.LRFI.stomach)
- (metfasted.HRFI.duodenum + metfasted.HRFI.stomach + metfasted.LRFI.duodenum + metfasted.LRFI.stomach)),
HvsL = ((metfed.HRFI.duodenum + metfed.HRFI.stomach + metfasted.HRFI.duodenum + metfasted.HRFI.stomach)
- (metfed.LRFI.duodenum + metfed.LRFI.stomach + metfasted.LRFI.duodenum + metfasted.LRFI.stomach)),
SvsD = ((metfed.HRFI.stomach + metfasted.HRFI.stomach + metfed.LRFI.stomach + metfasted.LRFI.stomach)
- (metfed.HRFI.duodenum + metfasted.HRFI.duodenum + metfed.LRFI.duodenum + metfasted.LRFI.duodenum)),
SvsDinL = (metfed.LRFI.stomach + metfasted.LRFI.stomach) - (metfasted.LRFI.duodenum + metfed.LRFI.duodenum),
SvsDinH = (metfed.HRFI.stomach + metfasted.HRFI.stomach) - (metfasted.HRFI.duodenum + metfed.HRFI.duodenum),
FvsFinS = (metfed.HRFI.stomach + metfed.LRFI.stomach) - (metfasted.HRFI.stomach + metfasted.LRFI.stomach),
FvsFinD = (metfed.HRFI.duodenum + metfed.LRFI.duodenum) - (metfasted.HRFI.duodenum + metfasted.LRFI.duodenum),
SvsDinFed = (metfed.LRFI.stomach + metfed.HRFI.stomach) - (metfed.LRFI.duodenum + metfed.HRFI.duodenum),
SvsDinFas = (metfasted.LRFI.stomach + metfasted.HRFI.stomach) - (metfasted.LRFI.duodenum + metfasted.HRFI.duodenum),
FvsFinH = ((metfed.HRFI.stomach + metfed.HRFI.duodenum) - (metfasted.HRFI.stomach + metfasted.HRFI.duodenum)),
FvsFinL = ((metfed.LRFI.stomach + metfed.LRFI.duodenum) - (metfasted.LRFI.stomach + metfasted.LRFI.duodenum)),
levels = design
)
# duplicate correlations, is txi$counts the appropriate object ?
block_corr <- duplicateCorrelation(txi$abundance[keep,], design = design, block = metadata$name)
# TODO : duplicateCorrelation in voom & lmfit
# https://forgemia.inra.fr/arthur.durante/pigheat_transcriptome/-/blob/GWAS/04_limma_environment_tempered.R#L122
# duplicateCorrelation(dge, design = design, block = metadata$name)
# https://forgemia.inra.fr/arthur.durante/pigheat_transcriptome/-/blob/GWAS/04_limma_environment_tempered.R#L128
# can be long
mydge <- voom(dge, design = design)
myfit <- lmFit(mydge, design, cor = block_corr$consesus.correlation)
myfit <- contrasts.fit(myfit, contrasts = contMatrix)
myfit <- eBayes(myfit)
summary(decideTests(myfit, adjust.method = "bonf", p.value=0.05))
# FvsF HvsL SvsD SvsDinL SvsDinH FvsFinS FvsFinD SvsDinFed SvsDinFas with p.value = 0.05
# Down 2340 1082 6168 5711 5597 3771 1813 5494 5829
# NotSig 9751 12096 2516 3285 3781 7183 11221 3929 3006
# Up 2731 1644 6138 5826 5444 3868 1788 5399 5987
# FvsF HvsL SvsD SvsDinL SvsDinH FvsFinS FvsFinD SvsDinFed SvsDinFas FvsFinH FvsFinL with Bonferroni and p value = 0.05
# Down 302 33 4221 3362 2951 612 88 2851 3631 92 142
# NotSig 14229 14744 6316 8020 8766 13632 14694 8915 7583 14651 14538
# Up 291 45 4285 3440 3105 578 40 3056 3608 79 142
pca_mat <- txi$abundance[keep,]
pca_mat <- t(pca_mat) |> scale()
acp <- pca(pca_mat, ncomp = 6, center = FALSE, scale = FALSE)
plot(acp)
plotVar(acp, var.names = FALSE, pch = 20)
plotIndiv(acp, group = paste(metadata$tissue), comp = c(1, 2),
ind.names = metadata$condition,
legend = TRUE, title = 'PCA', ellipse = TRUE)
plotIndiv(acp, group = paste(metadata$line, metadata$tissue), ind.names = FALSE,
legend = TRUE, title = 'PCA', pch = 19, ellipse = TRUE, ellipse.level = 0.83)
plotIndiv(
acp, group = paste(metadata$line, metadata$tissue, metadata$feed), ind.names = FALSE,
legend = TRUE, title = 'PCA', pch = 19, ellipse = FALSE,
comp = c(1, 3)
)
plotIndiv(
pca(pca_mat[metadata$line == "HRFI", ], ncomp = 4, center = FALSE, scale = FALSE),
group = metadata$tissue[metadata$line == "HRFI"],
ind.names = metadata$condition[metadata$line == "HRFI"],
legend = TRUE, title = 'HRFI'
)
plotIndiv(
pca(pca_mat[metadata$line == "HRFI", ], ncomp = 4, center = FALSE, scale = FALSE),
group = metadata$tissue[metadata$line == "HRFI"],
pch = 19,
legend = TRUE, title = 'HRFI'
)
plotIndiv(
pca(pca_mat[metadata$line == "LRFI", ], ncomp = 4, center = FALSE, scale = FALSE),
group = metadata$tissue[metadata$line == "LRFI"],
ind.names = metadata$condition[metadata$line == "LRFI"],
legend = TRUE, title = 'LRFI'
)
plotIndiv(
pca(pca_mat[metadata$line == "LRFI", ], ncomp = 4, center = FALSE, scale = FALSE),
group = metadata$tissue[metadata$line == "LRFI"],
pch = 19,
legend = TRUE, title = 'LRFI'
)
genelists <- map(
set_names(c("FvsF", "HvsL", "SvsD", "SvsDinL", "SvsDinH", "FvsFinS",
"FvsFinD", "SvsDinFed", "SvsDinFas", "FvsFinH", "FvsFinL")),
function(x) {
res <- decideTests(myfit, adjust.method = "bonfe", p.value = 0.05)[ , x]
rownames(res[res != 0, ])
}
)
x <- fromList(genelists[c("FvsFinS","FvsFinD")])
upset(x,
matrix.color = viridis::plasma(3, end = 0.90)[2] ,
sets.bar.color = c(viridis::plasma(3, end = 0.90)[1:2]),
main.bar.color = viridis::plasma(3, end = 0.90),
shade.color = "lightblue",
x <- fromList(genelists[c("SvsDinL", "SvsDinH","SvsDinFed", "SvsDinFas")])
upset(x,
matrix.color = viridis::plasma(13, end = 0.90)[5] ,
sets.bar.color = c(
viridis::plasma(13, end = 0.90)[1],
viridis::plasma(13, end = 0.90)[3], viridis::plasma(13, end = 0.90)[4],
viridis::plasma(13, end = 0.90)[2]),
main.bar.color = viridis::plasma(13, end = 0.90),
shade.color = "lightblue",
x <- fromList(genelists[c("FvsFinH", "FvsFinL")])
upset(x,
matrix.color = viridis::plasma(3, end = 0.90)[2] ,
sets.bar.color = c(viridis::plasma(3, end = 0.90)[1:2]),
main.bar.color = viridis::plasma(3, end = 0.90),
shade.color = "lightblue",
x <- fromList(genelists[c("SvsD", "FvsF", "HvsL")])
upset(x,
matrix.color = viridis::plasma(6, end = 0.90)[3] ,
sets.bar.color = c(viridis::plasma(6, end = 0.90)[1:3]),
main.bar.color = c(viridis::plasma(6, end = 0.90)),
shade.color = "lightblue",
topt_plots <- map(
colnames(myfit$contrasts) |> set_names(),
function(x) {
topTable(myfit, coef = x, number = 100000)
}
)
genes_cond_list <- map_chr(topt_plots, \(x) rownames(x[2,]))
plotexpr(genes_cond_list[1])
metadata$linefeed <- with(metadata, paste(line,feed))
plotexpr <- function(gene) {
expr <- t(txi$abundance[gene, , drop = FALSE])
dfp <- metadata
dfp$expr <- expr
ggplot(dfp, aes(
x = factor(ifelse(tissue == "stomach", "stomach","duodenum"), levels = c("stomach", "duodenum")),
y = expr, fill = linefeed
)) +
geom_boxplot(width = 0.4, outlier.shape = NA) +
geom_beeswarm(aes(col = linefeed), dodge.width = 0.4, size = 2, cex = 1.5) +
coord_cartesian(ylim = c(0, NA)) +
scale_fill_manual(values = c("HRFI fasted" = "#787cf077", "HRFI fed" = "#40d9ea77", "LRFI fasted" = "#bae61f77" , "LRFI fed" = "#f390da77")) +
scale_colour_manual(values = c("HRFI fasted" = "#787cf0", "HRFI fed" = "#40d9ea", "LRFI fasted" = "#bae61f" , "LRFI fed" = "#f390da" )) +
labs(title = gene_name, fill = "linefeed", x = "Tissue", y = "TPM") +
theme_bw(base_size = 16) +
theme(plot.title = element_text(size=16))
}
plotexpr("ENSSSCG00000021910") #gastrin
# txi$abundance["ENSSSCG00000021910",]
# [1] 0.000000 1.490134 0.143895 0.000000 0.203100 0.000000 6.526677 0.177739 8476.513934 0.000000
# [11] 0.000000 0.140140 0.000000 0.000000 0.000000 0.532045 0.000000 0.000000 0.000000 0.000000
# [21] 0.137095 0.200292 0.000000 0.000000 16.518600 17.059180 21.023289 20.458764 15.253007 16.526895
# [31] 9.134834 27.400042 43.088366 23.070951 34.766090 22.740114 14.574913 21.858075 5.688995 21.832952
# [41] 28.510405 33.485766 16.650450 21.381344 15.124753 23.930513 21.503706 26.768468
plotexpr("ENSSSCG00000011568") #ghrelin
# txi$abundance["ENSSSCG00000011568",]
# [1] 890.65636 1399.94170 1423.20974 1215.59458 985.95787 772.38344 1067.91157 1256.28097 709.75711 833.25861 1487.33306
# [12] 1706.33572 1281.05627 1008.28069 1549.65570 1298.07903 2344.05073 1441.92782 1333.72837 542.32684 776.45388 1326.46008
# [23] 1412.37424 626.51048 124.21014 194.94674 221.92149 73.08447 80.25826 83.88175 108.30528 86.97938 101.44263
# [34] 80.00987 96.07310 99.91766 59.61474 24.18648 110.45926 58.44334 96.77374 64.43409 143.35242 90.95452
# [45] 139.39761 176.78356 78.14629 111.40827
plotexpr("ENSSSCG00000006792") #pepsinogen
plotexpr("ENSSSCG00000013117") #cobalamin transport
# txi$abundance["ENSSSCG00000013117",]
# [1] 67.53061 70.26652 30.36416 18.92160 122.99833 194.09863 45.23172 68.15924 1415.04444 72.51830 139.74338
# [12] 78.68822 14.49267 27.47639 268.15168 347.88141 31.96975 89.85295 177.80368 239.57244 24.07132 296.80620
# [23] 151.43980 57.31725 96.36608 268.09668 45.31252 20.92685 81.76093 127.98916 103.57274 67.81729 170.77838
# [34] 108.80359 112.53825 58.53950 43.38521 71.45104 115.33778 38.83127 85.37446 130.92601 45.21819 36.13069
# [45] 26.26749 44.24684 91.11347 88.30288
plotexpr("ENSSSCG00000035529") #gastrokin 1
plotexpr("ENSSSCG00000033382") #mucin 5AC
plotexpr("ENSSSCG00000038802") #gastrokin 2
plotexpr("ENSSSCG00000034731") #trefoil factor 1
plotexpr("ENSSSCG00000001849") #best gene for SvsD
# set the parameters
col <- list( #defining the desired colours for the annotations
Line = c("LRFI" = "#423089", "HRFI" = "#ed6e6c"),
Tissue = c("stomach" = "#0D1687", "duodenum" = "#BE3885"),
Family = c("A" = "#008000", "B" = "#E59866","C" = "#A2D9CE", "D" = "#008080", "E" = "#F7DC6F", "F" = "#000080" ),
Sex = c("F" = "#89014D", "M" = "#53848B"),
Condition = c("fed" = "#1E8449", "fasted" = "#F39C12")
)
ha <- HeatmapAnnotation( #defining the desired annotations
Tissue = metadata$tissue,
Line = metadata$line,
Family = metadata$family,
Condition = metadata$feed,
Sex = metadata$sex,
)
col_heatmap <- colorRamp2(c(-2.0, 0, 2.0), c("#5555FF", "white", "#FF5555")) #defining the legend range of colours
hclust_olo <- function(mdist, ...) { #clustering method function
myClust <- hclust(mdist, ...)
myOlo <- seriation::seriate(mdist, method = "OLO")
seriation::permute(myClust, myOlo)
}
show_heatmap <- \(contrast){
mat <- txi$abundance[genelists[[contrast]], ] |> t() |> scale() |> t()
colnames(mat) <- metadata$condition
row_dend <- as.dendrogram(hclust_olo(dist(mat))) #clustering
row_dend <- color_branches(row_dend, k = 2, col = c("UP_expressed" = "#423089", "DOWN_expressed" = "#ed6e6c"))
hm <- Heatmap(mat,
name = "Expression\n(Z-score)",
col = col_heatmap,
column_title = "Samples (24)",
row_title = paste("Genes (",length(genelists[[contrast]]),")"),
border_gp = gpar(col = "black", lty = 1),
top_annotation = ha,
show_row_names = FALSE,
show_column_names = FALSE,
use_raster = TRUE,
column_split = metadata$tissue, cluster_columns = TRUE, cluster_column_slices = FALSE,
)
plot(hm,
column_title = contrast,
}
show_heatmap("FvsF")
show_heatmap("HvsL")
show_heatmap("SvsD")
## common genes between FvsFinD and FvsFinS ------------
# their IDs and names
com_genes_S_D <- intersect(genelists$FvsFinS, genelists$FvsFinD)
symb_com_genes <- map(com_genes_S_D, \(x) {
unique(gtf$gene_name[which(gtf$gene_id==x)])
}
)
# fwrite(t(as.data.frame(symb_com_genes)), file = "names_common_genes_S_D.tsv", sep = ',')
com_genes_table <- as.data.table(com_genes_S_D)
com_genes_table$symbols <- t(as.data.table(symb_com_genes))
# fwrite(com_genes_table, file = "names_and_IDs_common_genes_S_D.tsv", sep = ',')
# interesting genes plots
plotexpr("ENSSSCG00000016420") #INSIG1
plotexpr("ENSSSCG00000000402") #RMBS2
plotexpr("ENSSSCG00000015106") #HYOU1
plotexpr("ENSSSCG00000008857") #MSMO1
plotexpr("ENSSSCG00000002425") #PTPN21
plotexpr("ENSSSCG00000010554") #SCD
mat <- txi$abundance[com_genes_S_D, ] |> t() |> scale() |> t()
row_dend <- as.dendrogram(hclust_olo(dist(mat))) #clustering
row_dend <- color_branches(row_dend, k = 2, col = c("UP_expressed" = "#423089", "DOWN_expressed" = "#ed6e6c"))
hm <- Heatmap(mat,
name = "Expression\n(Z-score)",
col = col_heatmap,
column_title = "Samples (24)",
row_title = "Genes (42)",
border_gp = gpar(col = "black", lty = 1),
top_annotation = ha,
show_row_names = FALSE,
show_column_names = FALSE,
use_raster = TRUE,
column_split = metadata$tissue, cluster_columns = TRUE, cluster_column_slices = FALSE,
)
plot(hm,
column_title = "Common genes between FvsFinD and FvsFinS",
column_title_gp = grid::gpar(fontsize=16))