Skip to content
Snippets Groups Projects
03_transcriptomics_stomach_and_duodenum.R 15.2 KiB
Newer Older
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
## Environment setup -----------
# packages
library(mixOmics)
library(data.table)
library(rtracklayer)
library(tximport)
library(stringr)
library(limma)
library(edgeR)
library(ggplot2)
library(patchwork)
library(ComplexHeatmap)
library(circlize)
library(purrr)
library(UpSetR)
library(svglite)
library(dendextend)
library(jsonlite)
library(ggbeeswarm)
library(seriation)
library(httr)
library(EnhancedVolcano)
library(magick)
library(rsvg)
library(statmod)

setwd("P:/Projet/rosepig_stomach")

TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
## metadata --------------------
metadata <- fread("metadata_both.txt")
gtf <- import(con = "Sus_scrofa.Sscrofa11.1.102.gtf", format = "GFF")

tx2gene <- fread("tx2gene.tsv")

TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
## tx_import ---------------
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
    metadata$tissue == "stomach",
    file.path("salmon" , metadata$nfcore_id, "quant.sf"),
    file.path("rnaseq_duodenum", "salmon", metadata$nfcore_id, "quant.sf")
)
    
txi <- tximport(files, type = "salmon", tx2gene = tx2gene, countsFromAbundance = "lengthScaledTPM") # browseVignettes("tximport")

TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
## Differential gene expression --------
dge <- DGEList(txi$counts)

keep <- filterByExpr(dge, min.count = 4, min.total.count = 8 )
# summary(keep)
# Mode   FALSE    TRUE 
# logical   16958   14822 

dge <- dge[keep, ]
dge <- calcNormFactors(dge)

TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
## limma voom -----------
# interaction model
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
    with(metadata, paste(feed, line, tissue, sep = "."))
)

design <- model.matrix(~ 0 + met)
contMatrix <- makeContrasts(
    FvsF = ((metfed.HRFI.duodenum + metfed.HRFI.stomach + metfed.LRFI.duodenum + metfed.LRFI.stomach) 
            - (metfasted.HRFI.duodenum + metfasted.HRFI.stomach + metfasted.LRFI.duodenum + metfasted.LRFI.stomach)),
    HvsL = ((metfed.HRFI.duodenum + metfed.HRFI.stomach + metfasted.HRFI.duodenum + metfasted.HRFI.stomach) 
            - (metfed.LRFI.duodenum + metfed.LRFI.stomach + metfasted.LRFI.duodenum + metfasted.LRFI.stomach)),
    SvsD = ((metfed.HRFI.stomach + metfasted.HRFI.stomach + metfed.LRFI.stomach + metfasted.LRFI.stomach)
            - (metfed.HRFI.duodenum + metfasted.HRFI.duodenum + metfed.LRFI.duodenum + metfasted.LRFI.duodenum)),
    SvsDinL = (metfed.LRFI.stomach + metfasted.LRFI.stomach) - (metfasted.LRFI.duodenum + metfed.LRFI.duodenum),
    SvsDinH = (metfed.HRFI.stomach + metfasted.HRFI.stomach) - (metfasted.HRFI.duodenum + metfed.HRFI.duodenum),
    FvsFinS = (metfed.HRFI.stomach + metfed.LRFI.stomach) - (metfasted.HRFI.stomach + metfasted.LRFI.stomach),
    FvsFinD = (metfed.HRFI.duodenum + metfed.LRFI.duodenum) - (metfasted.HRFI.duodenum + metfasted.LRFI.duodenum),
    SvsDinFed = (metfed.LRFI.stomach + metfed.HRFI.stomach) - (metfed.LRFI.duodenum + metfed.HRFI.duodenum),
    SvsDinFas = (metfasted.LRFI.stomach + metfasted.HRFI.stomach) - (metfasted.LRFI.duodenum + metfasted.HRFI.duodenum),
    FvsFinH = ((metfed.HRFI.stomach + metfed.HRFI.duodenum) - (metfasted.HRFI.stomach + metfasted.HRFI.duodenum)),
    FvsFinL = ((metfed.LRFI.stomach + metfed.LRFI.duodenum) - (metfasted.LRFI.stomach + metfasted.LRFI.duodenum)),
    levels = design
)

TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
# duplicate correlations, is txi$counts the appropriate object ?
block_corr <- duplicateCorrelation(txi$abundance[keep,], design = design, block = metadata$name)
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
# TODO : duplicateCorrelation in voom & lmfit
# https://forgemia.inra.fr/arthur.durante/pigheat_transcriptome/-/blob/GWAS/04_limma_environment_tempered.R#L122
# duplicateCorrelation(dge, design = design, block = metadata$name)
# https://forgemia.inra.fr/arthur.durante/pigheat_transcriptome/-/blob/GWAS/04_limma_environment_tempered.R#L128
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
# analysis
mydge <- voom(dge, design = design)
myfit <- lmFit(mydge, design, cor = block_corr$consesus.correlation)
myfit <- contrasts.fit(myfit, contrasts = contMatrix)
myfit <- eBayes(myfit)
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
summary(decideTests(myfit, adjust.method = "bonf", p.value=0.05))
#         FvsF  HvsL  SvsD SvsDinL SvsDinH FvsFinS FvsFinD SvsDinFed SvsDinFas with p.value = 0.05
# Down    2340  1082  6168    5711    5597    3771    1813      5494      5829
# NotSig  9751 12096  2516    3285    3781    7183   11221      3929      3006
# Up      2731  1644  6138    5826    5444    3868    1788      5399      5987

#         FvsF  HvsL  SvsD SvsDinL SvsDinH FvsFinS FvsFinD SvsDinFed SvsDinFas FvsFinH FvsFinL with Bonferroni and p value = 0.05
# Down     302    33  4221    3362    2951     612      88      2851      3631      92     142
# NotSig 14229 14744  6316    8020    8766   13632   14694      8915      7583   14651   14538
# Up       291    45  4285    3440    3105     578      40      3056      3608      79     142

TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
## PCA -------
pca_mat <- txi$abundance[keep,]
pca_mat <- t(pca_mat) |> scale()

acp <- pca(pca_mat, ncomp = 6, center = FALSE, scale = FALSE)


plot(acp)

plotVar(acp, var.names = FALSE, pch = 20)

TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
plotIndiv(acp, group =  paste(metadata$tissue), comp = c(1, 2), 
          ind.names = metadata$condition,
          legend = TRUE, title = 'PCA', ellipse = TRUE)

plotIndiv(acp, group = paste(metadata$line, metadata$tissue), ind.names = FALSE,
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
          legend = TRUE, title = 'PCA', pch = 19, ellipse = TRUE, ellipse.level = 0.83)

plotIndiv(
    acp, group = paste(metadata$line, metadata$tissue, metadata$feed), ind.names = FALSE,
    legend = TRUE, title = 'PCA', pch = 19, ellipse = FALSE,
    comp = c(1, 3)
)

plotIndiv(
    pca(pca_mat[metadata$line == "HRFI", ], ncomp = 4, center = FALSE, scale = FALSE),
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
    group = metadata$tissue[metadata$line == "HRFI"],
    ind.names = metadata$condition[metadata$line == "HRFI"],
    legend = TRUE, title = 'HRFI'
)
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
plotIndiv(
    pca(pca_mat[metadata$line == "HRFI", ], ncomp = 4, center = FALSE, scale = FALSE),
    group = metadata$tissue[metadata$line == "HRFI"],
    pch = 19,
    legend = TRUE, title = 'HRFI'
)

plotIndiv(
    pca(pca_mat[metadata$line == "LRFI", ], ncomp = 4, center = FALSE, scale = FALSE),
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
    group = metadata$tissue[metadata$line == "LRFI"],
    ind.names =  metadata$condition[metadata$line == "LRFI"],
    legend = TRUE, title = 'LRFI'
)
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
plotIndiv(
    pca(pca_mat[metadata$line == "LRFI", ], ncomp = 4, center = FALSE, scale = FALSE),
    group = metadata$tissue[metadata$line == "LRFI"],
    pch = 19,
    legend = TRUE, title = 'LRFI'
)
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
## upset plots -----------
genelists <- map(
    set_names(c("FvsF", "HvsL", "SvsD", "SvsDinL", "SvsDinH", "FvsFinS", 
                "FvsFinD", "SvsDinFed", "SvsDinFas", "FvsFinH", "FvsFinL")),
    function(x) {
        res <- decideTests(myfit, adjust.method = "bonfe", p.value = 0.05)[ , x]
        rownames(res[res != 0, ])
    }
)

x <- fromList(genelists[c("FvsFinS","FvsFinD")])
upset(x, 
      matrix.color = viridis::plasma(3, end = 0.90)[2] ,
      sets.bar.color = c(viridis::plasma(3, end = 0.90)[1:2]),
      main.bar.color = viridis::plasma(3, end = 0.90),
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
      set_size.numbers_size	= 10,
      group.by = "degree",
      shade.color = "lightblue",
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
      set_size.show = TRUE)

x <- fromList(genelists[c("SvsDinL", "SvsDinH","SvsDinFed", "SvsDinFas")])
upset(x,
      matrix.color = viridis::plasma(13, end = 0.90)[5] ,
      sets.bar.color = c(
                        viridis::plasma(13, end = 0.90)[1],
                        viridis::plasma(13, end = 0.90)[3], viridis::plasma(13, end = 0.90)[4],
                        viridis::plasma(13, end = 0.90)[2]),
      main.bar.color = viridis::plasma(13, end = 0.90),
      shade.color = "lightblue",
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
      set_size.numbers_size	= 10,
      set_size.show = TRUE)

x <- fromList(genelists[c("FvsFinH", "FvsFinL")])
upset(x, 
      matrix.color = viridis::plasma(3, end = 0.90)[2] ,
      sets.bar.color = c(viridis::plasma(3, end = 0.90)[1:2]),
      main.bar.color = viridis::plasma(3, end = 0.90),
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
      set_size.numbers_size	= 10,
      shade.color = "lightblue",
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
      set_size.show = TRUE)

x <- fromList(genelists[c("SvsD", "FvsF", "HvsL")])
upset(x, 
      matrix.color = viridis::plasma(6, end = 0.90)[3] ,
      sets.bar.color = c(viridis::plasma(6, end = 0.90)[1:3]),
      main.bar.color = c(viridis::plasma(6, end = 0.90)),
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
      set_size.numbers_size	= 10,
      shade.color = "lightblue",
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
      set_size.show = TRUE)
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
## genes expression plots ----------
topt_plots <- map(
    colnames(myfit$contrasts) |> set_names(),
    function(x) {
        topTable(myfit, coef = x, number = 100000)
    }
)

genes_cond_list <- map_chr(topt_plots, \(x) rownames(x[2,]))
plotexpr(genes_cond_list[1])

TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
metadata$linefeed <- with(metadata, paste(line,feed))

plotexpr <- function(gene) {
    expr <- t(txi$abundance[gene, , drop = FALSE])
    dfp <- metadata
    dfp$expr <- expr
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
    gene_name <- gtf$gene_name[which(gtf$gene_id == gene)]
    
    ggplot(dfp, aes(
        x = factor(ifelse(tissue == "stomach", "stomach","duodenum"), levels = c("stomach", "duodenum")),
        y = expr, fill = linefeed
    )) +
        geom_boxplot(width = 0.4, outlier.shape = NA) +
        geom_beeswarm(aes(col = linefeed), dodge.width = 0.4, size = 2, cex = 1.5) +
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
        coord_cartesian(ylim = c(0, NA)) +
        scale_fill_manual(values = c("HRFI fasted" = "#787cf077", "HRFI fed" = "#40d9ea77", "LRFI fasted" = "#bae61f77" , "LRFI fed" = "#f390da77")) +
        scale_colour_manual(values = c("HRFI fasted" = "#787cf0", "HRFI fed" = "#40d9ea", "LRFI fasted" = "#bae61f" , "LRFI fed" = "#f390da" )) +
        labs(title = gene_name, fill = "linefeed", x = "Tissue", y = "TPM") +
        theme_bw(base_size = 16) +
        theme(plot.title = element_text(size=16))
}

plotexpr("ENSSSCG00000021910") #gastrin
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
# txi$abundance["ENSSSCG00000021910",]
# [1]    0.000000    1.490134    0.143895    0.000000    0.203100    0.000000    6.526677    0.177739 8476.513934    0.000000
# [11]    0.000000    0.140140    0.000000    0.000000    0.000000    0.532045    0.000000    0.000000    0.000000    0.000000
# [21]    0.137095    0.200292    0.000000    0.000000   16.518600   17.059180   21.023289   20.458764   15.253007   16.526895
# [31]    9.134834   27.400042   43.088366   23.070951   34.766090   22.740114   14.574913   21.858075    5.688995   21.832952
# [41]   28.510405   33.485766   16.650450   21.381344   15.124753   23.930513   21.503706   26.768468

plotexpr("ENSSSCG00000011568") #ghrelin
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
# txi$abundance["ENSSSCG00000011568",]
# [1]  890.65636 1399.94170 1423.20974 1215.59458  985.95787  772.38344 1067.91157 1256.28097  709.75711  833.25861 1487.33306
# [12] 1706.33572 1281.05627 1008.28069 1549.65570 1298.07903 2344.05073 1441.92782 1333.72837  542.32684  776.45388 1326.46008
# [23] 1412.37424  626.51048  124.21014  194.94674  221.92149   73.08447   80.25826   83.88175  108.30528   86.97938  101.44263
# [34]   80.00987   96.07310   99.91766   59.61474   24.18648  110.45926   58.44334   96.77374   64.43409  143.35242   90.95452
# [45]  139.39761  176.78356   78.14629  111.40827

plotexpr("ENSSSCG00000006792") #pepsinogen

plotexpr("ENSSSCG00000013117") #cobalamin transport
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
# txi$abundance["ENSSSCG00000013117",]
# [1]   67.53061   70.26652   30.36416   18.92160  122.99833  194.09863   45.23172   68.15924 1415.04444   72.51830  139.74338
# [12]   78.68822   14.49267   27.47639  268.15168  347.88141   31.96975   89.85295  177.80368  239.57244   24.07132  296.80620
# [23]  151.43980   57.31725   96.36608  268.09668   45.31252   20.92685   81.76093  127.98916  103.57274   67.81729  170.77838
# [34]  108.80359  112.53825   58.53950   43.38521   71.45104  115.33778   38.83127   85.37446  130.92601   45.21819   36.13069
# [45]   26.26749   44.24684   91.11347   88.30288

plotexpr("ENSSSCG00000035529") #gastrokin 1

plotexpr("ENSSSCG00000033382") #mucin 5AC

plotexpr("ENSSSCG00000038802") #gastrokin 2

plotexpr("ENSSSCG00000034731") #trefoil factor 1

plotexpr("ENSSSCG00000001849") #best gene for SvsD

TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
## Complexeatmaps -----------
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
col <- list( #defining the desired colours for the annotations
    Line = c("LRFI" = "#423089", "HRFI" = "#ed6e6c"),
    Tissue = c("stomach" = "#0D1687", "duodenum" = "#BE3885"),
    Family = c("A" = "#008000", "B" = "#E59866","C" = "#A2D9CE", "D" = "#008080", "E" = "#F7DC6F", "F" = "#000080" ),
    Sex = c("F" = "#89014D", "M" = "#53848B"),
    Condition = c("fed" = "#1E8449", "fasted" = "#F39C12")
)

TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
ha <- HeatmapAnnotation( #defining the desired annotations
    Tissue = metadata$tissue,
    Line = metadata$line,
    Family = metadata$family,
    Condition = metadata$feed,
    Sex = metadata$sex,
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
    col = col
)

col_heatmap <- colorRamp2(c(-2.0, 0, 2.0), c("#5555FF", "white", "#FF5555")) #defining the legend range of colours

hclust_olo <- function(mdist, ...) { #clustering method function
    myClust <- hclust(mdist, ...)
    myOlo <- seriation::seriate(mdist, method = "OLO")
    seriation::permute(myClust, myOlo)
}


show_heatmap <- \(contrast){
    mat <- txi$abundance[genelists[[contrast]], ] |> t() |> scale() |> t()
    colnames(mat) <- metadata$condition
    row_dend <- as.dendrogram(hclust_olo(dist(mat))) #clustering
    row_dend <- color_branches(row_dend, k = 2, col = c("UP_expressed" = "#423089", "DOWN_expressed" = "#ed6e6c"))
    hm <- Heatmap(mat, 
                  name = "Expression\n(Z-score)",
                  col = col_heatmap,
                  column_title = "Samples (24)",
                  row_title = paste("Genes (",length(genelists[[contrast]]),")"),
                  border_gp = gpar(col = "black", lty = 1),
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
                  cluster_rows = row_dend,
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
                  show_row_names = FALSE,
                  show_column_names = FALSE,
                  use_raster = TRUE,
                  column_split = metadata$tissue, cluster_columns = TRUE, cluster_column_slices = FALSE,
    )
    plot(hm,
         column_title = contrast,
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
         column_title_gp = grid::gpar(fontsize=16))
}

show_heatmap("FvsF")
show_heatmap("HvsL")
show_heatmap("SvsD")

TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
## common genes between FvsFinD and FvsFinS ------------
# their IDs and names
com_genes_S_D <- intersect(genelists$FvsFinS, genelists$FvsFinD)
symb_com_genes <- map(com_genes_S_D, \(x) {
    unique(gtf$gene_name[which(gtf$gene_id==x)])
}
)

# fwrite(t(as.data.frame(symb_com_genes)), file = "names_common_genes_S_D.tsv", sep = ',')

com_genes_table <- as.data.table(com_genes_S_D)
com_genes_table$symbols <- t(as.data.table(symb_com_genes))

# fwrite(com_genes_table, file = "names_and_IDs_common_genes_S_D.tsv", sep = ',')

TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
# enrichment done with g:Profiler
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
# interesting genes plots
plotexpr("ENSSSCG00000016420") #INSIG1
plotexpr("ENSSSCG00000000402") #RMBS2
plotexpr("ENSSSCG00000015106") #HYOU1
plotexpr("ENSSSCG00000008857") #MSMO1
plotexpr("ENSSSCG00000002425") #PTPN21
plotexpr("ENSSSCG00000010554") #SCD
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
# heatmap
mat <- txi$abundance[com_genes_S_D, ] |> t() |> scale() |> t()
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
colnames(mat) <- metadata$condition
row_dend <- as.dendrogram(hclust_olo(dist(mat))) #clustering
row_dend <- color_branches(row_dend, k = 2, col = c("UP_expressed" = "#423089", "DOWN_expressed" = "#ed6e6c"))
hm <- Heatmap(mat, 
              name = "Expression\n(Z-score)",
              col = col_heatmap,
              column_title = "Samples (24)",
              row_title = "Genes (42)",
              border_gp = gpar(col = "black", lty = 1),
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
              cluster_rows = row_dend,
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
              show_row_names = FALSE,
              show_column_names = FALSE,
              use_raster = TRUE,
              column_split = metadata$tissue, cluster_columns = TRUE, cluster_column_slices = FALSE,
TEISSEIRE EMMA's avatar
TEISSEIRE EMMA committed
     column_title = "Common genes between FvsFinD and FvsFinS",
     column_title_gp = grid::gpar(fontsize=16))