Commit 29ce4bea authored by Noort's avatar Noort
Browse files

add barcodes

parent 59071a4a
# @author: Maite van den Noort
# @Date: 30-12-2020, last update: 7-01-2021
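# @function: makes ggplots of the retained reads from library 1 and 2 per sample (A-D), with the Watson and Crick counts of each sample added together.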
library(data.table)
library(dplyr)
library(tidyverse)
library(reshape2)
library(ggplot2)
# ---- Load the barcode tables and combine Watson + Crick counts per sample ----
# Both tables are taken from the 'process_radtags.clone.log' file of their
# library; the code below uses the columns Barcode, Filename and Retained.
barcode_data_lib1 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib1/barcode_information_lib1.tsv", header = TRUE)
barcode_data_lib2 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib2/barcode_information_lib2.tsv", header = TRUE)
# output path, the figures are saved here
outputFigures <- "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_barcodes/"

# split each library into its Watson rows and its Crick rows
Watson1 <- barcode_data_lib1[grepl("Watson", barcode_data_lib1$Filename), ]
Crick1 <- barcode_data_lib1[grepl("Crick", barcode_data_lib1$Filename), ]
Watson2 <- barcode_data_lib2[grepl("Watson", barcode_data_lib2$Filename), ]
Crick2 <- barcode_data_lib2[grepl("Crick", barcode_data_lib2$Filename), ]

# The strand counts are added position by position, so both strands must have
# the same number of rows; otherwise R would recycle the shorter vector.
# NOTE(review): this also assumes the Watson and Crick rows appear in the same
# sample order within each table -- confirm against the input files.
stopifnot(
  nrow(Watson1) == nrow(Crick1),
  nrow(Watson2) == nrow(Crick2)
)
# one-column tables (column V1) holding Watson + Crick per sample
WC1 <- data.table(Watson1$Retained + Crick1$Retained)
WC2 <- data.table(Watson2$Retained + Crick2$Retained)
# stack library 1 on top of library 2
WatsonCrick <- rbind(WC1, WC2)

# bind the two datasets together and keep only the Watson rows, so that every
# sample occurs once and lines up with the rows of WatsonCrick
data <- rbind(barcode_data_lib1, barcode_data_lib2)
data <- data[grepl("Watson", data$Filename), ]
# table with the barcode, file name and combined count of every sample
datalib1and2 <- data.table(Barcode = data$Barcode, Filename = data$Filename, Retained = WatsonCrick$V1)

# one subset per sample, selected on the first letter of the file name
lib1A <- datalib1and2[grepl("^A", datalib1and2$Filename), ]
lib1B <- datalib1and2[grepl("^B", datalib1and2$Filename), ]
lib2C <- datalib1and2[grepl("^C", datalib1and2$Filename), ]
lib2D <- datalib1and2[grepl("^D", datalib1and2$Filename), ]

# percentage of the sample total for every barcode
lib1A$Retained_pct1 <- (lib1A$Retained / sum(lib1A$Retained)) * 100
lib1B$Retained_pct2 <- (lib1B$Retained / sum(lib1B$Retained)) * 100
lib2C$Retained_pct3 <- (lib2C$Retained / sum(lib2C$Retained)) * 100
lib2D$Retained_pct4 <- (lib2D$Retained / sum(lib2D$Retained)) * 100
# combined axis label "<barcode of A> | <barcode of B>", paired by row position
lib1A$Barcodes <- paste(lib1A$Barcode, "|", lib1B$Barcode)

# wide tables: one row per barcode (pair), one column per sample
datalib <- data.table(Barcode = lib1A$Barcodes, Retained1 = lib1A$Retained, Retained2 = lib1B$Retained, Retained3 = lib2C$Retained, Retained4 = lib2D$Retained)
datalib1and2per <- data.table(Barcode = lib1A$Barcodes, Retainedper1 = lib1A$Retained_pct1, Retainedper2 = lib1B$Retained_pct2, Retainedper3 = lib2C$Retained_pct3, Retainedper4 = lib2D$Retained_pct4)
datalib1and2per1 <- data.table(Barcode = lib1A$Barcode, Retainedper1 = lib1A$Retained_pct1, Retainedper3 = lib2C$Retained_pct3)
datalib1and2per2 <- data.table(Barcode = lib1B$Barcode, Retainedper2 = lib1B$Retained_pct2, Retainedper4 = lib2D$Retained_pct4)

# reshape to long format (columns Barcode, variable, value) for ggplot;
# the argument is spelled out as id.vars instead of relying on partial matching
lib1and2retained <- melt(datalib, id.vars = "Barcode")
lib1and2retainedper <- melt(datalib1and2per, id.vars = "Barcode")
lib1and2retainedper1 <- melt(datalib1and2per1, id.vars = "Barcode")
lib1and2retainedper2 <- melt(datalib1and2per2, id.vars = "Barcode")
# Dodged bar plot of the percentage of retained reads per barcode for
# sample A (library 1) and sample C (library 2).
plot_ac_dodge <- ggplot(lib1and2retainedper1, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(width = 0.7, position = position_dodge(width = 0.6), stat = "identity", color = "gray") + # dodged bars with a gray outline
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 and 2 retained barcodes in percentage combined, sample A and C (dodge)") + # title names the samples that are actually plotted
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample C")) + # legend title and entries
  #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # give the different values a nice color
  geom_text(aes(label = round(value, digits = 2)), size = 3, hjust = 0, position = position_dodge2(width = 0.5), angle = 90) # value labels rotated 90 degrees
# show the figure on the active graphics device
print(plot_ac_dodge)
# Save with the plot passed explicitly. The original began a new line with
# `+ ggsave(...)`; since the line before already completed the plot, R ran it
# as a separate statement (unary plus on the value returned by ggsave()).
ggsave(paste0(outputFigures, "percentage_Retained_barcodes_lib1_and_2_combined_sampleAC_dodge.png"), plot = plot_ac_dodge)
# Dodged bar plot of the percentage of retained reads per barcode for
# sample B (library 1) and sample D (library 2).
plot_bd_dodge <- ggplot(lib1and2retainedper2, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(width = 0.7, position = position_dodge(width = 0.6), stat = "identity", color = "gray") + # dodged bars with a gray outline
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 and 2 retained barcodes in percentage combined, sample B and D (dodge)") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  scale_fill_discrete(name = "Retained samples", labels = c("sample B", "sample D")) + # legend title and entries
  #scale_fill_manual(values = c("#7CAE00", "#C77CFF")) + # give the different values a nice color
  geom_text(aes(label = round(value, digits = 2)), size = 3, hjust = 0, position = position_dodge2(width = 0.5), angle = 90) # value labels rotated 90 degrees
# show the figure on the active graphics device
print(plot_bd_dodge)
# Save with the plot passed explicitly. The original began a new line with
# `+ ggsave(...)`; since the line before already completed the plot, R ran it
# as a separate statement (unary plus on the value returned by ggsave()).
ggsave(paste0(outputFigures, "percentage_Retained_barcodes_lib1_and_2_combined_sampleBD_dodge.png"), plot = plot_bd_dodge)
# making the ggplot for barcode_data_lib1 and 2 together ordered by sample names (dodge)
# ggplot(lib1and2retained, aes(y= value, fill= variable, x= Barcode)) +
# geom_bar(width=0.7, position = position_dodge(width=0.6), stat="identity", color ="gray")+ # using nice colors and making it a dogde barplot
# labs(y = 'number of retained barcodes', x = 'Barcodes') + # giving the labels names
# ggtitle("Library 1 and 2 retained barcodes combined(dodge)") + # giving the plot a title
# scale_x_discrete(guide = guide_axis(angle = 70)) + # the position of the values on the x-as. 0=horizontal, 90= vertical
# theme_minimal()+ # making the background nicer
# scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample B", "sample C", "sample D")) + #changing the labels in the legend
# #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # give the different values a nice color
# geom_text(aes(label = value), size = 3, hjust = 0, position = position_dodge2(width = 0.5), angle = 90) # printing the values vertical in the middel of the bar
# + ggsave(paste(outputFigures,"Retained_barcodes_sample_names_combined_dodge.png",sep="")) # saving the figure at the specified location
# Dodged bar plot of the percentage of retained reads per barcode pair for all
# four samples (A and B from library 1, C and D from library 2).
plot_combined_pct_dodge <- ggplot(lib1and2retainedper, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(width = 0.7, position = position_dodge(width = 0.6), stat = "identity", color = "gray") + # dodged bars with a gray outline
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 and 2 retained barcodes in percentage combined (dodge)") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample B", "sample C", "sample D")) + # legend title and entries
  #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # give the different values a nice color
  geom_text(aes(label = round(value, digits = 2)), size = 3, hjust = 0, position = position_dodge2(width = 0.5), angle = 90) # value labels rotated 90 degrees
# show the figure on the active graphics device
print(plot_combined_pct_dodge)
# Save with the plot passed explicitly. The original began a new line with
# `+ ggsave(...)`; since the line before already completed the plot, R ran it
# as a separate statement (unary plus on the value returned by ggsave()).
ggsave(paste0(outputFigures, "percentage_Retained_barcodes_lib1_and_2_combined_dodge.png"), plot = plot_combined_pct_dodge)
# making the ggplot for barcode_data_lib1 and 2 together ordered by sample names (stacked)
# ggplot(lib1and2retained, aes(y= value, fill= variable, x= Barcode)) +
# geom_bar(position="stack", stat="identity", color ="gray")+ # using nice colors and making it a stacked barplot
# labs(y = 'number of retained barcodes', x = 'Barcodes') + # giving the labels names
# ggtitle("Library 1 and 2 retained barcodes combined (stacked)") + # giving the plot a title
# scale_x_discrete(guide = guide_axis(angle = 70)) + # the position of the values on the x-as. 0=horizontal, 90= vertical
# theme_minimal()+ # making the background nicer
# scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample B", "sample C", "sample D")) + #changing the labels in the legend
# #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # give the different values a nice color
# geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") # printing the values horizontal in the middel of the bar
# + ggsave(paste(outputFigures,"Retained_barcodes_sample_names_combined_stacked.png",sep="")) # saving the figure at the specified location
# Stacked bar chart: percentage of retained reads per barcode pair, with one
# bar segment per sample (A-D). The figure is only drawn; the save call below
# is disabled.
ggplot(lib1and2retainedper, aes(x = Barcode, y = value, fill = variable)) +
  geom_col(position = "stack", color = "gray") + # stacked bars with a gray outline
  labs(
    title = "Library 1 and 2 retained barcodes in percentage combined (stacked)",
    x = 'Barcodes',
    y = 'percentage of retained barcodes'
  ) +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # x-axis labels rotated by 70 degrees
  theme_minimal() +
  scale_fill_discrete(
    name = "Retained samples",
    labels = c("sample A", "sample B", "sample C", "sample D")
  ) +
  #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # give the different values a nice color
  geom_text(
    aes(label = round(value, digits = 2)),
    size = 3, hjust = 0.5, vjust = 3, position = "stack"
  ) # horizontal value label for every bar segment
#+ ggsave(paste(outputFigures,"percentage_Retained_barcodes_sample_names_combined_stacked.png",sep="")) # saving the figure at the specified location
library(ggplot2)
library(broman)
# library 1
# barcode table of library 1 (taken from the 'process_radtags.clone.log' file);
# TRUE is spelled out because T is an ordinary variable that can be reassigned
barcode_data_lib1 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib1/barcode_information_lib1.tsv", header = TRUE)
# output path, the figures are saved here
outputFigures <- "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_barcodes/"
# fill and outline colors for the bars, picked from the broman "crayons" palette
blue <- brocolors("crayons")["Cornflower"]
darkblue <- brocolors("crayons")["Blue Gray"]
# Bar plot of the number of retained reads per barcode in library 1,
# barcodes ordered from the highest to the lowest count.
plot_lib1_retained <- ggplot(barcode_data_lib1, aes(y = Retained, x = reorder(Barcode, -Retained))) +
  geom_bar(position = "stack", stat = "identity", fill = blue, color = darkblue) + # bars in the colors chosen above
  labs(y = 'number of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 retained barcodes") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  geom_text(aes(label = Retained), size = 3, hjust = 0.5, position = position_stack(vjust = 0.5), angle = 90) # count printed vertically halfway up each bar
# show the figure on the active graphics device
print(plot_lib1_retained)
# ggsave() is not a plot layer: adding it to the chain with `+` is rejected by
# current ggplot2 versions, so it is called separately with the plot passed in.
ggsave(paste0(outputFigures, "Retained_barcodes_lib1.png"), plot = plot_lib1_retained)
# percentage of the library total for every barcode
barcode_data_lib1$retained_pct <- (barcode_data_lib1$Retained / sum(barcode_data_lib1$Retained)) * 100
# The original called round() here without storing the result, which left the
# column unchanged. The column keeps full precision; the labels are rounded
# inside geom_text() below.
# Bar plot of the percentage of retained reads per barcode in library 1,
# barcodes ordered from the highest to the lowest percentage.
plot_lib1_retained_pct <- ggplot(barcode_data_lib1, aes(y = retained_pct, x = reorder(Barcode, -retained_pct))) +
  geom_bar(position = "stack", stat = "identity", fill = blue, color = darkblue) + # bars in the colors chosen above
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 retained barcodes in percentage") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  geom_text(aes(label = round(retained_pct, digits = 3)), size = 3, hjust = 0.5, position = position_stack(vjust = 0.5), angle = 90) # rounded percentage printed vertically halfway up each bar
# show the figure on the active graphics device
print(plot_lib1_retained_pct)
# ggsave() is not a plot layer: adding it to the chain with `+` is rejected by
# current ggplot2 versions, so it is called separately with the plot passed in.
ggsave(paste0(outputFigures, "Retained_barcodes_percentage_lib1.png"), plot = plot_lib1_retained_pct)
# drop every row whose Filename contains "22"
# NOTE(review): the pattern matches "22" anywhere in the name, so a name such
# as "122" would be removed too -- confirm this is intended.
barcode_data_lib1_without22 <- barcode_data_lib1[!grepl("22", barcode_data_lib1$Filename), ]
# Bar plot of the number of retained reads per barcode in library 1 without
# the rows removed above, barcodes ordered from the highest to the lowest count.
plot_lib1_without22 <- ggplot(barcode_data_lib1_without22, aes(y = Retained, x = reorder(Barcode, -Retained))) +
  geom_bar(position = "stack", stat = "identity", fill = blue, color = darkblue) + # bars in the colors chosen above
  labs(y = 'number of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 retained barcodes without sample 22") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  geom_text(aes(label = Retained), size = 3, hjust = 0.5, position = position_stack(vjust = 0.5), angle = 90) # count printed vertically halfway up each bar
# show the figure on the active graphics device
print(plot_lib1_without22)
# ggsave() is not a plot layer: adding it to the chain with `+` is rejected by
# current ggplot2 versions, so it is called separately with the plot passed in.
ggsave(paste0(outputFigures, "Retained_barcodes_without22_lib1.png"), plot = plot_lib1_without22)
# @author: Maite van den Noort
# @Date: 16-12-2020
# @function: makes ggplots of the retained reads from library 1 and 2.
library(data.table)
library(dplyr)
library(tidyverse)
library(reshape2)
library(ggplot2)
# barcode tables of library 1 and library 2 (taken from the
# 'process_radtags.clone.log' file of each library)
barcode_data_lib1 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib1/barcode_information_lib1.tsv", header = TRUE)
barcode_data_lib2 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib2/barcode_information_lib2.tsv", header = TRUE)
# output path, the figures are saved here
outputFigures <- "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_barcodes/"
# percentage of the library total for every barcode, per library
barcode_data_lib1$Retained_pct1 <- (barcode_data_lib1$Retained / sum(barcode_data_lib1$Retained)) * 100
barcode_data_lib2$Retained_pct2 <- (barcode_data_lib2$Retained / sum(barcode_data_lib2$Retained)) * 100
# Wide tables with one column per library. The barcode labels come from
# library 1 only and the library 2 values are attached by row position.
# NOTE(review): this assumes both files list the same barcodes in the same
# order -- confirm against the input files.
datalib1and2 <- data.table(Barcode = barcode_data_lib1$Barcode, Retained1 = barcode_data_lib1$Retained, Retained2 = barcode_data_lib2$Retained)
datalib1and2per <- data.table(Barcode = barcode_data_lib1$Barcode, Retainedper1 = barcode_data_lib1$Retained_pct1, Retainedper2 = barcode_data_lib2$Retained_pct2)
# reshape to long format (columns Barcode, variable, value) for ggplot;
# the argument is spelled out as id.vars instead of relying on partial matching
lib1and2retained <- melt(datalib1and2, id.vars = "Barcode")
lib1and2retainedper <- melt(datalib1and2per, id.vars = "Barcode")
# Dodged bar plot of the number of retained reads per barcode, library 1 next
# to library 2.
plot_libs_dodge <- ggplot(lib1and2retained, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(width = 0.7, position = position_dodge(width = 0.6), stat = "identity", color = "gray") + # dodged bars with a gray outline
  labs(y = 'number of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 and 2 retained barcodes (dodge)") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  scale_fill_manual(values = c("skyblue", "purple")) + # one fill color per library
  geom_text(aes(label = value), size = 3, hjust = 0, position = position_dodge2(width = 1), angle = 90) # value labels rotated 90 degrees
# show the figure on the active graphics device
print(plot_libs_dodge)
# Save with the plot passed explicitly. The original began a new line with
# `+ ggsave(...)`; since the line before already completed the plot, R ran it
# as a separate statement (unary plus on the value returned by ggsave()).
ggsave(paste0(outputFigures, "Retained_barcodes_lib1_and_2_dodge.png"), plot = plot_libs_dodge)
# Dodged bar plot of the percentage of retained reads per barcode, library 1
# next to library 2.
plot_libs_pct_dodge <- ggplot(lib1and2retainedper, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(width = 0.7, position = position_dodge(width = 0.6), stat = "identity", color = "gray") + # dodged bars with a gray outline
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 and 2 retained barcodes in percentage (dodge)") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  scale_fill_manual(values = c("skyblue", "purple")) + # one fill color per library
  geom_text(aes(label = round(value, digits = 2)), size = 3, hjust = 0, position = position_dodge2(width = 1), angle = 90) # rounded value labels rotated 90 degrees
# show the figure on the active graphics device
print(plot_libs_pct_dodge)
# Save with the plot passed explicitly. The original began a new line with
# `+ ggsave(...)`; since the line before already completed the plot, R ran it
# as a separate statement (unary plus on the value returned by ggsave()).
ggsave(paste0(outputFigures, "percentage_Retained_barcodes_lib1_and_2_dodge.png"), plot = plot_libs_pct_dodge)
# Stacked bar plot of the number of retained reads per barcode, library 1 and
# library 2 stacked in one bar.
plot_libs_stacked <- ggplot(lib1and2retained, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(position = "stack", stat = "identity", color = "gray") + # stacked bars with a gray outline
  labs(y = 'number of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 and 2 retained barcodes (stacked)") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  scale_fill_manual(values = c("skyblue", "purple")) + # one fill color per library
  geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") # horizontal value label for every bar segment
# show the figure on the active graphics device
print(plot_libs_stacked)
# Save with the plot passed explicitly. The original began a new line with
# `+ ggsave(...)`; since the line before already completed the plot, R ran it
# as a separate statement (unary plus on the value returned by ggsave()).
ggsave(paste0(outputFigures, "Retained_barcodes_lib1_and_2_stacked.png"), plot = plot_libs_stacked)
# Stacked bar plot of the percentage of retained reads per barcode, library 1
# and library 2 stacked in one bar.
plot_libs_pct_stacked <- ggplot(lib1and2retainedper, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(position = "stack", stat = "identity", color = "gray") + # stacked bars with a gray outline
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 and 2 retained barcodes in percentage (stacked)") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  scale_fill_manual(values = c("skyblue", "purple")) + # one fill color per library
  geom_text(aes(label = round(value, digits = 2)), size = 3, hjust = 0.5, vjust = 3, position = "stack") # horizontal rounded label for every bar segment
# show the figure on the active graphics device
print(plot_libs_pct_stacked)
# Save with the plot passed explicitly. The original began a new line with
# `+ ggsave(...)`; since the line before already completed the plot, R ran it
# as a separate statement (unary plus on the value returned by ggsave()).
ggsave(paste0(outputFigures, "percentage_Retained_barcodes_lib1_and_2_stacked.png"), plot = plot_libs_pct_stacked)
library(ggplot2)
library(broman)
# library 2
# barcode table of library 2 (taken from the 'process_radtags.clone.log' file);
# TRUE is spelled out because T is an ordinary variable that can be reassigned
barcode_data_lib2 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib2/barcode_information_lib2.tsv", header = TRUE)
# output path, the figures are saved here
outputFigures <- "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_barcodes/"
# fill and outline colors for the bars, picked from the broman "crayons" palette
blue <- brocolors("crayons")["Blue Violet"]
darkblue <- brocolors("crayons")["Blue Gray"]
# Bar plot of the number of retained reads per barcode in library 2,
# barcodes ordered from the highest to the lowest count.
plot_lib2_retained <- ggplot(barcode_data_lib2, aes(y = Retained, x = reorder(Barcode, -Retained))) +
  geom_bar(position = "stack", stat = "identity", fill = blue, color = darkblue) + # bars in the colors chosen above
  labs(y = 'number of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 2 retained barcodes") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  geom_text(aes(label = Retained), size = 3, hjust = 0.5, position = position_stack(vjust = 0.5), angle = 90) # count printed vertically halfway up each bar
# show the figure on the active graphics device
print(plot_lib2_retained)
# ggsave() is not a plot layer: adding it to the chain with `+` is rejected by
# current ggplot2 versions, so it is called separately with the plot passed in.
ggsave(paste0(outputFigures, "Retained_barcodes_lib2.png"), plot = plot_lib2_retained)
# percentage of the library total for every barcode
barcode_data_lib2$retained_pct <- (barcode_data_lib2$Retained / sum(barcode_data_lib2$Retained)) * 100
# The original called round() here without storing the result, which left the
# column unchanged. The column keeps full precision; the labels are rounded
# inside geom_text() below.
# Bar plot of the percentage of retained reads per barcode in library 2,
# barcodes ordered from the highest to the lowest percentage.
plot_lib2_retained_pct <- ggplot(barcode_data_lib2, aes(y = retained_pct, x = reorder(Barcode, -retained_pct))) +
  geom_bar(position = "stack", stat = "identity", fill = blue, color = darkblue) + # bars in the colors chosen above
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 2 retained barcodes in percentage") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  geom_text(aes(label = round(retained_pct, digits = 3)), size = 3, hjust = 0.5, position = position_stack(vjust = 0.5), angle = 90) # rounded percentage printed vertically halfway up each bar
# show the figure on the active graphics device
print(plot_lib2_retained_pct)
# ggsave() is not a plot layer: adding it to the chain with `+` is rejected by
# current ggplot2 versions, so it is called separately with the plot passed in.
ggsave(paste0(outputFigures, "Retained_barcodes_percentage_lib2.png"), plot = plot_lib2_retained_pct)
# drop every row whose Filename contains "22"
# NOTE(review): the pattern matches "22" anywhere in the name, so a name such
# as "122" would be removed too -- confirm this is intended.
barcode_data_lib2_without22 <- barcode_data_lib2[!grepl("22", barcode_data_lib2$Filename), ]
# Bar plot of the number of retained reads per barcode in library 2 without
# the rows removed above, barcodes ordered from the highest to the lowest count.
plot_lib2_without22 <- ggplot(barcode_data_lib2_without22, aes(y = Retained, x = reorder(Barcode, -Retained))) +
  geom_bar(position = "stack", stat = "identity", fill = blue, color = darkblue) + # bars in the colors chosen above
  labs(y = 'number of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 2 retained barcodes without sample 22") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  geom_text(aes(label = Retained), size = 3, hjust = 0.5, position = position_stack(vjust = 0.5), angle = 90) # count printed vertically halfway up each bar
# show the figure on the active graphics device
print(plot_lib2_without22)
# ggsave() is not a plot layer: adding it to the chain with `+` is rejected by
# current ggplot2 versions, so it is called separately with the plot passed in.
ggsave(paste0(outputFigures, "Retained_barcodes_without22_lib2.png"), plot = plot_lib2_without22)
# @author: Maite van den Noort
# @Date: 05-01-2021
# @function: makes ggplot (lollipop plot) of the retained reads from library 1 and 2.
library(ggplot2)
library(tidyr)
# barcode tables of library 1 and library 2 (taken from the
# 'process_radtags.clone.log' file of each library)
barcode_data_lib1 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib1/barcode_information_lib1.tsv", header = TRUE)
barcode_data_lib2 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib2/barcode_information_lib2.tsv", header = TRUE)
# output path, the figures are saved here
outputFigures <- "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_barcodes/"
# Join the two tables on Barcode; columns present in both get the suffix .x
# (library 1) or .y (library 2).
# NOTE(review): if a Barcode occurs on several rows of a table, merge() returns
# every row combination for it -- confirm each barcode is unique per table.
merged <- merge(barcode_data_lib1, barcode_data_lib2, by = "Barcode")
# percentage of the library total for every barcode, per library
sum_lib1 <- sum(merged$Retained.x)
sum_lib2 <- sum(merged$Retained.y)
merged$library1 <- (merged$Retained.x / sum_lib1) * 100
merged$library2 <- (merged$Retained.y / sum_lib2) * 100
# library 2 is made negative so that it is drawn to the left of zero
merged$library2 <- merged$library2 * -1
# Long format: the library name goes to Retained_librarys and the percentage
# to Retained.norm. The column is named directly through names_to instead of
# being renamed afterwards. The original `levels<-` call on the numeric
# Retained.norm column is dropped because it did not change any value.
data_long <- pivot_longer(merged, cols = c(library1, library2), names_to = "Retained_librarys", values_to = "Retained.norm")
# Lollipop plot of the percentage of retained reads per barcode: one line from
# zero to the value plus a point, with one point color per library.
ggplot(data_long, aes(Retained.norm, Barcode)) +
  geom_segment(aes(x = 0, y = Barcode, xend = Retained.norm, yend = Barcode), color = "grey50") + # the stick of the lollipop
  geom_point(aes(color = Retained_librarys), size = 2.7) + # the head, colored by library
  # The points map `color`, not `fill`, so the legend title and labels have to
  # be set on the color scale; on the fill scale they were never applied.
  scale_color_discrete(name = "Retained library's", labels = c("library 1", "library 2")) +
  ggtitle("Retained reads per library") +
  labs(x = 'percentage of retained barcodes', y = 'Barcodes') +
  geom_vline(xintercept = 0, size = 0.2) # thin vertical line at zero
#+ ggsave(paste(outputFigures,"percentage_Retained_barcodes_lib1_and_2_lollipop.png",sep="")) # saving the figure at the specified location
# @author: Maite van den Noort
# @Date: 28-12-2020
# @function: makes ggplots of the retained reads from library 1 and 2, showed by sample names.
library(data.table)
library(dplyr)
library(tidyverse)
library(reshape2)
library(ggplot2)
# barcode tables of library 1 and library 2 (taken from the
# 'process_radtags.clone.log' file of each library)
barcode_data_lib1 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib1/barcode_information_lib1.tsv", header = TRUE)
barcode_data_lib2 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib2/barcode_information_lib2.tsv", header = TRUE)
# output path, the figures are saved here
outputFigures <- "/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_barcodes/"
# one subset per sample, selected on the first letter of the file name
# (A and B from library 1, C and D from library 2)
lib1A <- barcode_data_lib1[grepl("^A", barcode_data_lib1$Filename), ]
lib1B <- barcode_data_lib1[grepl("^B", barcode_data_lib1$Filename), ]
lib2C <- barcode_data_lib2[grepl("^C", barcode_data_lib2$Filename), ]
lib2D <- barcode_data_lib2[grepl("^D", barcode_data_lib2$Filename), ]
# percentage of the sample total for every barcode
lib1A$Retained_pct1 <- (lib1A$Retained / sum(lib1A$Retained)) * 100
lib1B$Retained_pct2 <- (lib1B$Retained / sum(lib1B$Retained)) * 100
lib2C$Retained_pct3 <- (lib2C$Retained / sum(lib2C$Retained)) * 100
lib2D$Retained_pct4 <- (lib2D$Retained / sum(lib2D$Retained)) * 100
# Wide tables with one column per sample. The barcode labels come from sample
# A only and the other samples are attached by row position.
# NOTE(review): this assumes the four subsets have matching rows in the same
# order -- confirm against the input files.
datalib1and2 <- data.table(Barcode = lib1A$Barcode, Retained1 = lib1A$Retained, Retained2 = lib1B$Retained, Retained3 = lib2C$Retained, Retained4 = lib2D$Retained)
datalib1and2per <- data.table(Barcode = lib1A$Barcode, Retainedper1 = lib1A$Retained_pct1, Retainedper2 = lib1B$Retained_pct2, Retainedper3 = lib2C$Retained_pct3, Retainedper4 = lib2D$Retained_pct4)
# reshape to long format (columns Barcode, variable, value) for ggplot;
# the argument is spelled out as id.vars instead of relying on partial matching
lib1and2retained <- melt(datalib1and2, id.vars = "Barcode")
lib1and2retainedper <- melt(datalib1and2per, id.vars = "Barcode")
# Dodged bar plot of the number of retained reads per barcode, one bar per
# sample (A-D).
plot_samples_dodge <- ggplot(lib1and2retained, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(width = 0.7, position = position_dodge(width = 0.6), stat = "identity", color = "gray") + # dodged bars with a gray outline
  labs(y = 'number of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 and 2 retained barcodes (dodge)") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample B", "sample C", "sample D")) + # legend title and entries
  #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # give the different values a nice color
  geom_text(aes(label = value), size = 3, hjust = 0, position = position_dodge2(width = 1), angle = 90) # value labels rotated 90 degrees
# show the figure on the active graphics device
print(plot_samples_dodge)
# Save with the plot passed explicitly. The original began a new line with
# `+ ggsave(...)`; since the line before already completed the plot, R ran it
# as a separate statement (unary plus on the value returned by ggsave()).
ggsave(paste0(outputFigures, "Retained_barcodes_sample_names_dodge.png"), plot = plot_samples_dodge)
# Dodged bar plot of the percentage of retained reads per barcode, one bar per
# sample (A-D).
plot_samples_pct_dodge <- ggplot(lib1and2retainedper, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(width = 0.7, position = position_dodge(width = 0.6), stat = "identity", color = "gray") + # dodged bars with a gray outline
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 and 2 retained barcodes in percentage (dodge)") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample B", "sample C", "sample D")) + # legend title and entries
  #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # give the different values a nice color
  geom_text(aes(label = round(value, digits = 2)), size = 3, hjust = 0, position = position_dodge2(width = 1), angle = 90) # rounded value labels rotated 90 degrees
# show the figure on the active graphics device
print(plot_samples_pct_dodge)
# Save with the plot passed explicitly. The original began a new line with
# `+ ggsave(...)`; since the line before already completed the plot, R ran it
# as a separate statement (unary plus on the value returned by ggsave()).
# The file name follows the other per-sample figures ("sample_names"); the
# original name "percentage_Retained_barcodes_lib1_and_2_dodge.png" is also
# written by the per-library plot, so one figure overwrote the other.
ggsave(paste0(outputFigures, "percentage_Retained_barcodes_sample_names_dodge.png"), plot = plot_samples_pct_dodge)
# Stacked bar plot of the number of retained reads per barcode, one bar
# segment per sample (A-D).
plot_samples_stacked <- ggplot(lib1and2retained, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(position = "stack", stat = "identity", color = "gray") + # stacked bars with a gray outline
  labs(y = 'number of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 and 2 retained barcodes (stacked)") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample B", "sample C", "sample D")) + # legend title and entries
  #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # give the different values a nice color
  geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") # horizontal value label for every bar segment
# show the figure on the active graphics device
print(plot_samples_stacked)
# Save with the plot passed explicitly. The original began a new line with
# `+ ggsave(...)`; since the line before already completed the plot, R ran it
# as a separate statement (unary plus on the value returned by ggsave()).
ggsave(paste0(outputFigures, "Retained_barcodes_sample_names_stacked.png"), plot = plot_samples_stacked)
# Stacked bar plot of the percentage of retained reads per barcode, one bar
# segment per sample (A-D).
plot_samples_pct_stacked <- ggplot(lib1and2retainedper, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(position = "stack", stat = "identity", color = "gray") + # stacked bars with a gray outline
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis titles
  ggtitle("Library 1 and 2 retained barcodes in percentage (stacked)") +
  scale_x_discrete(guide = guide_axis(angle = 70)) + # rotate the x-axis labels by 70 degrees
  theme_minimal() +
  scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample B", "sample C", "sample D")) + # legend title and entries
  #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # give the different values a nice color
  geom_text(aes(label = round(value, digits = 2)), size = 3, hjust = 0.5, vjust = 3, position = "stack") # horizontal rounded label for every bar segment
# show the figure on the active graphics device
print(plot_samples_pct_stacked)
# Save with the plot passed explicitly. The original began a new line with
# `+ ggsave(...)`; since the line before already completed the plot, R ran it
# as a separate statement (unary plus on the value returned by ggsave()).
ggsave(paste0(outputFigures, "percentage_Retained_barcodes_sample_names_stacked.png"), plot = plot_samples_pct_stacked)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment