Commit 9bbfb365 authored by Noort's avatar Noort
Browse files

src update

parent c8d0c2d7
# @author: Maite van den Noort
# @Date: 30-12-2020, last update: 7-01-2021
# @function:
# @function: makes ggplots with watson and crick together (combined) about the retained barcodes
# shows it per sample name.
library(data.table)
library(dplyr)
library(tidyverse)
library(reshape2)
library(ggplot2)
# data from library 1 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib1/barcode_information_lib1.tsv", header = T)
barcode_data_lib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib1/barcode_information_lib1.tsv", header = T)
# data from library 2 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib2/barcode_information_lib2.tsv", header = T)
barcode_data_lib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib2/barcode_information_lib2.tsv", header = T)
# output path, here are the figures saved
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_barcodes/")
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_barcodes/")
# making subsets of the Crick/Watson strands for every sample from library 1
Watson1 <- barcode_data_lib1[grepl("Watson", barcode_data_lib1$Filename),]
......@@ -37,6 +38,8 @@ lib1A <- datalib1and2[grepl("^A", datalib1and2$Filename),]
lib1B <- datalib1and2[grepl("^B", datalib1and2$Filename),]
lib2C <- datalib1and2[grepl("^C", datalib1and2$Filename),]
lib2D <- datalib1and2[grepl("^D", datalib1and2$Filename),]
lib1A$Filename <- gsub("-Watson", "",lib1A$Filename)
lib1A$Filename <- gsub("^A", "", lib1A$Filename)
sum(lib1A$Retained)
......@@ -45,7 +48,7 @@ lib1A$Retained_pct1 = (lib1A$Retained / sum(lib1A$Retained)) *100
lib1B$Retained_pct2 = (lib1B$Retained / sum(lib1B$Retained)) *100
lib2C$Retained_pct3 = (lib2C$Retained / sum(lib2C$Retained)) *100
lib2D$Retained_pct4 = (lib2D$Retained / sum(lib2D$Retained)) *100
lib1A$Barcodes <- paste(lib1A$Barcode, "|", lib1B$Barcode)
lib1A$Barcodes <- paste(lib1A$Barcode, "|", lib1B$Barcode, "|", lib1A$Filename)
# making a new table with all the information necessary for the plots
datalib <- data.table(Barcode = lib1A$Barcodes , Retained1 = lib1A$Retained, Retained2 = lib1B$Retained, Retained3 = lib2C$Retained, Retained4 = lib2D$Retained)
......
# @author: Maite van den Noort
# @Date: 30-12-2020, last update: 14-01-2021
# @function: makes ggplots with watson and crick together (combined) about the retained barcodes
# shows it per sample name. without sample 22
library(data.table)
library(dplyr)
library(tidyverse)
library(reshape2)
library(ggplot2)
# data from library 1 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib1 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib1/barcode_information_lib1.tsv", header = TRUE)
# data from library 2 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib2 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib2/barcode_information_lib2.tsv", header = TRUE)
# remove every row whose Filename contains "22" (sample 22 is left out of these figures)
# NOTE(review): the pattern is not anchored, so a Filename with "22" anywhere in it is dropped -
# confirm that no other sample name contains "22"
barcode_data_lib1 <- barcode_data_lib1[!grepl("22", barcode_data_lib1$Filename), ]
barcode_data_lib2 <- barcode_data_lib2[!grepl("22", barcode_data_lib2$Filename), ]
# output path, the figures are saved here (the trailing slash is needed: file names are pasted onto it)
outputFigures <- "/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_barcodes/"
# split each library into its Watson rows and its Crick rows
Watson1 <- barcode_data_lib1[grepl("Watson", barcode_data_lib1$Filename), ]
Crick1 <- barcode_data_lib1[grepl("Crick", barcode_data_lib1$Filename), ]
Watson2 <- barcode_data_lib2[grepl("Watson", barcode_data_lib2$Filename), ]
Crick2 <- barcode_data_lib2[grepl("Crick", barcode_data_lib2$Filename), ]
# The Watson and Crick counts are added element-wise below, so both subsets of a
# library must have the same number of rows; otherwise R would silently recycle.
# NOTE(review): the sum also assumes that Watson and Crick rows are listed in the
# same sample order - confirm against the input files
stopifnot(
  nrow(Watson1) == nrow(Crick1),
  nrow(Watson2) == nrow(Crick2)
)
# retained reads with Watson and Crick added together (stored in column V1)
WC1 <- data.table(Watson1$Retained + Crick1$Retained)
WC2 <- data.table(Watson2$Retained + Crick2$Retained)
# stack the combined counts of library 1 on top of those of library 2
WatsonCrick <- rbind(WC1, WC2)
# stack the two input tables in the same order (library 1, then library 2)
data <- rbind(barcode_data_lib1, barcode_data_lib2)
# keep only the Watson rows so that every sample occurs exactly once
data <- data[grepl("Watson", data$Filename), ]
# table with barcode, file name and the combined Watson + Crick count
datalib1and2 <- data.table(Barcode = data$Barcode, Filename = data$Filename, Retained = WatsonCrick$V1)
# one subset per sample-name prefix (A, B, C and D)
lib1A <- datalib1and2[grepl("^A", datalib1and2$Filename), ]
lib1B <- datalib1and2[grepl("^B", datalib1and2$Filename), ]
lib2C <- datalib1and2[grepl("^C", datalib1and2$Filename), ]
lib2D <- datalib1and2[grepl("^D", datalib1and2$Filename), ]
# the four subsets are combined column by column below, so they must be equally long
stopifnot(
  nrow(lib1A) == nrow(lib1B),
  nrow(lib1A) == nrow(lib2C),
  nrow(lib1A) == nrow(lib2D)
)
# strip the "-Watson" suffix and the leading "A" from the file names of subset A
lib1A$Filename <- gsub("-Watson", "", lib1A$Filename)
lib1A$Filename <- gsub("^A", "", lib1A$Filename)
# total number of retained reads in subset A (printed for inspection)
sum(lib1A$Retained)
# extra column per subset: retained reads as a percentage of the subset total
lib1A$Retained_pct1 <- (lib1A$Retained / sum(lib1A$Retained)) * 100
lib1B$Retained_pct2 <- (lib1B$Retained / sum(lib1B$Retained)) * 100
lib2C$Retained_pct3 <- (lib2C$Retained / sum(lib2C$Retained)) * 100
lib2D$Retained_pct4 <- (lib2D$Retained / sum(lib2D$Retained)) * 100
# x-axis label: barcode of subset A | barcode of subset B | stripped file name
lib1A$Barcodes <- paste(lib1A$Barcode, "|", lib1B$Barcode, "|", lib1A$Filename)
# table with the absolute counts (only used by the plots that are currently commented out)
datalib <- data.table(Barcode = lib1A$Barcodes, Retained1 = lib1A$Retained, Retained2 = lib1B$Retained, Retained3 = lib2C$Retained, Retained4 = lib2D$Retained)
# tables with the percentages: all four subsets, A with C, and B with D
datalib1and2per <- data.table(Barcode = lib1A$Barcodes, Retainedper1 = lib1A$Retained_pct1, Retainedper2 = lib1B$Retained_pct2, Retainedper3 = lib2C$Retained_pct3, Retainedper4 = lib2D$Retained_pct4)
datalib1and2per1 <- data.table(Barcode = lib1A$Barcode, Retainedper1 = lib1A$Retained_pct1, Retainedper3 = lib2C$Retained_pct3)
datalib1and2per2 <- data.table(Barcode = lib1B$Barcode, Retainedper2 = lib1B$Retained_pct2, Retainedper4 = lib2D$Retained_pct4)
# reshape to long format (columns Barcode, variable, value) as needed by ggplot
lib1and2retained <- melt(datalib, id.vars = "Barcode")
# same reshape for the percentage tables
lib1and2retainedper <- melt(datalib1and2per, id.vars = "Barcode")
lib1and2retainedper1 <- melt(datalib1and2per1, id.vars = "Barcode")
lib1and2retainedper2 <- melt(datalib1and2per2, id.vars = "Barcode")
# making the ggplot for barcode_data_lib1 and 2 together in percentage ordered by sample names A and C (dodge)
# The plot is stored in a variable and handed to ggsave() explicitly: a line that
# starts with "+ ggsave(...)" is a separate expression (unary plus on the value
# returned by ggsave), not a continuation of the plot above it.
plot_retained_pct_ac <- ggplot(lib1and2retainedper1, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(width = 0.7, position = position_dodge(width = 0.6), stat = "identity", color = "gray") + # dodged bars with a gray outline
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis labels
  ggtitle("Library 1 and 2 retained barcodes in percentage combined, sample A and C, without sample 22 (dodge)") + # plot title
  scale_x_discrete(guide = guide_axis(angle = 70)) + # angle of the x-axis labels: 0 = horizontal, 90 = vertical
  theme_minimal() +
  scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample C")) + # legend title and labels
  #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # alternative: fixed colors
  geom_text(aes(label = round(value, digits = 2)), size = 3, hjust = 0, position = position_dodge2(width = 0.5), angle = 90) # values rounded to 2 digits, written vertically
# show the figure, as the bare top-level ggplot() expression did before
print(plot_retained_pct_ac)
# save the figure at the specified location
ggsave(paste0(outputFigures, "percentage_Retained_barcodes_lib1_and_2_combined_sampleAC_dodge_min22.png"), plot = plot_retained_pct_ac)
# making the ggplot for barcode_data_lib1 and 2 together in percentage ordered by sample names B and D (dodge)
# The plot is stored in a variable and handed to ggsave() explicitly: a line that
# starts with "+ ggsave(...)" is a separate expression (unary plus on the value
# returned by ggsave), not a continuation of the plot above it.
plot_retained_pct_bd <- ggplot(lib1and2retainedper2, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(width = 0.7, position = position_dodge(width = 0.6), stat = "identity", color = "gray") + # dodged bars with a gray outline
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis labels
  ggtitle("Library 1 and 2 retained barcodes in percentage combined, sample B and D, without sample 22(dodge)") + # plot title
  scale_x_discrete(guide = guide_axis(angle = 70)) + # angle of the x-axis labels: 0 = horizontal, 90 = vertical
  theme_minimal() +
  scale_fill_discrete(name = "Retained samples", labels = c("sample B", "sample D")) + # legend title and labels
  #scale_fill_manual(values = c("#7CAE00", "#C77CFF")) + # alternative: fixed colors
  geom_text(aes(label = round(value, digits = 2)), size = 3, hjust = 0, position = position_dodge2(width = 0.5), angle = 90) # values rounded to 2 digits, written vertically
# show the figure, as the bare top-level ggplot() expression did before
print(plot_retained_pct_bd)
# save the figure at the specified location
ggsave(paste0(outputFigures, "percentage_Retained_barcodes_lib1_and_2_combined_sampleBD_dodge_min22.png"), plot = plot_retained_pct_bd)
# making the ggplot for barcode_data_lib1 and 2 together ordered by sample names (dodge)
# ggplot(lib1and2retained, aes(y= value, fill= variable, x= Barcode)) +
# geom_bar(width=0.7, position = position_dodge(width=0.6), stat="identity", color ="gray")+ # using nice colors and making it a dogde barplot
# labs(y = 'number of retained barcodes', x = 'Barcodes') + # giving the labels names
# ggtitle("Library 1 and 2 retained barcodes combined(dodge)") + # giving the plot a title
# scale_x_discrete(guide = guide_axis(angle = 70)) + # the position of the values on the x-as. 0=horizontal, 90= vertical
# theme_minimal()+ # making the background nicer
# scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample B", "sample C", "sample D")) + #changing the labels in the legend
# #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # give the different values a nice color
# geom_text(aes(label = value), size = 3, hjust = 0, position = position_dodge2(width = 0.5), angle = 90) # printing the values vertical in the middel of the bar
# + ggsave(paste(outputFigures,"Retained_barcodes_sample_names_combined_dodge.png",sep="")) # saving the figure at the specified location
# making the ggplot for barcode_data_lib1 and 2 together in percentage ordered by sample names (dodge)
# The plot is stored in a variable and handed to ggsave() explicitly: a line that
# starts with "+ ggsave(...)" is a separate expression (unary plus on the value
# returned by ggsave), not a continuation of the plot above it.
plot_retained_pct_dodge <- ggplot(lib1and2retainedper, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(width = 0.7, position = position_dodge(width = 0.6), stat = "identity", color = "gray") + # dodged bars with a gray outline
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis labels
  ggtitle("Library 1 and 2 retained barcodes in percentage combined without sample 22 (dodge)") + # plot title
  scale_x_discrete(guide = guide_axis(angle = 70)) + # angle of the x-axis labels: 0 = horizontal, 90 = vertical
  theme_minimal() +
  scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample B", "sample C", "sample D")) + # legend title and labels
  #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # alternative: fixed colors
  geom_text(aes(label = round(value, digits = 2)), size = 3, hjust = 0, position = position_dodge2(width = 0.5), angle = 90) # values rounded to 2 digits, written vertically
# show the figure, as the bare top-level ggplot() expression did before
print(plot_retained_pct_dodge)
# save the figure at the specified location
ggsave(paste0(outputFigures, "percentage_Retained_barcodes_lib1_and_2_combined_dodge_min22.png"), plot = plot_retained_pct_dodge)
#making the ggplot for barcode_data_lib1 and 2 together ordered by sample names (stacked)
# ggplot(lib1and2retained, aes(y= value, fill= variable, x= Barcode)) +
# geom_bar(position="stack", stat="identity", color ="gray")+ # using nice colors and making it a stacked barplot
# labs(y = 'number of retained barcodes', x = 'Barcodes') + # giving the labels names
# ggtitle("Library 1 and 2 retained barcodes combined (stacked)") + # giving the plot a title
# scale_x_discrete(guide = guide_axis(angle = 70)) + # the position of the values on the x-as. 0=horizontal, 90= vertical
# theme_minimal()+ # making the background nicer
# scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample B", "sample C", "sample D")) + #changing the labels in the legend
# #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # give the different values a nice color
# geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") # printing the values horizontal in the middel of the bar
# + ggsave(paste(outputFigures,"Retained_barcodes_sample_names_combined_stacked.png",sep="")) # saving the figure at the specified location
# making the ggplot for barcode_data_lib1 and 2 together in percentage ordered by sample names (stacked)
# The plot is stored in a variable and handed to ggsave() explicitly: a line that
# starts with "+ ggsave(...)" is a separate expression (unary plus on the value
# returned by ggsave), not a continuation of the plot above it.
plot_retained_pct_stacked <- ggplot(lib1and2retainedper, aes(y = value, fill = variable, x = Barcode)) +
  geom_bar(position = "stack", stat = "identity", color = "gray") + # stacked bars with a gray outline
  labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # axis labels
  ggtitle("Library 1 and 2 retained barcodes in percentage combined without sample 22 (stacked)") + # plot title
  scale_x_discrete(guide = guide_axis(angle = 70)) + # angle of the x-axis labels: 0 = horizontal, 90 = vertical
  theme_minimal() +
  scale_fill_discrete(name = "Retained samples", labels = c("sample A", "sample B", "sample C", "sample D")) + # legend title and labels
  #scale_fill_manual(values = c("skyblue", "purple", "yellow", "red")) + # alternative: fixed colors
  geom_text(aes(label = round(value, digits = 2)), size = 3, hjust = 0.5, vjust = 3, position = "stack") # values rounded to 2 digits, written horizontally inside the bars
# show the figure, as the bare top-level ggplot() expression did before
print(plot_retained_pct_stacked)
# save the figure at the specified location
# NOTE(review): unlike the other figures of this script the file name has no
# "_min22" suffix, so it may overwrite a figure that includes sample 22 - confirm
ggsave(paste0(outputFigures, "percentage_Retained_barcodes_sample_names_combined_stacked.png"), plot = plot_retained_pct_stacked)
......@@ -5,21 +5,38 @@ library(ggplot2)
library(tidyr)
# data from library 1 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib1/barcode_information_lib1.tsv", header = T)
barcode_data_lib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib1/barcode_information_lib1.tsv", header = T)
# data from library 2 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib2/barcode_information_lib2.tsv", header = T)
barcode_data_lib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib2/barcode_information_lib2.tsv", header = T)
# output path, here are the figures saved
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_barcodes/")
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_barcodes/")
# making subsets for every crick/watson strands from every sample form library1
Watson1 <- barcode_data_lib1[grepl("Watson", barcode_data_lib1$Filename),]
Crick1 <- barcode_data_lib1[grepl("Crick", barcode_data_lib1$Filename),]
Watson2 <- barcode_data_lib2[grepl("Watson", barcode_data_lib2$Filename),]
Crick2 <- barcode_data_lib2[grepl("Crick", barcode_data_lib2$Filename),]
# calculating percentage
Watson1$per <- (Watson1$Retained / sum(Watson1$Retained)) *100
Crick1$per <- (Crick1$Retained / sum(Crick1$Retained)) *100
Watson2$per <- (Watson2$Retained / sum(Watson2$Retained)) *100
Crick2$per <- (Crick2$Retained / sum(Crick2$Retained)) *100
# making new tables that combine one column from each of the two tables (Watson and Crick)
data1 <- barcode_data_lib1[grepl("Watson", barcode_data_lib1$Filename),]
data2 <- barcode_data_lib2[grepl("Watson", barcode_data_lib2$Filename),]
data1$Filename1 <- gsub("-Watson", "", data1$Filename)
data1$Filename1 <- gsub("^A", "A-C", data1$Filename1)
data1$Filename1 <- gsub("^B", "B-D", data1$Filename1)
data1$WC <- Watson1$per+Crick1$per
data2$WC <- Watson2$per+Crick2$per
# merging the table together by barcode
merged <- merge(barcode_data_lib1, barcode_data_lib2, by = "Barcode")
merged <- merge(data1, data2, by = "Barcode")
# calculate the percentage of retained barcodes per library
sum_lib1 <- sum(merged$Retained.x)
sum_lib2 <- sum(merged$Retained.y)
merged$library1 <- (merged$Retained.x/sum_lib1)*100
merged$library2 <- (merged$Retained.y/sum_lib2)*100
# to know which is library1 and which is library2
merged$library1 <- merged$WC.x
merged$library2 <- merged$WC.y
# making library2 negative to show it nicer in the plot
merged$library2 <- merged$library2*-1
......@@ -30,15 +47,17 @@ levels(data_long$Retained.norm) <- c("library 1", "library 2")
# changing the column name "name" into Retained_librarys
names(data_long)[names(data_long) == "name"] <- "Retained_librarys"
# making the ggplot (lolliplot) from the data
ggplot(data_long, aes(Retained.norm, Barcode)) +
geom_segment(aes(x = 0, y = Barcode, xend = Retained.norm, yend = Barcode), color = "grey50") +
# making the ggplot (lolliplot) from the data (watson and crick together (combined))
ggplot(data_long, aes(Retained.norm, Filename1)) +
geom_segment(aes(x = 0, y = Filename1, xend = Retained.norm, yend = Filename1), color = "grey50") +
geom_point(aes(color = Retained_librarys), size = 2.7) + # here he knows that there are library 1 and 2 with a different collor with te size 2.7
scale_fill_discrete(name = "Retained library's", labels = c("library 1", "library 2")) +
ggtitle("Retained reads per library") +
labs(x = 'percentage of retained barcodes', y = 'Barcodes') +
ggtitle("Retained reads per library combined") +
labs(x = 'percentage of retained barcodes', y = 'Filenames') +
geom_vline(xintercept = 0, size = 0.2) # adding a vertical line on position 0 with the size 0.2
#+ ggsave(paste(outputFigures,"percentage_Retained_barcodes_lib1_and_2_lollipop.png",sep="")) # saving the figure at the specified location
+ ggsave(paste(outputFigures,"percentage_Retained_barcodes_lib1_and_2_lollipop.png",sep="")) # saving the figure at the specified location
......
# @author: Maite van den Noort
# @Date: 15-01-2021
# @function: makes ggplot (lollipop plot) of the retained reads from library 1 and 2. without sample 22
library(ggplot2)
library(tidyr)
# data from library 1 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib1 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib1/barcode_information_lib1.tsv", header = TRUE)
# data from library 2 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib2 <- read.table(file = "/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib2/barcode_information_lib2.tsv", header = TRUE)
# remove every row whose Filename contains "22" (sample 22 is left out of this figure)
# NOTE(review): the pattern is not anchored, so a Filename with "22" anywhere in it is dropped -
# confirm that no other sample name contains "22"
barcode_data_lib1 <- barcode_data_lib1[!grepl("22", barcode_data_lib1$Filename), ]
barcode_data_lib2 <- barcode_data_lib2[!grepl("22", barcode_data_lib2$Filename), ]
# output path, the figures are saved here (the trailing slash is needed: file names are pasted onto it)
outputFigures <- "/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_barcodes/"
# split each library into its Watson rows and its Crick rows
Watson1 <- barcode_data_lib1[grepl("Watson", barcode_data_lib1$Filename), ]
Crick1 <- barcode_data_lib1[grepl("Crick", barcode_data_lib1$Filename), ]
Watson2 <- barcode_data_lib2[grepl("Watson", barcode_data_lib2$Filename), ]
Crick2 <- barcode_data_lib2[grepl("Crick", barcode_data_lib2$Filename), ]
# The Watson and Crick percentages are added element-wise below, so both subsets
# of a library must have the same number of rows; otherwise R would silently recycle.
# NOTE(review): the sum also assumes that Watson and Crick rows are listed in the
# same sample order - confirm against the input files
stopifnot(
  nrow(Watson1) == nrow(Crick1),
  nrow(Watson2) == nrow(Crick2)
)
# retained reads as a percentage of the total of each strand subset
Watson1$per <- (Watson1$Retained / sum(Watson1$Retained)) * 100
Crick1$per <- (Crick1$Retained / sum(Crick1$Retained)) * 100
Watson2$per <- (Watson2$Retained / sum(Watson2$Retained)) * 100
Crick2$per <- (Crick2$Retained / sum(Crick2$Retained)) * 100
# one row per sample: start from the Watson rows of each library
data1 <- barcode_data_lib1[grepl("Watson", barcode_data_lib1$Filename), ]
data2 <- barcode_data_lib2[grepl("Watson", barcode_data_lib2$Filename), ]
# y-axis label: drop the "-Watson" suffix and turn the prefix A into "A-C" and B into "B-D"
data1$Filename1 <- gsub("-Watson", "", data1$Filename)
data1$Filename1 <- gsub("^A", "A-C", data1$Filename1)
data1$Filename1 <- gsub("^B", "B-D", data1$Filename1)
# Watson and Crick percentage added together
# NOTE(review): each strand is scaled to 100 on its own, so WC adds up to 200 per library - confirm this is intended
data1$WC <- Watson1$per + Crick1$per
data2$WC <- Watson2$per + Crick2$per
# merging the tables by barcode; columns that occur in both get the suffix .x (library 1) or .y (library 2)
merged <- merge(data1, data2, by = "Barcode")
# library 1 keeps its sign, library 2 is made negative so that it is drawn on the other side of zero
merged$library1 <- merged$WC.x
merged$library2 <- -merged$WC.y
# long format: the library name goes into Retained_librarys and the value into Retained.norm
# (the former levels() assignment on the numeric column Retained.norm had no effect and was dropped)
data_long <- pivot_longer(merged, cols = c(library1, library2), names_to = "Retained_librarys", values_to = "Retained.norm")
# making the ggplot (lollipop plot) from the data (Watson and Crick combined)
# The plot is stored in a variable and handed to ggsave() explicitly: a line that
# starts with "+ ggsave(...)" is a separate expression (unary plus on the value
# returned by ggsave), not a continuation of the plot above it.
plot_lollipop_min22 <- ggplot(data_long, aes(Retained.norm, Filename1)) +
  geom_segment(aes(x = 0, y = Filename1, xend = Retained.norm, yend = Filename1), color = "grey50") + # stick from zero to the value
  geom_point(aes(color = Retained_librarys), size = 2.7) + # one point per library, colored by library
  # the points map the color aesthetic (not fill), so the legend title and labels must be set on the color scale
  scale_color_discrete(name = "Retained library's", labels = c("library 1", "library 2")) +
  ggtitle("Retained reads per library combined without sample 22") +
  labs(x = 'percentage of retained barcodes', y = 'Filenames') +
  geom_vline(xintercept = 0, size = 0.2) # thin vertical line at position 0
# show the figure, as the bare top-level ggplot() expression did before
print(plot_lollipop_min22)
# save the figure at the specified location
ggsave(paste0(outputFigures, "percentage_Retained_barcodes_lib1_and_2_lollipop_min22.png"), plot = plot_lollipop_min22)
......@@ -8,10 +8,10 @@ library(reshape2)
library(ggplot2)
# data: duplicate reads from library 1
datalib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/output_lane2/Data_maite/Duplication_reads_lib1.tsv")
datalib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib1/Duplication_reads_lib1.tsv")
# output path, here are the figures saved
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/Output_data_scripts/Figures duplication/")
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_duplication/")
# order library1
orderedlib1 <- datalib1[order(datalib1$V1),]
......@@ -26,13 +26,17 @@ lib1crick2 <- orderedlib1[grepl("Crick.2", orderedlib1$sample_name),]
lib1watson1 <- orderedlib1[grepl("Watson.1", orderedlib1$sample_name),]
lib1watson2 <- orderedlib1[grepl("Watson.2", orderedlib1$sample_name),]
# make one crick and one watson by adding the two together
crick <- data.table(lib1crick1$dups+lib1crick2$dups)
watson <- data.table(lib1watson1$dups+lib1watson2$dups)
# making subset for the sample names, which makes it easier/clearer for the new table (library1dups)
Sample_nameslib1 <- data.frame(do.call('rbind', strsplit(as.character(lib1crick1$sample_names),'-',fixed=TRUE)))
names(Sample_nameslib1)[names(Sample_nameslib1) == "X1"] <- "sample_names"
names(Sample_nameslib1)[names(Sample_nameslib1) == "X2"] <- "crick_watson"
# making a new table with the subsets from library1
library1dups <- data.table(Sample_names=Sample_nameslib1$sample_names, Crick1_dups=lib1crick1$dups, Crick2_dups=lib1crick2$dups, Watson1_dups=lib1watson1$dups, Watson2_dups=lib1watson2$dups)
library1dups <- data.table(Sample_names=Sample_nameslib1$sample_names, Crick=crick$V1, Watson=watson$V1)
# reshape the table, for an good figure
lib1dups <- melt(library1dups, id.var="Sample_names")
......@@ -44,7 +48,7 @@ ggplot(lib1dups, aes(fill=variable, y=value, x= Sample_names)) +
ggtitle("Library 1 duplicated reads") +
scale_x_discrete(guide = guide_axis(angle = 90)) +
geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("skyblue", "cyan3", "blueviolet", "pink")) +
scale_fill_manual(values = c("red", "skyblue")) +
ggsave(paste(outputFigures,"Library1_duplicated_reads.png",sep=""))
# create the figure with stackbar for library 1 from highest value to lowest
......@@ -54,7 +58,7 @@ ggplot(lib1dups, aes(fill=variable, y=value, x= reorder(Sample_names, -value)))
ggtitle("Library 1 From higest duplicated reads to lowest") +
scale_x_discrete(guide = guide_axis(angle = 90)) +
geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("skyblue", "cyan3", "blueviolet", "pink")) +
scale_fill_manual(values = c("red", "skyblue")) +
ggsave(paste(outputFigures,"Library1_from_higest_duplicated_reads_to_lowest.png",sep=""))
# create the figure with stackbar for library 1 with A and B from the same sample next to each other
......@@ -68,7 +72,7 @@ ggplot(lib1dups, aes(fill=variable, y=value, x=Sample_names)) +
ggtitle("Library 1 duplicated reads with A and B from the same sample next to each other") +
scale_x_discrete(guide = guide_axis(angle = 90)) +
geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("skyblue", "cyan3", "blueviolet", "pink")) +
scale_fill_manual(values = c("red", "skyblue")) +
ggsave(paste(outputFigures,"Library1_duplicated_reads_with_A_and_B.png",sep=""))
#-------------------------------------------------------------------------------------------
# library shape, why: because the samples are then shown in a different way, which makes them easier to compare.
......
......@@ -7,11 +7,11 @@ library(tidyverse)
library(reshape2)
library(ggplot2)
# input library2 duplication reads
datalib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/output_lane8/Data_maite/Duplication_reads_lib2.tsv") # path to input data
# data: duplicate reads from library 2
datalib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib2/Duplication_reads_lib2.tsv")
# output path, here are the figures saved
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/Output data scripts/Figures duplication/")
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_duplication/")
# order library2
orderedlib2 <- datalib2[order(datalib2$V1),]
......@@ -26,13 +26,17 @@ lib2crick2 <- orderedlib2[grepl("Crick.2", orderedlib2$sample_name),]
lib2watson1 <- orderedlib2[grepl("Watson.1", orderedlib2$sample_name),]
lib2watson2 <- orderedlib2[grepl("Watson.2", orderedlib2$sample_name),]
# make one crick and one watson by adding the two together
crick <- data.table(lib2crick1$dups+lib2crick2$dups)
watson <- data.table(lib2watson1$dups+lib2watson2$dups)
# making subset for the sample names, which makes it easier/clearer for the new table (library2dups)
Sample_nameslib2 <- data.frame(do.call('rbind', strsplit(as.character(lib2crick1$sample_names),'-',fixed=TRUE)))
names(Sample_nameslib2)[names(Sample_nameslib2) == "X1"] <- "sample_names"
names(Sample_nameslib2)[names(Sample_nameslib2) == "X2"] <- "crick_watson"
# making a new table with the subsets from library2
library2dups <- data.table(Sample_names=Sample_nameslib2$sample_names, Crick1_dups=lib2crick1$dups, Crick2_dups=lib2crick2$dups, Watson1_dups=lib2watson1$dups, Watson2_dups=lib2watson2$dups)
# making a new table with the subsets from library2
library2dups <- data.table(Sample_names=Sample_nameslib2$sample_names, Crick=crick$V1, Watson=watson$V1)
# reshape the table, for an good figure
lib2dups <- melt(library2dups, id.var="Sample_names")
......@@ -44,7 +48,7 @@ ggplot(lib2dups, aes(fill=variable, y=value, x= Sample_names)) +
ggtitle("library 2 duplicate reads") +
scale_x_discrete(guide = guide_axis(angle = 90)) +
geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("skyblue", "cyan3", "blueviolet", "pink")) +
scale_fill_manual(values = c("red", "skyblue")) +
ggsave(paste(outputFigures,"Library2_duplicated_reads.png",sep=""))
# create the figure with stackbar for library 2 from highest value to lowest
......@@ -54,7 +58,7 @@ ggplot(lib2dups, aes(fill=variable, y=value, x= reorder(Sample_names, -value)))
ggtitle("library 2 From higest duplicate reads to lowest") +
scale_x_discrete(guide = guide_axis(angle = 90)) +
geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("skyblue", "cyan3", "blueviolet", "pink")) +
scale_fill_manual(values = c("red", "skyblue")) +
ggsave(paste(outputFigures,"Library2_from_higest_duplicated_reads_to_lowest.png",sep=""))
......@@ -69,5 +73,5 @@ ggplot(lib2dups, aes(fill=variable, y=value, x=Sample_names)) +
ggtitle("library 2 duplicate reads with C and D from the same sample next to each other") +
scale_x_discrete(guide = guide_axis(angle = 90)) +
geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("skyblue", "cyan3", "blueviolet", "pink")) +
scale_fill_manual(values = c("red", "skyblue")) +
ggsave(paste(outputFigures,"Library2_duplicated_reads_with_C_and_D.png",sep=""))
......@@ -7,8 +7,11 @@ library(tidyverse)
library(reshape2)
library(ggplot2)
# input library1 duplication percentage
datalib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/output_lane2/Data_maite/Dublication_percentage_lib1.tsv")
# data: duplicate percentage from library 1
datalib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib1/Dublication_percentage_lib1.tsv")
# output path, here are the figures saved
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_duplication/")
# order library1
orderedlib1 <- datalib1[order(datalib1$V1),]
......@@ -24,13 +27,17 @@ lib1crick2 <- orderedlib1[grepl("Crick.2", orderedlib1$sample_name),]
lib1watson1 <- orderedlib1[grepl("Watson.1", orderedlib1$sample_name),]
lib1watson2 <- orderedlib1[grepl("Watson.2", orderedlib1$sample_name),]
# make one crick and one watson by adding the two together
crick <- data.table(lib1crick1$dups+lib1crick2$dups)
watson <- data.table(lib1watson1$dups+lib1watson2$dups)
# making subset for the sample names, which makes it easier/clearer for the new table (library1dups)
Sample_nameslib1 <- data.frame(do.call('rbind', strsplit(as.character(lib1crick1$sample_names),'-',fixed=TRUE)))
names(Sample_nameslib1)[names(Sample_nameslib1) == "X1"] <- "sample_names"
names(Sample_nameslib1)[names(Sample_nameslib1) == "X2"] <- "crick_watson"
# making a new table with the subsets from library1
library1dups <- data.table(Sample_names=Sample_nameslib1$sample_names, Crick1_dups=lib1crick1$dups, Crick2_dups=lib1crick2$dups, Watson1_dups=lib1watson1$dups, Watson2_dups=lib1watson2$dups)
library1dups <- data.table(Sample_names=Sample_nameslib1$sample_names, Crick=crick$V1, Watson=watson$V1)
# reshape the table, for an good figure
lib1dups <- melt(library1dups, id.var="Sample_names")
......@@ -42,7 +49,8 @@ ggplot(lib1dups, aes(fill=variable, y=value, x= Sample_names)) +
ggtitle("Library 1 duplicate percentage") +
scale_x_discrete(guide = guide_axis(angle = 90)) +
geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("skyblue", "cyan3", "blueviolet", "pink"))
scale_fill_manual(values = c("red", "skyblue")) +
ggsave(paste(outputFigures,"Library1_duplicated_percentage.png",sep=""))
# create the figure with stackbar for library 1 from highest value to lowest
ggplot(lib1dups, aes(fill=variable, y=value, x= reorder(Sample_names, -value))) +
......@@ -51,7 +59,8 @@ ggplot(lib1dups, aes(fill=variable, y=value, x= reorder(Sample_names, -value)))
ggtitle("Library 1 From higest duplicate percentage to lowest") +
scale_x_discrete(guide = guide_axis(angle = 90)) +
geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("skyblue", "cyan3", "blueviolet", "pink"))
scale_fill_manual(values = c("red", "skyblue")) +
ggsave(paste(outputFigures,"Library1_duplicated_percentage_high_low.png",sep=""))
# create the figure with stackbar for library 1 with A and B from the same sample next to each other
lib1dups$Sample_names <- factor(lib1dups$Sample_names,levels = c("A27_1","B27_1","A1_0","B1_0","A13_2","B13_2","A2_2","B2_2",
......@@ -64,4 +73,5 @@ ggplot(lib1dups, aes(fill=variable, y=value, x=Sample_names)) +
ggtitle("Library 1 duplicate percentage with A and B from the same sample next to each other") +
scale_x_discrete(guide = guide_axis(angle = 90)) +
geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("skyblue", "cyan3", "blueviolet", "pink"))
\ No newline at end of file
scale_fill_manual(values = c("red", "skyblue")) +
ggsave(paste(outputFigures,"Library1_duplicated_percentage_A_B.png",sep=""))
......@@ -4,9 +4,9 @@
library(ggplot2)
# Input: 1: first methylation.bed file, 2: second methylation.bed file, 3: output directory, where the ggplots are saved
args<-c("/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/output_lane8/mapping/methylation.bed",
"/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/lane2_denovo8/epigbs2/output/mapping/methylation.bed",
"/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/output_data_scripts/Figures_methylation_(maarten)/output_denovo8_datalane2/")
args<-c("/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/big_denono/librar1_unknown/output_lib1_unknown/mapping/methylation.bed",
"/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/big_denono/library2/outputlib2/mapping/methylation.bed",
"/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_methylation_(maarten)/Big_de_novo/")
# variables are made from the different files/directories
inputlib1<-args[1]
......@@ -59,12 +59,12 @@ print(length(lociLib1))
print(length(lociLib2))
# Build nSites: one row per method, holding the number of unique loci in each
# input file and the number of entries of lociLib1 that also occur in lociLib2.
# NOTE(review): the per-library counts use unique(), the shared count does not;
# if lociLib1 can contain repeated loci the shared count includes the repeats - confirm.
nSites<-data.frame(method=c("lane8_denovo8","lane2_denovo8","shared"),sites=
nSites<-data.frame(method=c("lib1","lib2","shared"),sites=
c(length(unique(lociLib1)),length(unique(lociLib2)),sum(lociLib1%in%lociLib2)))
# Plot the nSites table as one text label per method and save it under
# outputFigures (the output location given in the input arguments, see line 6).
# NOTE(review): paste() defaults to sep = " ", so unlike the ggsave() calls that
# pass sep="" this puts a space between outputFigures and the file name - confirm
# whether that is intended.
ggplot(nSites,aes(x=method,y=sites,label=sites))+geom_label()+
ggsave(paste(outputFigures,"Mappinglane8_denovo8vslane2_denovo8sites.png"))
ggsave(paste(outputFigures,"Mappinglib1and2sites.png"))
# as.vector: Converts a distributed matrix (lib2) into a non-distributed vector (nlib2).
nlib2<-as.vector(lib2[lociLib2Shared,grep("methylated",colnames(lib2))])
......@@ -102,22 +102,22 @@ for(i in percentageDifference$diff){
# ggplot for the difference in fraction between the two methylation files in a plot
ggplot(percentageDifference,aes(x=log10(diff),y=perc))+geom_point()+
xlab("Difference in fraction methylation")+ylab("Percentage of sites with lower difference")+
ggsave(paste(outputFigures,"Mappinglane8_denovo8vslane2_denovo8difference.png",sep=""))