Commit 6fce6a49 authored by Noort's avatar Noort
Browse files

src update

parent 9bbfb365
......@@ -7,6 +7,7 @@ library(dplyr)
library(tidyverse)
library(reshape2)
library(ggplot2)
# data from library 1 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib1/barcode_information_lib1.tsv", header = T)
# data from library 2 with the information about the barcodes (from the 'process_radtags.clone.log' file)
......@@ -41,8 +42,6 @@ lib2D <- datalib1and2[grepl("^D", datalib1and2$Filename),]
lib1A$Filename <- gsub("-Watson", "",lib1A$Filename)
lib1A$Filename <- gsub("^A", "", lib1A$Filename)
sum(lib1A$Retained)
# making an extra column with percentage for both libraries
lib1A$Retained_pct1 = (lib1A$Retained / sum(lib1A$Retained)) *100
lib1B$Retained_pct2 = (lib1B$Retained / sum(lib1B$Retained)) *100
......@@ -68,7 +67,7 @@ lib1and2retainedper2 <- melt(datalib1and2per2, id.var="Barcode")
# making the ggplot for barcode_data_lib1 and 2 together in percentage ordered by sample names A and C(dodge)
ggplot(lib1and2retainedper1, aes(y= value, fill= variable, x= Barcode)) +
geom_bar(width=0.7, position = position_dodge(width=0.6), stat="identity", color ="gray")+ # using nice colors and making it a dogde barplot
geom_bar(width=0.7, position = position_dodge(width=0.6), stat="identity", color ="gray")+ # using nice colors and making it a dogde barplot with weidth 0.6
labs(y = 'percentage of retained barcodes', x = 'Barcodes') + # giving the labels names
ggtitle("Library 1 and 2 retained barcodes in percentage combined, sample A and B (dodge)") + # giving the plot a title
scale_x_discrete(guide = guide_axis(angle = 70)) + # the position of the values on the x-as. 0=horizontal, 90= vertical
......
# @author: Maite van den Noort
# @Date: 28-12-2020
# @function: makes ggplots of the retained reads from library 1.
library(ggplot2)
library(broman)
#library 1
# data from library 1 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib1/barcode_information_lib1.tsv", header = T)
barcode_data_lib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib1/barcode_information_lib1.tsv", header = T)
# output path, here are the figures saved
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_barcodes/")
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_barcodes/")
# making variables to use beautiful colors in the graph
blue <- brocolors("crayons")["Cornflower"]
......
......@@ -7,12 +7,12 @@ library(tidyverse)
library(reshape2)
library(ggplot2)
# data from library 1 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib1/barcode_information_lib1.tsv", header = T)
barcode_data_lib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib1/barcode_information_lib1.tsv", header = T)
# data from library 2 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib2/barcode_information_lib2.tsv", header = T)
barcode_data_lib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib2/barcode_information_lib2.tsv", header = T)
# output path, here are the figures saved
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_barcodes/")
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_barcodes/")
# making an extra column with percentage for both libraries
barcode_data_lib1$Retained_pct1 = (barcode_data_lib1$Retained / sum(barcode_data_lib1$Retained)) *100
......
# @author: Maite van den Noort
# @Date: 28-12-2020
# @function: makes ggplots of the retained reads from library 2.
library(ggplot2)
library(broman)
#library 2
# data from library 2 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib2/barcode_information_lib2.tsv", header = T)
barcode_data_lib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib2/barcode_information_lib2.tsv", header = T)
# output path, here are the figures saved
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_barcodes/")
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_barcodes/")
# making variables to use beautiful colors in the graph
blue <- brocolors("crayons")["Blue Violet"]
......
......@@ -7,12 +7,12 @@ library(tidyverse)
library(reshape2)
library(ggplot2)
# data from library 1 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib1/barcode_information_lib1.tsv", header = T)
barcode_data_lib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib1/barcode_information_lib1.tsv", header = T)
# data from library 2 with the information about the barcodes (from the 'process_radtags.clone.log' file)
barcode_data_lib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib2/barcode_information_lib2.tsv", header = T)
barcode_data_lib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib2/barcode_information_lib2.tsv", header = T)
# output path, here are the figures saved
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_barcodes/")
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_barcodes/")
# making subsets for every crick/watson strand from every sample from library1
lib1A <- barcode_data_lib1[grepl("^A", barcode_data_lib1$Filename),]
......
......@@ -7,6 +7,7 @@ library(tidyverse)
library(reshape2)
library(ggplot2)
cite_packages()
# data: duplicate percentage from library 1
datalib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib1/Dublication_percentage_lib1.tsv")
......@@ -28,8 +29,8 @@ lib1watson1 <- orderedlib1[grepl("Watson.1", orderedlib1$sample_name),]
lib1watson2 <- orderedlib1[grepl("Watson.2", orderedlib1$sample_name),]
# make one crick and one watson by averaging the two (sum divided by 2)
crick <- data.table(lib1crick1$dups+lib1crick2$dups)
watson <- data.table(lib1watson1$dups+lib1watson2$dups)
crick <- data.table((lib1crick1$dups+lib1crick2$dups)/2)
watson <- data.table((lib1watson1$dups+lib1watson2$dups)/2)
# making subset for the sample names, which makes it easier/clearer for the new table (library1dups)
Sample_nameslib1 <- data.frame(do.call('rbind', strsplit(as.character(lib1crick1$sample_names),'-',fixed=TRUE)))
......
......@@ -7,8 +7,11 @@ library(tidyverse)
library(reshape2)
library(ggplot2)
# input library2 percentage duplicate
datalib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/output_lane8/Data_maite/Duplication_percentage_lib2.tsv")
# data: duplicate percentage from library 2
datalib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib2/Duplication_percentage_lib2.tsv")
# output path, here are the figures saved
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_duplication/")
# order library2
orderedlib2 <- datalib2[order(datalib2$V1),]
......@@ -23,13 +26,17 @@ lib2crick2 <- orderedlib2[grepl("Crick.2", orderedlib2$sample_name),]
lib2watson1 <- orderedlib2[grepl("Watson.1", orderedlib2$sample_name),]
lib2watson2 <- orderedlib2[grepl("Watson.2", orderedlib2$sample_name),]
# make one crick and one watson by averaging the two (sum divided by 2)
crick <- data.table((lib2crick1$dups+lib2crick2$dups)/2)
watson <- data.table((lib2watson1$dups+lib2watson2$dups)/2)
# making subset for the sample names, which makes it easier/clearer for the new table (library2dups)
Sample_nameslib2 <- data.frame(do.call('rbind', strsplit(as.character(lib2crick1$sample_names),'-',fixed=TRUE)))
names(Sample_nameslib2)[names(Sample_nameslib2) == "X1"] <- "sample_names"
names(Sample_nameslib2)[names(Sample_nameslib2) == "X2"] <- "crick_watson"
# making a new table with the subsets from library2
library2dups <- data.table(Sample_names=Sample_nameslib2$sample_names, Crick1_dups=lib2crick1$dups, Crick2_dups=lib2crick2$dups, Watson1_dups=lib2watson1$dups, Watson2_dups=lib2watson2$dups)
# making a new table with the averaged crick and watson subsets from library2
library2dups <- data.table(Sample_names=Sample_nameslib2$sample_names, Crick=crick$V1, Watson=watson$V1)
# reshape the table, for a good figure
lib2dups <- melt(library2dups, id.var="Sample_names")
......@@ -41,7 +48,8 @@ ggplot(lib2dups, aes(fill=variable, y=value, x= Sample_names)) +
ggtitle("library 2 percentage duplicate") +
scale_x_discrete(guide = guide_axis(angle = 90)) +
geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("skyblue", "cyan3", "blueviolet", "pink"))
scale_fill_manual(values = c("red", "skyblue")) +
ggsave(paste(outputFigures,"Library2_duplicated_percentage.png",sep=""))
# create the figure with stackbar for library 2 from highest value to lowest
ggplot(lib2dups, aes(fill=variable, y=value, x= reorder(Sample_names, -value))) +
......@@ -50,7 +58,8 @@ ggplot(lib2dups, aes(fill=variable, y=value, x= reorder(Sample_names, -value)))
ggtitle("library 2 From higest percentage duplicate to lowest") +
scale_x_discrete(guide = guide_axis(angle = 90)) +
geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("skyblue", "cyan3", "blueviolet", "pink"))
scale_fill_manual(values = c("red", "skyblue")) +
ggsave(paste(outputFigures,"Library2_duplicated_percentage_high_low.png",sep=""))
# create the figure with stackbar for library 2 with C and D from the same sample next to each other
lib2dups$Sample_names <- factor(lib2dups$Sample_names,levels = c("C27_1","D27_1","C1_0","D1_0","C13_2","D13_2","C2_2","D2_2",
......@@ -60,7 +69,8 @@ lib2dups$Sample_names <- factor(lib2dups$Sample_names,levels = c("C27_1","D27_1"
ggplot(lib2dups, aes(fill=variable, y=value, x=Sample_names)) +
geom_bar(position="stack", stat="identity")+
labs(y = 'percentage duplicate', x = 'sample names') +
ggtitle("library 2 percentage duplicate with A and B from the same sample next to each other") +
ggtitle("library 2 percentage duplicate with C and D from the same sample next to each other") +
scale_x_discrete(guide = guide_axis(angle = 90)) +
geom_text(aes(label = value), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("skyblue", "cyan3", "blueviolet", "pink"))
geom_text(aes(label = round(value, digits = 1)), size = 3, hjust = 0.5, vjust = 3, position = "stack") +
scale_fill_manual(values = c("red", "skyblue")) +
ggsave(paste(outputFigures,"Library2_duplicated_percentage_CD.png",sep=""))
......@@ -8,10 +8,10 @@ library(reshape2)
library(ggplot2)
# data: unique reads from library 1
datalib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/data/Created_data_lib1/Unique_reads_lib1.tsv")
datalib1 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib1/Unique_reads_lib1.tsv")
# output path, here are the figures saved
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/internship-maite-epigbs2/results/output_data_scripts/Figures_unique_reads/")
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_unique_reads/")
# order library1
orderedlib1 <- datalib1[order(datalib1$V1),]
......
......@@ -8,10 +8,10 @@ library(reshape2)
library(ggplot2)
# input library2 unique reads
datalib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/output_lane8/Data_maite/Unique_reads_lib2.tsv")
datalib2 <- read.table(file ="/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/data/Created_data_lib2/Unique_reads_lib2.tsv")
# output path, here are the figures saved
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/duckweed_epiGBS/Output data scripts/Figures unique reads/")
outputFigures <- ("/mnt/nfs/bioinfdata/home/NIOO/maiten/maite-internship-epigbs/results/output_data_scripts/Figures_unique_reads/")
# order library2
orderedlib2 <- datalib2[order(datalib2$V1),]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment