# Read Processing
To process the reads from the different projects, I used the following steps.
1. Copy the data and rename the files:
```
# Rename fastq files: keep everything before the first 'R' in the name and
# append a uniform _16S_R1 suffix
for i in *R1*
do
    name=$(echo "$i" | cut -f1 -d'R')
    mv ${i} ${name}16S_R1.fq
done
# Rename the files in a folder according to a two-column "old new" mapping
# (newNames.v2.txt; each line becomes an mv command)
eval "$(sed 's/^/mv /' newNames.v2.txt)"
```
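To preview what the rename loop will do before touching any files, here is a minimal dry-run sketch (same pattern as above, with `echo` in place of `mv`):
```
# Dry-run sketch: print source and target names without renaming anything
for i in *R1*
do
    name=$(echo "$i" | cut -f1 -d'R')
    echo "${i} -> ${name}16S_R1.fq"
done
```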
2. Remove primers with cutadapt:
```
dir=/home/NIOO.INT/nejcs/TomatoMicrobiome_comparison
outdir=/home/NIOO.INT/nejcs/TomatoMicrobiome_comparison/cutadapt
mkdir -p $outdir
for folder in $(find ${dir}/* -type d -exec basename {} \;)
do
    echo "Working on project" ${folder}
    cd ${dir}/${folder}
    # Forward primer is 341F
    fw=CCTACGGGNGGCWGCAG
    # Reverse primer is 785R
    rw_rc=GACTACHVGGGTATCTAATCC
    mkdir -p ${outdir}/${folder}
    for prefix in $(ls *_R1.fq | cut -f1 -d'_'); do
        echo "Working on sample ID" ${prefix}
        cutadapt -g $fw -G $rw_rc -j 32 --discard-untrimmed -O 8 \
            -o ${outdir}/${folder}/${prefix}_R1.fq \
            -p ${outdir}/${folder}/${prefix}_R2.fq \
            ${dir}/${folder}/${prefix}_16S_R1.fq \
            ${dir}/${folder}/${prefix}_16S_R2.fq \
            >> ${outdir}/${folder}/cutadapt_primer_trimming_stats.txt 2>&1
    done
    cd $dir
done
```
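A quick way to check trimming success is to pull the pair pass rates out of the per-project stats files. A minimal sketch; the exact report wording can differ between cutadapt versions:
```
# Pull the pair pass-rate line from each project's cutadapt report
# ("Pairs written (passing filters)" is the wording in recent cutadapt versions)
grep -H 'Pairs written (passing filters)' \
    /home/NIOO.INT/nejcs/TomatoMicrobiome_comparison/cutadapt/*/cutadapt_primer_trimming_stats.txt
```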
3. Merge fastq reads with usearch
```
dir=/home/NIOO.INT/nejcs/TomatoMicrobiome_comparison/cutadapt
outdir=/home/NIOO.INT/nejcs/TomatoMicrobiome_comparison/mergedFastq
mkdir -p $outdir
cd $dir
for folder in $(find ${dir}/* -type d -exec basename {} \;)
do
    echo "Working on project" ${folder}
    mkdir -p ${outdir}/${folder}
    cd ${dir}/${folder}
    for prefix in $(ls *_R1.fq | cut -f1 -d'_' | sort -u); do
        echo "Working on sample ID" ${prefix}
        /home/NIOO.INT/nejcs/usearch11.0.667_i86linux32 \
            -fastq_mergepairs ${dir}/${folder}/${prefix}_R1.fq \
            -fastq_minmergelen 420 -fastq_maxmergelen 460 -fastq_maxdiffs 5 \
            -fastqout ${outdir}/${folder}/${prefix}_merged_16S.fq
    done
done
```
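To check merge rates per sample, counting the reads in each merged fastq file is enough (one fastq record is four lines); a minimal sketch:
```
# Count merged reads per sample (a fastq record is four lines)
for f in /home/NIOO.INT/nejcs/TomatoMicrobiome_comparison/mergedFastq/*/*_merged_16S.fq
do
    echo "${f}: $(( $(wc -l < "${f}") / 4 )) merged reads"
done
```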
4. Use the merged, primer-stripped reads for denoising with DADA2. See the dada2_readProcessing.R script below for more details.
```
#' This is the DADA2 pipeline for processing reads from 6 different tomato
#' microbiome studies. These studies include rhizosphere samples from
#' domesticated and wild plants grown in greenhouse and natural soils.
#' Additionally, seed samples are also included.
#' All studies used the same primer combination, amplifying the V3-V4 region
#' of the 16S rRNA gene.
path_ecuador1="/home/NIOO.INT/nejcs/TomatoMicrobiome_comparison/mergedFastq/Ecuador1"
path_ecuador2="/home/NIOO.INT/nejcs/TomatoMicrobiome_comparison/mergedFastq/Ecuador2"
path_ril="/home/NIOO.INT/nejcs/TomatoMicrobiome_comparison/mergedFastq/RIL"
path_seed_block="/home/NIOO.INT/nejcs/TomatoMicrobiome_comparison/mergedFastq/Seed_block"
path_seed="/home/NIOO.INT/nejcs/TomatoMicrobiome_comparison/mergedFastq/Seed"
path_succession="/home/NIOO.INT/nejcs/TomatoMicrobiome_comparison/mergedFastq/Succession"
outpath="/home/NIOO.INT/nejcs/TomatoMicrobiome_comparison/DADA2"

paths=c(path_ecuador1, path_seed_block, path_seed, path_succession, path_ril, path_ecuador2)
DF=data.frame(project=c('Ecuador1', 'Seed_block', 'Seed', 'Succession', 'RIL', 'Ecuador2'), path=paths)
saveRDS(DF, 'Data/pathFile.rds')

tables=list()
for (i in seq_along(DF$path)) {
  path=DF$path[i]
  project=DF$project[i]
  #' Fastq filenames have the format: SAMPLENAME_merged_16S.fq
  fn <- sort(list.files(path, pattern="merged_16S", full.names = TRUE))
  #' Extract sample names from the filenames
  sample.names <- sapply(strsplit(basename(fn), "_merged"), `[`, 1)
  #' Place filtered files in the filtered/<project> subdirectory
  filt <- file.path(outpath, "filtered", project, paste0(sample.names, "_filt.fq"))
  out <- dada2::filterAndTrim(fn, filt, minLen = 410, truncQ=2, maxN=0, maxEE=2,
                              truncLen = 420, rm.phix=TRUE, multithread=TRUE)
  err <- dada2::learnErrors(filt, multithread=TRUE)
  #' Denoise reads
  denoised <- dada2::dada(filt, err=err, multithread=TRUE)
  #' Generate the ASV table
  seqtab <- dada2::makeSequenceTable(denoised, orderBy = "abundance")
  #' Remove chimeras
  seqtab.nochim <- dada2::removeBimeraDenovo(seqtab, method="consensus",
                                             multithread=TRUE, verbose=TRUE)
  #' Store the per-project table
  tables[[project]] <- seqtab.nochim
}
combinedASVtable=dada2::mergeSequenceTables(tables=tables)
saveRDS(combinedASVtable, 'DADA2/combinedTable.rds')
saveRDS(tables, 'DADA2/AsvTables.rds')

#' Assign short ASV IDs and write the ID-to-sequence mapping
ASVnames=paste0('ASV', seq_len(ncol(combinedASVtable)))
ASVseq=colnames(combinedASVtable)
ASV.id.seq=data.frame(ID=ASVnames, seq=ASVseq)
write.table(ASV.id.seq, 'Data/ASVseq.tab', sep='\t', quote = FALSE, row.names = FALSE)
```
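The QIIME2 import below expects a FASTA file (`../Data/ASVseq.fa`), while the R script above writes a tab-separated table. A minimal conversion sketch, assuming the two-column ID/sequence layout written above:
```
# Sketch: convert the two-column table (header line "ID<TAB>seq") written by
# the R script into the FASTA file the QIIME2 import expects
awk -F'\t' 'NR > 1 {print ">"$1"\n"$2}' Data/ASVseq.tab > Data/ASVseq.fa
```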
# Taxonomy Assignment and Phylogenetic Tree (QIIME2)
```
# Import the ASV fasta file into QIIME2
qiime tools import \
  --input-path ../Data/ASVseq.fa \
  --output-path ASVseq.qza \
  --type 'FeatureData[Sequence]'
# Extract the V3-V4 region of the amplicons from the Silva DB (v138)
qiime feature-classifier extract-reads \
  --p-f-primer CCTACGGGNGGCWGCAG --p-r-primer GACTACHVGGGTATCTAATCC \
  --i-sequences silva-138-99-seqs.qza \
  --p-max-length 440 \
  --o-reads silva138-v34.qza --p-n-jobs 20
# Train the classifier
qiime feature-classifier fit-classifier-naive-bayes \
  --i-reference-reads silva138-v34.qza \
  --i-reference-taxonomy silva-138-99-tax.qza \
  --o-classifier silva138-v34-nb-classifier.qza
# Assign taxonomy
qiime feature-classifier classify-sklearn \
  --i-classifier silva138-v34-nb-classifier.qza \
  --i-reads ASVseq.qza \
  --o-classification taxonomyASV.qza
# Build the phylogenetic tree
qiime phylogeny align-to-tree-mafft-fasttree \
  --i-sequences ASVseq.qza \
  --o-alignment ASVseq-aligned.qza \
  --o-masked-alignment ASVseq-aligned-masked.qza \
  --o-tree ASVseq-unrooted.qza \
  --o-rooted-tree ASVseq-rooted.qza
```
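To use the assigned taxonomy outside QIIME2 (e.g., back in R), the artifact can be unpacked with `qiime tools export`; a minimal sketch:
```
# Unpack the taxonomy artifact to a plain taxonomy.tsv in the output directory
qiime tools export \
    --input-path taxonomyASV.qza \
    --output-path taxonomy_export
```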
# Generating the Rename Mapping Files
The newNames.v2.txt files consumed by the rename command in step 1 are generated from the original file lists (newNames.txt) with the following R script, which assigns a project-specific prefix (A through F) to each sample:
```
library(dplyr)
library(stringr)
library(readr)

setwd('~/TomatoMicrobiome_comparison/')

ec1=read.delim('Ecuador1/newNames.txt', header=F, row.names = NULL)
ec1 %>% mutate(V2=if_else(str_detect(string = V1, pattern = 'R1'), 'R1', 'R2')) %>% arrange(V2) %>%
  mutate(V3=paste('A', rep(seq(1, length(V2)/2), times = 2), '_16S_', V2, '.fq', sep='')) %>%
  select(-V2) %>%
  write_delim(col_names = F, 'Ecuador1/newNames.v2.txt')

ec2=read.delim('Ecuador2/newNames.txt', header=F, row.names = NULL)
ec2 %>% mutate(V2=if_else(str_detect(string = V1, pattern = 'R1'), 'R1', 'R2')) %>% arrange(V2) %>%
  mutate(V3=paste('B', rep(seq(1, length(V2)/2), times = 2), '_16S_', V2, '.fq', sep='')) %>%
  select(-V2) %>%
  write_delim(col_names = F, 'Ecuador2/newNames.v2.txt')

ril=read.delim('RIL/newNames.txt', header=F, row.names = NULL)
ril %>% mutate(V2=if_else(str_detect(string = V1, pattern = 'R1'), 'R1', 'R2')) %>% arrange(V2) %>%
  mutate(V3=paste('C', rep(seq(1, length(V2)/2), times = 2), '_16S_', V2, '.fq', sep='')) %>%
  select(-V2) %>%
  write_delim(col_names = F, 'RIL/newNames.v2.txt')

seed=read.delim('Seed/newNames.txt', header=F, row.names = NULL)
seed %>% mutate(V2=if_else(str_detect(string = V1, pattern = 'R1'), 'R1', 'R2')) %>% arrange(V2) %>%
  mutate(V3=paste('D', rep(seq(1, length(V2)/2), times = 2), '_16S_', V2, '.fq', sep='')) %>%
  select(-V2) %>%
  write_delim(col_names = F, 'Seed/newNames.v2.txt')

seedBlock=read.delim('Seed_block/newNames.txt', header=F, row.names = NULL)
seedBlock %>% mutate(V2=if_else(str_detect(string = V1, pattern = 'R1'), 'R1', 'R2')) %>% arrange(V2) %>%
  mutate(V3=paste('E', rep(seq(1, length(V2)/2), times = 2), '_16S_', V2, '.fq', sep='')) %>%
  select(-V2) %>%
  write_delim(col_names = F, 'Seed_block/newNames.v2.txt')

suc=read.delim('Succession/newNames.txt', header=F, row.names = NULL)
suc %>% mutate(V2=if_else(str_detect(string = V1, pattern = 'R1'), 'R1', 'R2')) %>% arrange(V2) %>%
  mutate(V3=paste('F', rep(seq(1, length(V2)/2), times = 2), '_16S_', V2, '.fq', sep='')) %>%
  select(-V2) %>%
  write_delim(col_names = F, 'Succession/newNames.v2.txt')
```
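Before running the `eval`-based rename from step 1, it can help to dry-run the generated mapping; a minimal sketch, assuming the two-column "old new" layout written above (shown for Ecuador1):
```
# Dry-run sketch: print the mv commands that the eval in step 1 would execute
sed 's/^/mv /' Ecuador1/newNames.v2.txt | head
# Flag any duplicate target names before renaming for real
awk '{print $2}' Ecuador1/newNames.v2.txt | sort | uniq -d
```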