原始数据下载和处理
#### Download and install the SRA Toolkit
cd /mnt/data/software/
mkdir -p sratools
cd ./sratools/
# 1. Download the SRA Toolkit 3.1.1 64-bit Linux tarball (ships prefetch, fastq-dump, etc.):
wget https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/3.1.1/sratoolkit.3.1.1-centos_linux64.tar.gz
# 2. Unpack it:
tar xzvf sratoolkit.3.1.1-centos_linux64.tar.gz
# Unpacking alone does not make the tools callable: put the toolkit's bin/
# directory on PATH so the commands below resolve. Assumes the archive unpacks
# to ./sratoolkit.3.1.1-centos_linux64/ (TODO confirm). Add the same line to
# ~/.bashrc to make it permanent.
export PATH="$PWD/sratoolkit.3.1.1-centos_linux64/bin:$PATH"
# 3. Verify the installation: fetch only the first 2 spots and print them to the
#    screen to confirm the tool runs and returns FASTQ-formatted records.
fastq-dump --stdout -X 2 SRR27397081
# 4. Download the run used by steps 5 and 6. (The original notes fetched
#    SRR11145474 here, which the following steps never read.)
nohup prefetch SRR27397081 &
# Steps 5 and 6 read the downloaded file, so block until prefetch has finished.
wait
# 5. Check whether the run is single-end or paired-end: dump one spot split into
#    its reads and count the FASTQ lines (4 lines = single-end, 8 = paired-end).
#    prefetch is expected to store the run as ./<accession>/<accession>.sra
#    (default layout; differs if a user repository is set with vdb-config).
fastq-dump -X 1 --split-spot -Z ./SRR27397081/SRR27397081.sra | wc -l
# 6. Convert to gzipped FASTQ, one file per read, written to ./SRR:
nohup fastq-dump ./SRR27397081/SRR27397081.sra --split-files --gzip -O ./SRR &
# 7. Create the working directory tree: move two levels up from the current
#    directory, create one sub-directory per pipeline stage under ./DW, then
#    enter ./DW (all later paths are relative to it).
cd ../..
for stage_dir in 1_Rawdata 2_fastqc 3_fastp/logs 4_alignment 5_featurecounts; do
  mkdir -p "./DW/${stage_dir}"
done
cd ./DW
# 8. Download every run listed in ./SRR_Acc_List.txt (one accession per line)
#    into ./1_Rawdata/, all accessions in parallel.
#    The loop runs inside `bash -c` so that nohup covers the prefetch processes
#    themselves (previously only `cat` was nohup-ed and `whileread` was a typo
#    that made the line a syntax error). The inner `wait` keeps the wrapper
#    alive until every download has finished.
nohup bash -c '
while IFS= read -r id; do
  [ -n "$id" ] || continue
  prefetch --output-directory ./1_Rawdata/ "$id" &
done < ./SRR_Acc_List.txt
wait
' > ./1_Rawdata/output.log 2>&1 &
# Block until all downloads are complete; the steps below need the .sra files.
wait
# prefetch writes <output-directory>/<accession>/<accession>.sra; flatten that
# layout so every .sra file sits directly in ./1_Rawdata/.
mv ./1_Rawdata/*/*.sra ./1_Rawdata/
# Convert each run to FASTQ (40 threads, progress bar) and gzip the result.
# Runs are processed one after another. The gzip glob <id>_*.fastq matches the
# per-read files of a split run (e.g. <id>_1.fastq, <id>_2.fastq).
nohup bash -c '
while IFS= read -r id; do
  [ -n "$id" ] || continue
  fasterq-dump -e 40 -p \
    -O ./1_Rawdata/ \
    "./1_Rawdata/${id}.sra"
  gzip ./1_Rawdata/"${id}"_*.fastq
done < SRR_Acc_List.txt
' > ./1_Rawdata/fasterq_dump.log 2>&1 &
# Block until conversion is done so later stages see the .fastq.gz files.
wait
质控(fastqc)
# Run FastQC in the background, one process per gzipped FASTQ file, in parallel.
# The glob is *.fastq.gz because that is the name the conversion step produces
# (gzip of <id>_N.fastq); the previous *.fq.gz pattern matched nothing.
# The inner `wait` keeps the wrapper alive until every FastQC process is done.
nohup bash -c '
for file in ./1_Rawdata/*.fastq.gz; do
  fastqc "$file" -o ./2_fastqc/ &
done
wait
' >> ./2_fastqc/fastqc.log 2>&1 &
# Block until all FastQC reports exist before aggregating them.
wait
# Aggregate the per-file FastQC reports into one MultiQC report.
multiqc ./2_fastqc/ -o ./2_fastqc/
过滤(fastp)
# Trim and filter every paired-end sample listed in ./SRR_Acc_List.txt with
# fastp; all samples run in parallel and each writes its own log, HTML and
# JSON report.
#   -q 20 -l 30                  quality threshold 20, minimum read length 30
#   --detect_adapter_for_pe      adapter auto-detection for paired-end data
#   --trim_poly_g --trim_poly_x  polyG / polyX tail trimming
#   -w 15                        worker threads PER SAMPLE (total = 15 x samples)
# NOTE(review): -Y 30 only sets the low-complexity threshold; per the fastp
# documentation the filter itself is enabled with -y, which is not passed here.
# Add -y if low-complexity filtering is intended.
# The accession list is redirected into the loop instead of piped through
# `cat`: with a pipe the loop runs in a subshell, so the background jobs were
# not children of the shell calling `wait`, and `wait` returned immediately.
nohup bash -c '
while IFS= read -r id; do
  [ -n "$id" ] || continue
  fastp \
    -i "./1_Rawdata/${id}_1.fastq.gz" \
    -I "./1_Rawdata/${id}_2.fastq.gz" \
    -o "./3_fastp/${id}_1.fastq.gz" \
    -O "./3_fastp/${id}_2.fastq.gz" \
    -q 20 -l 30 \
    -Y 30 \
    --detect_adapter_for_pe \
    --trim_poly_g --trim_poly_x \
    -w 15 \
    -h "./3_fastp/${id}.html" \
    -j "./3_fastp/${id}.json" \
    > "./3_fastp/logs/${id}.log" 2>&1 &
done < ./SRR_Acc_List.txt
wait
' > ./3_fastp/fastp_all.log 2>&1 &
# Block until every fastp job has finished before summarising the results.
wait
## Inspect the filtering results
multiqc ./3_fastp/ -o ./3_fastp/
比对(STAR)
# Align every filtered paired-end sample with STAR against the mouse index,
# writing a coordinate-sorted BAM plus per-gene read counts
# (--quantMode GeneCounts) under ./4_alignment/<id>_*.
# All samples are started in parallel, 10 threads each.
# NOTE(review): every STAR process loads the genome index, so memory use grows
# with the number of samples; drop the trailing `&` inside the loop to run the
# samples one at a time if RAM is limited.
# The accession list is redirected into the loop (not piped) and followed by
# `wait`, so the wrapper only exits once every alignment has finished.
nohup bash -c '
while IFS= read -r id; do
  [ -n "$id" ] || continue
  STAR \
    --runThreadN 10 \
    --readFilesCommand zcat \
    --genomeDir /mnt/data/reference/mouse/star_index \
    --outFileNamePrefix "./4_alignment/${id}_" \
    --outSAMtype BAM SortedByCoordinate \
    --readFilesIn "./3_fastp/${id}_1.fastq.gz" "./3_fastp/${id}_2.fastq.gz" \
    --quantMode GeneCounts > "./4_alignment/${id}.log" 2>&1 &
done < ./SRR_Acc_List.txt
wait
' > ./4_alignment/star_all.log 2>&1 &
# Block until all alignments are complete before summarising them.
wait
## Inspect the alignment results
multiqc ./4_alignment/ -o ./4_alignment/
定量(featureCounts)
# Count read pairs per gene for every sample with featureCounts, one sample
# after another (25 threads each), writing ./5_featurecounts/<id>.txt.
#   -p --countReadPairs   treat input as paired-end and count fragments
# featureCounts -a expects the annotation FILE; the notes previously passed the
# directory /mnt/data/reference/mouse/, which is not a usable annotation.
# The actual file name is not recorded here, so it must be supplied via GTF.
: "${GTF:?set GTF to the mouse annotation file, e.g. export GTF=/mnt/data/reference/mouse/<annotation>.gtf}"
GTF="$GTF" nohup bash -c '
while IFS= read -r id; do
  [ -n "$id" ] || continue
  featureCounts \
    -T 25 \
    -p \
    --countReadPairs \
    -a "$GTF" \
    -o "./5_featurecounts/${id}.txt" \
    "./4_alignment/${id}_Aligned.sortedByCoord.out.bam"
done < ./SRR_Acc_List.txt
' > ./5_featurecounts/featureCounts.log 2>&1 &
# Block until counting has finished; the summary and merge steps below read
# the per-sample outputs.
wait
# Summarise the featureCounts assignment statistics.
multiqc ./5_featurecounts -o ./5_featurecounts/
# Merge the per-sample count tables and normalise them with the project R
# script (-i input dir, -o output dir, -f output name prefix "my_exp";
# option meanings presumed from their use here, verify against the script).
Rscript ../codes/run_merge_fc_counts_normalization.R -i ./5_featurecounts -o ./5_featurecounts -f my_exp