Commit 40cffd97 by righetti2

Upload New File

parent 9120760a
#!/bin/bash
#SBATCH --job-name=CNVs
#SBATCH --output=/shared/home/righettin/Analyses/slurm_outputs/CNVs.out
#SBATCH --error=/shared/home/righettin/Analyses/slurm_outputs/CNVs.error
#SBATCH --cpus-per-task=48
#SBATCH --nodes=1
#
# Slurm batch job that builds gene matrices from per-genome CNV estimates:
#   1) concatenates every "<genome_id>.nobl.cnv.tsv" listed in the genomes
#      list into a single file,
#   2) strips the fractional part from the fifth column of that file,
#   3) runs computeGeneMatrix.py on it and stores the full matrix, then
#      writes a filtered copy keeping only rows where at least one value
#      column (5..NF) is below 0.5x or above 1.5x the value in column 4.
#
# Inputs  : $genomes_list (one genome id per line), the per-genome files
#           under $root_dir, $genes_id2name, $paralogs.
# Outputs : $output_file, $matrix_all, $matrix_filtered (all in $analysis_dir).
# Exit    : non-zero as soon as any step fails, so an incomplete matrix is
#           never reported as a success.

# Abort on command failure, unset variables, and failures inside pipelines.
set -euo pipefail

# Define paths
root_dir="/shared/home/righettin/Analyses/CNV_estimates"
analysis_dir="$root_dir/analysis"
genes_id2name="/shared/home/righettin/Data/References/hg38/geneID2name_hg38_Apr24.tsv"
paralogs="/shared/home/righettin/Data/References/hg38/paralogs_hg38_Apr24.txt"
compute_gene_matrix="/shared/home/righettin/Scripts/CNV_scripts/computeGeneMatrix.py"

output_file="$analysis_dir/AllGenomes.type.nobl.cnv"
genomes_list="$analysis_dir/genomes.type.list"
matrix_all="$analysis_dir/AllGenomes.type.nobl.cnv.matrix.ALL.tsv"
matrix_filtered="$analysis_dir/AllGenomes.type.nobl.cnv.matrix.tsv"
tmp_file="${output_file}.tmp"

# Never leave a half-written temporary file behind, whatever the exit path.
trap 'rm -f -- "$tmp_file"' EXIT

# 1) Concatenate all CNV estimates in a single file
echo "1) Concatenating all CNV estimates in a single file:"
: > "$output_file" # Create an empty output file
# "|| [[ -n ... ]]" keeps the last id even when the list lacks a final newline.
while read -r genome_id || [[ -n "$genome_id" ]]; do
  # Blank lines in the list carry no genome id; skip them.
  [[ -n "$genome_id" ]] || continue
  genome_file="$root_dir/$genome_id.nobl.cnv.tsv"
  # A missing input would silently yield an incomplete matrix, so stop here.
  if [[ ! -r "$genome_file" ]]; then
    echo "Error: cannot read CNV estimates file: $genome_file" >&2
    exit 1
  fi
  cat -- "$genome_file" >> "$output_file"
done < "$genomes_list"
echo "All CNV estimates have been concatenated to ${output_file}."

# 2) Remove decimal places from the fifth column
echo "2) Removing decimal places from the fifth column of $output_file:"
# awk deletes the first "." followed by digits in column 5, i.e. the value is
# truncated (not rounded), whatever the number of decimal digits.
awk -F'\t' '{sub(/\.[0-9]+/, "", $5)}1' OFS='\t' "$output_file" > "$tmp_file"
mv -- "$tmp_file" "$output_file"
echo "Decimal places removed from the fifth column of $output_file."

# 3) Compute Gene Matrix
echo "3) Computing Gene Matrix for all genomes:"
python3 "$compute_gene_matrix" -t "$output_file" -d "$genes_id2name" -p "$paralogs" > "$matrix_all"
echo "Gene Matrix for all genomes computed and saved as $matrix_all."

echo "Since this contains all genes, it can be filtered to retain only genes that have a gain or a loss in at least one archaic genome compared to the human reference:"
# The filter reads the matrix written just above instead of running
# computeGeneMatrix.py a second time on the same inputs (this assumes the
# script is deterministic -- TODO confirm), which also guarantees both
# matrices come from the same interpreter.
# Row 1 is passed through unchanged; every other row is kept as soon as one of
# the columns 5..NF has a ratio to column 4 below 0.5 or above 1.5.
# A zero in column 4 would make awk abort with "division by zero"; such rows
# are kept when any of those columns is non-zero.
# NOTE(review): treating "non-zero vs. a zero column 4" as a gain is an
# assumption -- confirm it matches the intended biology.
awk 'NR==1{print;next}
     {
       ref = $4 + 0
       for (i = 5; i <= NF; i++) {
         if (ref == 0) {
           if ($i + 0 != 0) { print; next }
           continue
         }
         if ($i/ref < 0.5 || $i/ref > 1.5) { print; next }
       }
     }' "$matrix_all" > "$matrix_filtered"
echo "Results are in $matrix_filtered."
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment