Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
D
Diet_CNVs
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
righetti2
Diet_CNVs
Commits
349bf53c
Commit
349bf53c
authored
Sep 25, 2024
by
righetti2
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
02e348be
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
109 additions
and
0 deletions
+109
-0
genes_with_CNV_at_pop_lvl.R
genes_with_CNV_at_pop_lvl.R
+109
-0
No files found.
genes_with_CNV_at_pop_lvl.R
0 → 100644
View file @
349bf53c
# Load necessary libraries
library
(
dplyr
)
# Data manipulation
library
(
purrr
)
# Functional programming
library
(
stringr
)
# String manipulation
library
(
readr
)
# Reading and writing data
# Define a function to process a single CNV file
process_file
<-
function
(
cnv_file
,
output_dir
)
{
# Read the CNV file into a data frame
cnv_data
<-
read.table
(
cnv_file
,
header
=
TRUE
,
sep
=
"\t"
,
stringsAsFactors
=
FALSE
)
# Define the column names for Neanderthal and Sapiens samples
neanderthal_cols
<-
c
(
"Alt5_2014"
,
"Cha8_2020"
,
"Mez1_2018"
,
"Spy9_2018"
,
"Vin3_2017"
,
"Mez2_2018"
,
"LesC_2018"
,
"Goy5_2018"
)
sapiens_cols
<-
c
(
"Usti_2014"
,
"Pes1_2021"
,
"Malt_2014"
,
"Losc_2014"
,
"Mo12_2014"
,
"Stut_2014"
)
# Function to count how many CNVs differ from the reference
count_differences
<-
function
(
ref
,
values
)
{
ref_rounded
<-
round
(
ref
)
values_rounded
<-
round
(
values
)
sum
(
values_rounded
!=
ref_rounded
)
}
# Function to determine gain/loss annotation for Neanderthals
annotate_gain_loss
<-
function
(
ref
,
values
)
{
ref_rounded
<-
round
(
ref
)
values_rounded
<-
round
(
values
)
gain
<-
any
(
values_rounded
>=
ref_rounded
+
0.5
)
loss
<-
any
(
values_rounded
<
ref_rounded
-
0.5
)
# Return annotations based on gain/loss detection
if
(
gain
&&
loss
)
{
return
(
c
(
"neand"
,
"neand"
))
}
else
if
(
gain
)
{
return
(
c
(
"neand"
,
NA
))
}
else
if
(
loss
)
{
return
(
c
(
NA
,
"neand"
))
}
else
{
return
(
c
(
NA
,
NA
))
}
}
# Function to determine gain/loss annotation for Sapiens
annotate_sapiens_gain_loss
<-
function
(
ref
,
values
)
{
ref_rounded
<-
round
(
ref
)
values_rounded
<-
round
(
values
)
gain
<-
any
(
values_rounded
>=
ref_rounded
+
0.5
)
loss
<-
any
(
values_rounded
<
ref_rounded
-
0.5
)
# Return annotations based on gain/loss detection
if
(
gain
&&
loss
)
{
return
(
c
(
"sap"
,
"sap"
))
}
else
if
(
gain
)
{
return
(
c
(
"sap"
,
NA
))
}
else
if
(
loss
)
{
return
(
c
(
NA
,
"sap"
))
}
else
{
return
(
c
(
NA
,
NA
))
}
}
# Add columns for the counts and combined gain/loss annotations
cnv_data
<-
cnv_data
%>%
rowwise
()
%>%
mutate
(
Neanderthal_differences
=
count_differences
(
GeneRefCopies
,
c_across
(
all_of
(
neanderthal_cols
))),
Sapiens_differences
=
count_differences
(
GeneRefCopies
,
c_across
(
all_of
(
sapiens_cols
))),
gain_neand
=
ifelse
(
Neanderthal_differences
>
4
,
annotate_gain_loss
(
GeneRefCopies
,
c_across
(
all_of
(
neanderthal_cols
)))[
1
],
NA
),
loss_neand
=
ifelse
(
Neanderthal_differences
>
4
,
annotate_gain_loss
(
GeneRefCopies
,
c_across
(
all_of
(
neanderthal_cols
)))[
2
],
NA
),
gain_sap
=
ifelse
(
Sapiens_differences
>
3
,
annotate_sapiens_gain_loss
(
GeneRefCopies
,
c_across
(
all_of
(
sapiens_cols
)))[
1
],
NA
),
loss_sap
=
ifelse
(
Sapiens_differences
>
3
,
annotate_sapiens_gain_loss
(
GeneRefCopies
,
c_across
(
all_of
(
sapiens_cols
)))[
2
],
NA
),
gain
=
case_when
(
!
is.na
(
gain_neand
)
&
!
is.na
(
gain_sap
)
~
paste
(
gain_neand
,
gain_sap
,
sep
=
", "
),
!
is.na
(
gain_neand
)
~
gain_neand
,
!
is.na
(
gain_sap
)
~
gain_sap
,
TRUE
~
NA_character_
),
loss
=
case_when
(
!
is.na
(
loss_neand
)
&
!
is.na
(
loss_sap
)
~
paste
(
loss_neand
,
loss_sap
,
sep
=
", "
),
!
is.na
(
loss_neand
)
~
loss_neand
,
!
is.na
(
loss_sap
)
~
loss_sap
,
TRUE
~
NA_character_
)
)
%>%
ungroup
()
%>%
select
(
-
gain_neand
,
-
loss_neand
,
-
gain_sap
,
-
loss_sap
)
# Remove intermediate columns
# Define the output file name by modifying the input file name
output_file
<-
file.path
(
output_dir
,
paste0
(
basename
(
cnv_file
),
"_with_gain_loss.tsv"
))
# Write the processed data to the output file
write.table
(
cnv_data
,
output_file
,
sep
=
"\t"
,
row.names
=
FALSE
,
quote
=
FALSE
)
}
# Example usage: Apply the function to a list of CNV files in the specified input directory
process_cnv_files
<-
function
(
input_dir
,
output_dir
,
file_pattern
=
"with_CNV_pop_lvl.tsv$"
)
{
# List all files in the input directory that match the specified pattern
cnv_files
<-
list.files
(
input_dir
,
pattern
=
file_pattern
,
full.names
=
TRUE
)
# Process each CNV file and save the results to the output directory
walk
(
cnv_files
,
~
process_file
(
.x
,
output_dir
))
}
# Example invocation
# process_cnv_files(input_dir = "path/to/your/input", output_dir = "path/to/your/output")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment