Commit 349bf53c by righetti2

Upload New File

parent 02e348be
# Load necessary libraries
library(dplyr) # Data manipulation
library(purrr) # Functional programming
library(stringr) # String manipulation
library(readr) # Reading and writing data
# Define a function to process a single CNV file
process_file <- function(cnv_file, output_dir) {
# Read the CNV file into a data frame
cnv_data <- read.table(cnv_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
# Define the column names for Neanderthal and Sapiens samples
neanderthal_cols <- c("Alt5_2014", "Cha8_2020", "Mez1_2018", "Spy9_2018", "Vin3_2017", "Mez2_2018", "LesC_2018", "Goy5_2018")
sapiens_cols <- c("Usti_2014", "Pes1_2021", "Malt_2014", "Losc_2014", "Mo12_2014", "Stut_2014")
# Function to count how many CNVs differ from the reference
count_differences <- function(ref, values) {
ref_rounded <- round(ref)
values_rounded <- round(values)
sum(values_rounded != ref_rounded)
}
# Function to determine gain/loss annotation for Neanderthals
annotate_gain_loss <- function(ref, values) {
ref_rounded <- round(ref)
values_rounded <- round(values)
gain <- any(values_rounded >= ref_rounded + 0.5)
loss <- any(values_rounded < ref_rounded - 0.5)
# Return annotations based on gain/loss detection
if (gain && loss) {
return(c("neand", "neand"))
} else if (gain) {
return(c("neand", NA))
} else if (loss) {
return(c(NA, "neand"))
} else {
return(c(NA, NA))
}
}
# Function to determine gain/loss annotation for Sapiens
annotate_sapiens_gain_loss <- function(ref, values) {
ref_rounded <- round(ref)
values_rounded <- round(values)
gain <- any(values_rounded >= ref_rounded + 0.5)
loss <- any(values_rounded < ref_rounded - 0.5)
# Return annotations based on gain/loss detection
if (gain && loss) {
return(c("sap", "sap"))
} else if (gain) {
return(c("sap", NA))
} else if (loss) {
return(c(NA, "sap"))
} else {
return(c(NA, NA))
}
}
# Add columns for the counts and combined gain/loss annotations
cnv_data <- cnv_data %>%
rowwise() %>%
mutate(
Neanderthal_differences = count_differences(GeneRefCopies, c_across(all_of(neanderthal_cols))),
Sapiens_differences = count_differences(GeneRefCopies, c_across(all_of(sapiens_cols))),
gain_neand = ifelse(Neanderthal_differences > 4,
annotate_gain_loss(GeneRefCopies, c_across(all_of(neanderthal_cols)))[1], NA),
loss_neand = ifelse(Neanderthal_differences > 4,
annotate_gain_loss(GeneRefCopies, c_across(all_of(neanderthal_cols)))[2], NA),
gain_sap = ifelse(Sapiens_differences > 3,
annotate_sapiens_gain_loss(GeneRefCopies, c_across(all_of(sapiens_cols)))[1], NA),
loss_sap = ifelse(Sapiens_differences > 3,
annotate_sapiens_gain_loss(GeneRefCopies, c_across(all_of(sapiens_cols)))[2], NA),
gain = case_when(
!is.na(gain_neand) & !is.na(gain_sap) ~ paste(gain_neand, gain_sap, sep = ", "),
!is.na(gain_neand) ~ gain_neand,
!is.na(gain_sap) ~ gain_sap,
TRUE ~ NA_character_
),
loss = case_when(
!is.na(loss_neand) & !is.na(loss_sap) ~ paste(loss_neand, loss_sap, sep = ", "),
!is.na(loss_neand) ~ loss_neand,
!is.na(loss_sap) ~ loss_sap,
TRUE ~ NA_character_
)
) %>%
ungroup() %>%
select(-gain_neand, -loss_neand, -gain_sap, -loss_sap) # Remove intermediate columns
# Define the output file name by modifying the input file name
output_file <- file.path(output_dir, paste0(basename(cnv_file), "_with_gain_loss.tsv"))
# Write the processed data to the output file
write.table(cnv_data, output_file, sep = "\t", row.names = FALSE, quote = FALSE)
}
# Example usage: Apply the function to a list of CNV files in the specified input directory
process_cnv_files <- function(input_dir, output_dir, file_pattern = "with_CNV_pop_lvl.tsv$") {
# List all files in the input directory that match the specified pattern
cnv_files <- list.files(input_dir, pattern = file_pattern, full.names = TRUE)
# Process each CNV file and save the results to the output directory
walk(cnv_files, ~process_file(.x, output_dir))
}
# Example invocation
# process_cnv_files(input_dir = "path/to/your/input", output_dir = "path/to/your/output")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment