Commit 25fc6172 authored by Mathias Walzer's avatar Mathias Walzer
Browse files

updated plot code & added functionality for original data comparison

parent dac8a0b7
......@@ -11,8 +11,78 @@ for (p in pxds ){
assign(paste("Correlation_original",p,sep="_"), correlate_techreps(df2))
}
# (Correlation_reanalysis_PXD003497 | Correlation_original_PXD003497 ) /
# (Correlation_reanalysis_PXD004873 | Correlation_original_PXD004873 ) /
# (Correlation_reanalysis_PXD014194 | Correlation_original_PXD014194 ) /
# plot_annotation(tag_levels = 'A')
(Correlation_reanalysis_PXD003497 | Correlation_original_PXD003497 ) /
(Correlation_reanalysis_PXD004873 | Correlation_original_PXD004873 ) /
(Correlation_reanalysis_PXD014194 | Correlation_original_PXD014194 ) /
plot_annotation(tag_levels = 'A')
panel_cr <- (Correlation_reanalysis_PXD003497 + xlim(0,25) + ylim(0,25) + labs(title = "PXD003497", subtitle = "(Reanalysis)") |
Correlation_original_PXD003497 + xlim(0,25) + ylim(0,25) + labs(title = "PXD003497", subtitle = "(Original)") ) /
(Correlation_reanalysis_PXD004873 + xlim(0,25) + ylim(0,25) + labs(title = "PXD004873", subtitle = "(Reanalysis)") |
Correlation_original_PXD004873 + xlim(0,25) + ylim(0,25) + labs(title = "PXD004873", subtitle = "(Original)") ) /
(Correlation_reanalysis_PXD014194 + xlim(0,25) + ylim(0,25) + labs(title = "PXD014194", subtitle = "(Reanalysis)") |
Correlation_original_PXD014194 + xlim(0,25) + ylim(0,25) + labs(title = "PXD014194", subtitle = "(Original)") )/
plot_annotation(tag_levels = 'A', title = 'Protein correlation in technical replicates')
ggsave(
"dia_paper_correlation_panel_top3update_axisfix.pdf",
plot = panel_cr,
device = "pdf",
scale = 1,
width = 38,
height = 38,
units = "cm" ,
dpi = 300,
limitsize = TRUE
)
ours <- ours_PXD004873()
theirs <- theirs_PXD004873()
Correlation_combined_PXD004873 <- correlate_study_trs(ours,theirs)
Correlation_runs_PXD004873 <- correlate_study(ours,theirs)
panel_ic <- Correlation_runs_PXD004873 + xlab("Replicate 1 (log2 intensity)") + ylab("Replicate 2 (log2 intensity)")
ggsave(
"dia_paper_PXD004873_ProtIntCorrel_top3update_outliertreated.pdf",
plot = panel_ic,
device = "pdf",
scale = 1,
width = 38,
height = 76,
units = "cm" ,
dpi = 300,
limitsize = TRUE
)
# source("../container/postprocess/DIA_postprocess_correlation.R")
# correlate_techreps_logratios(ours_PXD004873() %>% filter(feature_missingrate_per_group < 50 ))
source("FC_variance_results_datacarpentry.R")
source("../container/postprocess/DIA_postprocess_correlation.R")
source("../container/postprocess/codify_study_customisations.R")
source("../container/postprocess/codify_originalresult_integration.R")
pxds <- c("PXD003497","PXD014194","PXD004873")
for (p in pxds ){
print(p)
df1 <- get(paste("ours_1_1_top3_",p,sep=""))() %>% filter(feature_missingrate_per_group < 50)
assign(paste("Correlation_reanalysis_top3",p,sep="_"), correlate_techreps(df1))
df2 <- get(paste("theirs_",p,sep=""))() %>% mutate(NormLogIntensities = LogIntensities) # we have to assume their intensities are normalised and filtered
assign(paste("Correlation_original",p,sep="_"), correlate_techreps(df2))
}
(Correlation_reanalysis_top3_PXD003497 + xlim(0,25) + ylim(0,25) + labs(title = "PXD003497", subtitle = "(Reanalysis)") |
Correlation_original_PXD003497 + xlim(0,25) + ylim(0,25) + labs(title = "PXD003497", subtitle = "(Original)") ) /
(Correlation_reanalysis_top3_PXD004873 + xlim(0,25) + ylim(0,25) + labs(title = "PXD004873", subtitle = "(Reanalysis)") |
Correlation_original_PXD004873 + xlim(0,25) + ylim(0,25) + labs(title = "PXD004873", subtitle = "(Original)") ) /
(Correlation_reanalysis_top3_PXD014194 + xlim(0,25) + ylim(0,25) + labs(title = "PXD014194", subtitle = "(Reanalysis)") |
Correlation_original_PXD014194 + xlim(0,25) + ylim(0,25) + labs(title = "PXD014194", subtitle = "(Original)") )/
plot_annotation(tag_levels = 'A', title = 'Protein correlation in technical replicates')
ours <- ours_1_1_top3_PXD004873()
theirs <- theirs_PXD004873()
Correlation_combined_PXD004873_top3 <- correlate_study_trs(ours,theirs)
Correlation_all_PXD004873_top3 <- correlate_study(ours,theirs)
......@@ -52,7 +52,7 @@ correlate_techreps <- function(df) {
scale_color_viridis_c(option = "magma", guide=FALSE) +
geom_smooth(method=lm) +
stat_cor(p.accuracy = 0.001, r.accuracy = 0.01) +
labs(title = "Protein correlation in technical replicates", x = "Replicate 1", y = "Replicate 2")
labs(title = "Protein correlation in technical replicates", x = "Replicate 1 (log2 intensity)", y = "Replicate 2 (log2 intensity)")
return(plot)
}
......@@ -62,18 +62,31 @@ correlate_techreps <- function(df) {
# theirs: df argument, long form, needs cols `c(LogIntensities,Protein,originalRUN)`
correlate_study <- function(ours,theirs) {
# merge ours with theirs
corr <- ours %>% mutate(merge_id = paste(Protein,originalRUN,sep="")) %>%
select(LogIntensities,Protein,merge_id) %>%
inner_join(
corr <- ours %>%
dplyr::mutate(merge_id = paste(Protein,originalRUN,sep="")) %>%
dplyr::select(LogIntensities,Protein,merge_id) %>%
dplyr::inner_join(
theirs %>%
mutate(merge_id = paste(Protein,originalRUN,sep="")) %>%
select(LogIntensities,Protein,originalRUN,merge_id),
dplyr::mutate(merge_id = paste(Protein,originalRUN,sep="")) %>%
dplyr::select(LogIntensities,Protein,originalRUN,merge_id),
by="merge_id"
) %>% drop_na() %>%
select(-merge_id) %>%
rename(run = originalRUN) %>%
filter(is.finite(LogIntensities.x)) %>%
filter(is.finite(LogIntensities.y))
) %>% tidyr::drop_na() %>%
dplyr::select(-merge_id) %>%
dplyr::rename(run = originalRUN) %>%
dplyr::filter(is.finite(LogIntensities.x)) %>%
dplyr::filter(is.finite(LogIntensities.y))
outlier_threshold <- log2(0.05)
corr_w_out <- corr %>% mutate(outliers = LogIntensities.x < outlier_threshold | LogIntensities.y < outlier_threshold) %>%
group_by(run) %>% summarise(outliers = sum(outliers)) %>%
filter(outliers > 0)
print("Removed this many 'close to zero' intensity pairs")
print(corr_w_out)
# crude outlier removal
corr <- corr %>%
dplyr::filter(LogIntensities.x >= outlier_threshold) %>%
dplyr::filter(LogIntensities.y >= outlier_threshold)
corr$density <- get_density(corr$LogIntensities.x, corr$LogIntensities.y, n = 100)
......@@ -88,8 +101,142 @@ correlate_study <- function(ours,theirs) {
return(plot)
}
correlate_study_trs <- function(ours,theirs) {
if("sample_id" %in% names(ours)) {
corr_o <- df %>%
dplyr::filter(technical_replicate %in% c('a','b')) %>%
dplyr::mutate(sample_id = paste(Protein,sample_id,sep="_")) %>%
dplyr::select(technical_replicate,LogIntensities,sample_id) %>%
dplyr::filter(is.finite(LogIntensities)) %>%
tidyr::spread(technical_replicate, LogIntensities) %>%
dplyr::rename(a_ours = a, b_ours = b)
} else {
corr_o <- ours %>%
dplyr::filter(technical_replicate %in% c('a','b')) %>%
dplyr::mutate(sample_id = paste(GROUP_ORIGINAL,SUBJECT_ORIGINAL,sep="")) %>%
dplyr::select(technical_replicate,LogIntensities,Protein,sample_id) %>%
dplyr::filter(is.finite(LogIntensities)) %>%
tidyr::spread(technical_replicate, LogIntensities) %>%
dplyr::rename(a_ours = a, b_ours = b)
}
if("sample_id" %in% names(theirs)) {
corr_t <- df %>%
dplyr::filter(technical_replicate %in% c('a','b')) %>%
dplyr::mutate(sample_id = paste(Protein,sample_id,sep="_")) %>%
dplyr::select(technical_replicate,LogIntensities,sample_id) %>%
dplyr::filter(is.finite(LogIntensities)) %>%
tidyr::spread(technical_replicate, LogIntensities) %>%
dplyr::rename(a_theirs = a, b_theirs = b)
} else {
corr_t <- theirs %>%
dplyr::filter(technical_replicate %in% c('a','b')) %>%
dplyr::mutate(sample_id = paste(GROUP_ORIGINAL,SUBJECT_ORIGINAL,sep="")) %>%
dplyr::select(technical_replicate,LogIntensities,Protein,sample_id) %>%
dplyr::filter(is.finite(LogIntensities)) %>%
tidyr::spread(technical_replicate, LogIntensities) %>%
dplyr::rename(a_theirs = a, b_theirs = b)
}
# merge ours with theirs
corr_ot <- corr_o %>% inner_join(corr_t, by=c("sample_id","Protein")) %>%
dplyr::select(a_ours, b_theirs) %>%
dplyr::rename(a = a_ours, b = b_theirs) %>%
tidyr::drop_na(c(a,b))
corr_to <- corr_t %>% inner_join(corr_o, by=c("sample_id","Protein")) %>%
dplyr::select(a_theirs, b_ours) %>%
dplyr::rename(a = a_theirs, b = b_ours) %>%
tidyr::drop_na(c(a,b))
# paste ot and to to one and calc density for all three
corr <- rbind(corr_ot, corr_to %>% dplyr::rename(a = b, b = a))
corr$density <- get_density(corr$a, corr$b, n = 100)
corr_ot$density <- get_density(corr_ot$a, corr_ot$b, n = 100)
corr_to$density <- get_density(corr_to$a, corr_to$b, n = 100)
plot_ot <- ggplot(corr_ot, aes(x = a, y = b, color = density)) +
geom_point() +
#scale_fill_gradient(low="blue", high="red") +
scale_color_viridis_c(option = "magma", guide=FALSE) +
geom_smooth(method=lm) +
stat_cor(p.accuracy = 0.001, r.accuracy = 0.01) +
labs(title = "Protein correlation in technical replicates \n reanalysis vs. original",
x = "Replicate 1 (reanalysis)", y = "Replicate 2 (original)")
plot_to <- ggplot(corr_to, aes(x = a, y = b, color = density)) +
geom_point() +
#scale_fill_gradient(low="blue", high="red") +
scale_color_viridis_c(option = "magma", guide=FALSE) +
geom_smooth(method=lm) +
stat_cor(p.accuracy = 0.001, r.accuracy = 0.01) +
labs(title = "Protein correlation in technical replicates \n reanalysis vs. original",
x = "Replicate 1 (original)", y = "Replicate 2 (reanalysis)")
plot <- ggplot(corr, aes(x = a, y = b, color = density)) +
geom_point() +
#scale_fill_gradient(low="blue", high="red") +
scale_color_viridis_c(option = "magma", guide=FALSE) +
geom_smooth(method=lm) +
stat_cor(p.accuracy = 0.001, r.accuracy = 0.01) +
labs(title = "Protein correlation in technical replicates \n reanalysis vs. original",
x = "Replicate 1", y = "Replicate 2")
return_list <- list("corr_R1_ours_R2_theirs" = plot_ot,
"corr_R1_theirs_R2_ours" = plot_to,
"corr_R1_ours_R2_theirs_and_R1_theirs_R2_ours" = plot)
return(return_list)
}
# Example
# ours <- ours_PXD004873()
# theirs <- theirs_PXD004873()
# p_corr_ours <- correlate_techreps(ours)
# p_corr_vs <- correlate_study(ours,theirs)
correlate_techreps_logratios <- function(df) {
# in case different technical replicates are made, an optional sample_id column can be used to (uniquely) identifiy TR pairs
if("sample_id" %in% names(df)) {
corr <- df %>%
dplyr::filter(technical_replicate %in% c('a','b')) %>%
dplyr::mutate(sample_id = paste(Protein,sample_id,sep="_")) %>%
dplyr::select(technical_replicate,LogIntensities,sample_id) %>%
dplyr::filter(is.finite(LogIntensities)) %>%
tidyr::spread(technical_replicate, LogIntensities) %>%
tidyr::drop_na(c(a,b))
} else {
corr <- df %>%
dplyr::filter(technical_replicate %in% c('a','b')) %>%
dplyr::mutate(sample_id = paste(GROUP_ORIGINAL,SUBJECT_ORIGINAL,sep="")) %>%
dplyr::select(technical_replicate,LogIntensities,Protein,sample_id) %>%
dplyr::filter(is.finite(LogIntensities)) %>%
tidyr::spread(technical_replicate, LogIntensities) %>%
tidyr::drop_na(c(a,b))
}
corr$ab <- corr$a - corr$b
corr$density <- get_density(corr$ab, corr$b, n = 100)
plot <- ggplot(corr, aes(x = b, y = ab, color = density)) +
geom_point() +
#scale_fill_gradient(low="blue", high="red") +
scale_color_viridis_c(option = "magma", guide=FALSE) +
geom_smooth(method=lm) +
#stat_cor(p.accuracy = 0.001, r.accuracy = 0.01) +
labs(title = "Protein correlation in technical replicates", y = "Log2(Replicate 1/Replicate 2)", x = "Replicate 2") +
geom_hline(yintercept=0, linetype="dashed", color = "red")
box <- ggplot(corr, aes(x=0, y = ab)) +
geom_boxplot(alpha = 0.80) +
scale_x_discrete() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
plot.margin = unit(c(0,0,0,0), "lines"),
plot.background = element_blank()) +
geom_hline(yintercept=0, linetype="dashed", color = "red")
return((plot | box ) + plot_layout(widths = c(10, 1)))
}
......@@ -46,7 +46,7 @@ fc_plot <- function(data, name, left_colour, right_colour, center_colour, fc_thr
)
)
),
aes(log.fc,log.pval, colour = threshold, size = 4)) +
aes(log.fc,log.pval, colour = threshold, size = 2)) +
geom_point(alpha = 0.75, show.legend = FALSE) +
geom_hline(yintercept = 1.3, linetype = 2, alpha = 0.5) + # threshold indicators, semi-transparent
geom_vline(xintercept = fc_threshold, linetype = 2, alpha = 0.5) +
......@@ -148,4 +148,34 @@ calc_contrasts <- function(name, contrast_list){
return_list[[as.name(paste(grp1,grp2,sep="-"))]] <- plt
}
return(return_list)
}
\ No newline at end of file
}
calc_ext_contrasts <- function(name, contrast_list){
return_list <- list()
print(name)
for (c in contrast_list ) {
grp1 <- c[[1]]
grp2 <- c[[2]]
data <- get(paste("theirs_",name,sep=""))()
print(paste(grp1,grp2,sep="-"))
data <- data %>% dplyr::filter(GROUP_ORIGINAL %in% c(grp1, grp2)) %>%
mutate(feature_missingrate_per_group = 1) %>% # assume already consistency filtered
mutate(NormLogIntensities = LogIntensities) %>% # assume already normalised
dplyr::filter(GROUP_ORIGINAL %in% c(grp1, grp2))
grp1_names <- data %>% filter(GROUP_ORIGINAL == grp1) %>% distinct(originalRUN) %>% pull(originalRUN) %>% as.character()
grp2_names <- data %>% filter(GROUP_ORIGINAL == grp2) %>% distinct(originalRUN) %>% pull(originalRUN) %>% as.character()
print( paste(length(grp1_names),"vs",length(grp2_names), sep = " ") )
plot_data <- do_fc(data, grp1 = grp1, grp1_names = grp1_names, grp2 = grp2, grp2_names = grp2_names)
return_list[[as.name(paste("contrast",grp1,grp2,sep="-"))]] <- plot_data
print(paste(nrow(plot_data), "Proteins considered",sep = " "))
plt <- fc_plot(plot_data, paste(name, paste(grp1,grp2,sep="-"), sep=" "))
return_list[[as.name(paste(grp1,grp2,sep="-"))]] <- plt
}
return(return_list)
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment