#!/usr/bin/env Rscript
# coding: utf-8
#Author: Tanja Krueger 
#Aim: This file visualizes the protein lengths of the SignalP 6.0 data as a
#     comparison to our own analysis.
#Input: A csv file with the protein length in the first column and the class in the second column
#Output: A shared plot depicting the protein length of SignalP data.

##################################################################################
#Step0: Get all the packages
require(flexplot)
require(tidyverse)
require(cowplot)
library(readxl)
library(readODS)
library(ggplot2)
library(see)
library(tidyverse)
library(ragg)
library(colorspace)
library(ggdist)
library(here)
library(gghalves)
getwd()
##################################################################################
#Step1: Read the excel file into a data frame
df1 <- read.csv("Data/derived/length_output_SignalP.csv")

#Step2: Set column names that are missing in the beginning
colnames(df1) <- c("length","type")

#Step3: Set spec as a factor
df1$type <- as.factor(df1$type)
df1<-df1 %>% 
  mutate(
    type = fct_rev(fct_inorder(type)),
    type_num = as.integer(type)
  )
head(df1,5)

#Step4: Visualize the data in violin plots and boxplots, for this cut the yaxis
theme_update(
  plot.margin = margin(rep(20,6)),
  #panel.grid.major = element_line(color = "white",size=1),
  legend.position="none")
pal <- c( "#B1041B","#EABA49","#2156B5","#61BDD2" )
p<-ggplot(df1, aes(x=type_num, y=length, color = type, fill = type)) +
  geom_boxplot(
    aes(fill = type, fill = after_scale(desaturate(lighten(fill, .8), .4))),
    width = .3, outlier.shape = NA, size = .4,show.legend=FALSE,position = position_nudge(x = 0.15)
  ) +
  gghalves::geom_half_point(
    aes(fill = type, fill = after_scale(darken(fill, .2))),
    side = "l", size = 0.5, range_scale = .5, alpha = .3, 
    width = 1, shape = 21, color = "white", stroke = 0,show.legend=FALSE) +
  scale_x_continuous(breaks = 1:4, labels = rev(unique(df1$type)), 
                     expand = c(.001, .001))  +
  scale_y_continuous(breaks = seq(0, 2000, by = 250))+
  scale_color_manual(values = pal) +
  scale_fill_manual(values = pal) +
  coord_cartesian(clip = "off", ylim = c(0, 2000)) +
  theme(panel.background = element_rect(fill = "#eaeaf2"),
        panel.grid.major.y = element_line(color = "white",size=1),
        panel.grid.major.x = element_blank(),  # Turn off vertical major grid
        panel.grid.minor.x = element_line(color = "white",size=1),
        legend.position="none")+
  labs(y = "sequence length",x="")
p
#Step 5: Save the plot
ggsave("Figures/shared_lengths_SingnalP.png",
       width = 6,
       height =5,,
       dpi = 300)

# Step 6:log which packages are used by this script
writeLines(capture.output(sessionInfo()),"Data/derived/used_packages_length_SignalP.txt")
