#!/usr/bin/env Rscript
# coding: utf-8

# Aim: This script visualizes the protein lengths
# Input: CSV file (Data/derived/length_output.csv) with the protein length in the first column and the class in the second column
# Output: the plot, saved as Figures/shared_lengths4.png

#install.packages("see")
#install.packages("seaborn")
##################################################################################Step1: Get all the packages
require(flexplot)
require(tidyverse)
require(cowplot)
library(readxl)
library(readODS)
library(ggplot2)
library(see)
library(tidyverse)
library(ragg)
library(colorspace)
library(ggdist)
library(here)
library(gghalves)





# Show the working directory; the relative paths used below resolve against it.
getwd()

# Read the length table from the CSV file and give its columns fixed names.
# NOTE(review): read.csv() treats the first row as a header by default; if the
# file really has no header row, that first record is consumed -- confirm.
df1 <- read.csv("Data/derived/length_output.csv")
names(df1) <- c("length", "type")

# Make the class column a factor whose levels follow the reverse of their order
# of first appearance, and store the matching integer codes in `type_num` so
# the classes can be placed on a numeric x axis.
df1$type <- as.factor(df1$type)
df1$type <- fct_rev(fct_inorder(df1$type))
df1$type_num <- as.integer(df1$type)

# Print the first rows as a quick sanity check.
head(df1, n = 5)

# Visualize the data as half-violins, boxplots and raw points; the y axis is
# cut off for this.

# Dark colour scheme: theme_update() modifies the currently active ggplot2
# theme, so it applies to the plots created afterwards.
theme_update(
  # margin() takes one value per side (top, right, bottom, left); the previous
  # call passed a single length-6 vector as `t`.
  plot.margin = margin(t = 20, r = 20, b = 20, l = 20),
  #panel.grid.major = element_line(color = "white",size=1),
  legend.position = "none",
  plot.background = element_rect(fill = "#21252b"),
  axis.title.x = element_text(color = "#869b9f"),
  axis.title.y = element_text(color = "#869b9f"),
  axis.text.x = element_text(color = "#869b9f"),
  axis.text.y = element_text(color = "#869b9f")
)

# Manual palette handed to both the colour and the fill scale of the plot.
pal <- c("#75ae74", "#5faad3", "#c07f4e", "#b96dbc")



# Build the figure: for every class a density slab, a narrow boxplot and the
# raw points, placed at the numeric position `type_num` and relabelled with the
# class names on the x axis.
#
# Each layer maps `fill` exactly once. Where a layer needs a modified version
# of the palette colour, stage() maps the data column first and adjusts the
# resulting colour after scaling; listing `fill` twice inside one aes() call
# triggers a duplicated-aesthetic warning.
p <- ggplot(df1, aes(x = type_num, y = length, color = type, fill = type)) +
  # Slab filled with the unmodified palette colour inherited from the
  # plot-level mapping (the former lighten(fill, 0) was a no-op).
  ggdist::stat_halfeye(
    adjust = .5, width = .9, .width = 0, justification = -.12,
    point_colour = NA,
    # NOTE(review): `breaks` presumably only matters for histogram-type slabs;
    # confirm it is still needed.
    breaks = seq(min(df1$length), max(df1$length), 11),
    # Was accidentally placed inside seq(), where it was silently ignored.
    show.legend = FALSE
  ) +
  # Boxplot filled with a lightened, desaturated version of the class colour.
  geom_boxplot(
    aes(fill = stage(type, after_scale = desaturate(lighten(fill, .8), .4))),
    width = .17, outlier.shape = NA, size = .4, show.legend = FALSE
  ) +
  # Raw observations on the left-hand side, slightly darkened.
  gghalves::geom_half_point(
    aes(fill = stage(type, after_scale = darken(fill, .2))),
    side = "l", size = 0.5, range_scale = .5, alpha = .3,
    width = .8, shape = 21, color = "white", stroke = 0, show.legend = FALSE
  ) +
  # `type_num` is the integer code of `type`, so position i belongs to level i.
  # Deriving breaks and labels from the levels keeps them in sync for any
  # number of classes (the manual palette `pal` must still provide enough
  # colours).
  scale_x_continuous(
    breaks = seq_along(levels(df1$type)),
    labels = levels(df1$type),
    expand = c(.001, .001)
  ) +
  scale_y_continuous(breaks = seq(0, 2000, by = 250)) +
  scale_color_manual(values = pal) +
  scale_fill_manual(values = pal) +
  # Restrict the visible y range to 0-2000 without dropping observations.
  coord_cartesian(clip = "off", ylim = c(0, 2000)) +
  theme(
    panel.background = element_rect(fill = "#282c34"),
    panel.grid.major.y = element_line(color = "#4a5865", size = 0.25),
    panel.grid.major.x = element_line(color = "#4a5865", size = 0.25),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    legend.position = "none"
  ) +
  labs(y = "sequence length", x = "")
p
# Write the figure to disk. The output directory is created if it does not
# exist yet, and the plot object is passed explicitly instead of relying on
# ggsave()'s default of the last plot displayed. The stray empty argument
# after `height` has been removed.
dir.create("Figures", showWarnings = FALSE, recursive = TRUE)
ggsave(
  "Figures/shared_lengths4.png",
  plot = p,
  width = 6,
  height = 5,
  dpi = 300
)
