library(tidyverse)
library(here)
library(fs)
library(readxl)
library(janitor)
library(stringr) 
library(lubridate)
library(tidyr)
library(dplyr)
library(hms)

#This script merges the CTD casts with the logbook
#Read in the casts
#Every file in the 09_split folder is read as text with no header row and gets a
#source_file column holding the path it was read from. That path is used later to
#drop the upcasts and to cross-reference the logbook.
#The folder is written as a plain absolute path: here() builds paths relative to
#the project root, so wrapping an absolute path in it adds nothing.
ctd_split_dir <- "C:/Users/katey/Desktop/CEOS Data Processing/ros/2023_wk_ros_ctd_sn7798/data/09_split"
ctd_data <- fs::dir_ls(ctd_split_dir) %>% #created a list of files to be imported
  map(function(cast_file) {
    read_tsv(cast_file, col_names = FALSE, show_col_types = FALSE) %>% #imported the file
      mutate(source_file = as.character(cast_file)) #added file name to each row of the file
  }) %>%
  bind_rows() %>% #joined all tables into one
  filter(
    !grepl("#", X1, fixed = TRUE), #removed rows containing # (metadata)
    !grepl("*", X1, fixed = TRUE) #removed rows containing * (metadata)
  )

#Remove the upcasts
#A row is dropped when its source_file path contains the 09_split folder followed by a name starting with "u"
upcast_pattern <- "C:/Users/katey/Desktop/CEOS Data Processing/ros/2023_wk_ros_ctd_sn7798/data/09_split/u"
is_upcast <- grepl(upcast_pattern, ctd_data$source_file)
ctd_data <- ctd_data[!is_upcast, ]

#Remove unnecessary text from key column in ctd_data
#The cast number is taken from the 6th and 5th characters counted from the end of the path
#NOTE(review): this keeps two digits only - confirm no cast number needs three
cast_id <- stringr::str_sub(ctd_data$source_file, -6, -5) #extracted cast from source file name
ctd_data$source_file <- as.numeric(cast_id) #made column numeric
  
#Split data column into separate columns
#X1 is cut at every run of spaces; the 32 resulting pieces are named "0" to "31"
piece_names <- as.character(0:31)
ctd_data <- ctd_data %>%
  separate(col = X1, into = piece_names, sep = " +")

#Subset needed columns
needed_columns <- c(
  "14", "2", "23", "5", "28", "4", "27", "21", "22", "29",
  "24", "25", "26", "8", "15", "16", "19", "20", "source_file"
)
ctd_data <- ctd_data %>%
  select(all_of(needed_columns))

#FYI, here are descriptions of what the columns are:
# name 14 = timeJ: Julian Days
# name 2 = prdM: Pressure, Strain Gauge [db]
# name 23 = depSM: Depth [salt water, m]
# name 5 = c0mS/cm: Conductivity [mS/cm]
# name 28 = sal00: Salinity, Practical [PSU]
# name 4 = tv290C: Temperature [ITS-90, deg C]
# name 27 = potemp090C: Potential Temperature [ITS-90, deg C]
# name 21 = density00: Density [density, kg/m^3]
# name 22 = sigma-θ00: Density [sigma-theta, kg/m^3]
# name 29 = sva: Specific Volume Anomaly [10^-8 * m^3/kg]
# name 24 = sbeox0ML/L: Oxygen, SBE 43 [ml/l], WS = 2
# name 25 = sbox0Mm/Kg: Oxygen, SBE 43 [umol/kg], WS = 2
# name 26 = sbeox0PS: Oxygen, SBE 43 [% saturation], WS = 2
# name 8 = par: PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]
# name 15 = spar: SPAR, Biospherical/Licor [umol photons/m^2/sec]
# name 16 = cpar: CPAR/Corrected Irradiance [%]
# name 19 = latitude: Latitude [deg]
# name 20 = longitude: Longitude [deg]

#Read in cast 16
#Cast 16 is read from its own file and has a different column layout from the other
#casts, so it is processed separately and given the same column names as ctd_data.
#The file is written as a plain absolute path: here() builds paths relative to the
#project root, so wrapping an absolute path in it adds nothing.
cast16_path <- "C:/Users/katey/Desktop/CEOS Data Processing/ros/2023_wk_ros_ctd_sn7798/data/12_cast_16/09_split_d2023_08_27_016.cnv"
cast16 <- read_tsv(cast16_path, col_names = FALSE, show_col_types = FALSE) %>%
  filter(!grepl("#", X1, fixed = TRUE)) %>% #remove metadata
  filter(!grepl("*", X1, fixed = TRUE)) %>% #remove metadata
  mutate(
    source_file = 16, #add cast ID column
    `19` = "55.6188", #add latitude from logbook (text, like the columns produced by separate())
    `20` = "-79.025" #add longitude from logbook (text, like the columns produced by separate())
  )

#Split cast 16 data column into separate columns and remove unnecessary columns
#Pieces named with letters are not needed; numbered pieces use the same names as in ctd_data
cast16_pieces <- c(
  "A", "B", "2", "C", "4", "5", "D", "E", "8", "F", "G", "H", "I", "J",
  "14", "21", "22", "23", "24", "25", "26", "27", "28", "29", "K", "L"
)
cast16_columns <- c(
  "2", "4", "5", "8", "14", "21", "22", "23", "24", "25",
  "26", "27", "28", "29", "19", "20", "source_file"
)
cast16 <- cast16 %>%
  separate(col = X1, into = cast16_pieces, sep = " +") %>%
  select(all_of(cast16_columns))

#Add cast 16 data to the rest of CTD data
ctd_data <- ctd_data %>%
  bind_rows(cast16)

#Convert time column
#Column "14" becomes timej; a timej of 1 corresponds to 2023-01-01 00:00:00 UTC and each whole unit is one day (86400 s)
#Converted as per SBE instructions, https://blog.seabird.com/tech-tip-julian-days-format/
year_start <- ISOdatetime(2023, 1, 1, 0, 0, 0, tz = "UTC")
ctd_data <- ctd_data %>%
  rename(timej = `14`) %>% #renamed column
  mutate(
    timej = as.numeric(timej), #made column numeric
    datetime = year_start + (timej - 1) * 86400
  )

#Read in the logbook for merging.
#The main ship logbook was duplicated, and all entries other than ROS were deleted.
#All unnecessary columns were deleted. A column titled "cast" was added, and cast ID
#information was filled in using cast start times from the .cnv files.
merge_key_path <- "C:/Users/katey/Desktop/CEOS Data Processing/ros/2023_wk_ros_ctd_sn7798/logbooks/merge_key.xlsx"
logbook <- merge_key_path %>%
  read_excel(sheet = "Sheet1", na = "NA")

#Merge CTD data with logbook
#Every CTD row is kept; logbook columns are attached where source_file equals Cast
#NOTE(review): presumably the logbook has one row per cast - a repeated Cast value would repeat the CTD rows
merged <- ctd_data %>%
  left_join(logbook, by = c(source_file = "Cast"))

#Prepare the Excel CTD file
#Add the cruise name and the date/time parts of datetime, then keep the output columns in their final order
merged_columns <- c(
  "Cruise", "Station", "source_file", "datetime",
  "year", "month", "day", "hour", "minute", "second",
  "19", "20", "bottom_depth_m",
  "2", "23", "5", "28", "4", "27", "21", "22", "29",
  "24", "25", "26", "8", "15", "16", "Notes"
)
merged <- merged %>%
  mutate(
    Cruise = "2023 James Bay and Belcher Islands Expedition",
    year = as.numeric(format(datetime, "%Y")),
    month = as.numeric(format(datetime, "%m")),
    day = as.numeric(format(datetime, "%d")),
    hour = as.numeric(format(datetime, "%H")),
    minute = as.numeric(format(datetime, "%M")),
    second = as.numeric(format(datetime, "%S"))
  ) %>%
  select(all_of(merged_columns))

#Column headers for the Excel file: current name on the left, header to write on the right
excel_headers <- c(
  "source_file" = "Cast",
  "datetime" = "Timestamp (UTC)",
  "year" = "Year (UTC)",
  "month" = "Month (UTC)",
  "day" = "Day (UTC)",
  "hour" = "Hour (UTC)",
  "minute" = "Minute (UTC)",
  "second" = "Second (UTC)",
  "19" = "Latitude [Degrees North]",
  "20" = "Longitude [Degrees East]",
  "bottom_depth_m" = "Bot. Depth [m]",
  "2" = "Pressure, Strain Gauge [db]",
  "23" = "Depth [salt water, m]",
  "5" = "Conductivity [mS/cm]",
  "28" = "Salinity, Practical [PSU]",
  "4" = "Temperature [ITS-90, deg C]",
  "27" = "Potential Temperature [ITS-90, deg C]",
  "21" = "Density [kg/m^3]",
  "22" = "Density [sigma-theta, kg/m^3]",
  "29" = "Specific Volume Anomaly [10^-8 m^3/kg]",
  "24" = "Oxygen, SBE 43 [ml/l], WS = 2",
  "25" = "Oxygen, SBE 43 [umol/kg], WS = 2",
  "26" = "Oxygen, SBE 43 [% saturation], WS = 2",
  "8" = "PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]",
  "15" = "SPAR, Biospherical/Licor [umol photons/m^2/sec]",
  "16" = "CPAR/Corrected Irradiance [%]"
)
excel <- merged
#Only columns listed in excel_headers are renamed; all other names are left untouched
has_header <- names(excel) %in% names(excel_headers)
names(excel)[has_header] <- unname(excel_headers[names(excel)[has_header]])

#Prepare ODV file
#Starts from merged, adds the sample type and an eastward-positive longitude,
#keeps the ODV columns in order and gives them ODV-style headers.
odv_columns <- c(
  "Cruise", "Type", "Station", "source_file",
  "year", "month", "day", "hour", "minute", "second",
  "19", "dec_long_east", "bottom_depth_m",
  "2", "23", "5", "28", "4", "27", "21", "22", "29",
  "24", "25", "26", "8", "15", "16", "Notes"
)
odv <- merged %>%
  mutate(
    Type = "C", #Type C for CTD
    longitude = as.numeric(`20`), #make column numeric
    #make longitude positive degrees East for ODV: only negative (west) values are
    #shifted by 360, so a value that is already positive is not pushed above 360
    dec_long_east = ifelse(longitude < 0, 360 + longitude, longitude)
  ) %>%
  select(all_of(odv_columns))

#Column headers for the ODV file: current name on the left, header to write on the right
odv_headers <- c(
  "source_file" = "Cast",
  "year" = "Year",
  "month" = "Month",
  "day" = "Day",
  "hour" = "Hour",
  "minute" = "Minute",
  "second" = "Second",
  "19" = "Latitude [Degrees North]",
  "dec_long_east" = "Longitude [Degrees East]",
  "bottom_depth_m" = "Bot. Depth [m]",
  "2" = "Pressure, Strain Gauge [db]",
  "23" = "Depth [salt water, m]",
  "5" = "Conductivity [mS/cm]",
  "28" = "Salinity, Practical [PSU]",
  "4" = "Temperature [ITS-90,  ~^oC]",
  "27" = "Potential Temperature [ITS-90, ~^oC]",
  "21" = "Density [kg/m~^3]",
  "22" = "Density [sigma-theta, kg/m~^3]",
  "29" = "Specific Volume Anomaly [10~^-~^8 m~^3/kg]",
  "24" = "Oxygen, SBE 43 [ml/l], WS = 2",
  "25" = "Oxygen, SBE 43 [~$m~#mol/kg], WS = 2",
  "26" = "Oxygen, SBE 43 [% saturation], WS = 2",
  "8" = "PAR/Irradiance, Biospherical/Licor [~$m~#mol photons/m~^2/sec]",
  "15" = "SPAR, Biospherical/Licor [~$m~#mol photons/m~^2/sec]",
  "16" = "CPAR/Corrected Irradiance [%]"
)
#Only columns listed in odv_headers are renamed; all other names are left untouched
has_odv_header <- names(odv) %in% names(odv_headers)
names(odv)[has_odv_header] <- unname(odv_headers[names(odv)[has_odv_header]])

#Export
#Both files go to the 11_final folder: a CSV written with write_excel_csv (missing values as NA)
#and a tab-separated text file for ODV (missing values as -999)
final_dir <- "C:/Users/katey/Desktop/CEOS Data Processing/ros/2023_wk_ros_ctd_sn7798/data/11_final"
excel %>%
  write_excel_csv(file.path(final_dir, "excel_2023_rosette_ctd_downcasts.csv"), na = "NA")
odv %>%
  write_tsv(file.path(final_dir, "odv_2023_rosette_ctd_downcasts.txt"), na = "-999")