library(tidyverse)
library(here)
library(fs)
library(readxl)
library(janitor)
library(stringr) 
library(lubridate)
library(tidyr)
library(dplyr)
library(hms)

#This script is to merge CTD casts with the logbook  
#Read in the casts
# Directory holding the split .cnv cast files. It is defined once so that the
# upcast filter and the cast-number extraction below do not depend on the
# directory text being repeated inside regular expressions.
ctd_dir <- "C:/Users/katey/Desktop/CEOS Data Processing/ros/2021_wk_ros_ctd_sn7798/data/09_split"

# List the cast files as plain character paths. The path is absolute, so it is
# given to dir_ls() directly instead of being routed through here().
cast_files <- as.character(fs::dir_ls(ctd_dir))

#Remove the upcasts
# Upcast files are the ones whose file name starts with "u"; dropping them
# before reading avoids importing data that would be discarded anyway.
cast_files <- cast_files[!startsWith(basename(cast_files), "u")]
if (length(cast_files) == 0) {
  stop("No cast files to read in ", ctd_dir, call. = FALSE)
}

ctd_data <- cast_files %>%
  map(function(cast_file) {
    # Each file is read as raw lines in column X1 (no header); the file name
    # is stored on every row and later becomes the key into the logbook.
    read_tsv(cast_file, col_names = FALSE, show_col_types = FALSE) %>%
      mutate(source_file = basename(cast_file))
  }) %>%
  bind_rows() %>% #joined all tables into one
  filter(
    !grepl("#", X1), #removed rows containing # (metadata)
    !grepl("\\*", X1) #removed rows containing * (metadata)
  ) %>%
  #Remove unnecessary text from key column in ctd_data
  # e.g. "dWK21_ROS_<n>.cnv" -> <n>; names that do not follow this pattern
  # become NA (with a coercion warning).
  mutate(
    source_file = str_remove_all(source_file, "^dWK21_ROS_|\\.cnv"),
    source_file = as.numeric(source_file)
  )

#Split the raw line (X1) on runs of spaces into 38 columns named "0".."37",
#then keep only the columns needed downstream (plus the cast key)
ctd_data <- ctd_data %>%
  separate(col = X1, into = as.character(0:37), sep = " +") %>%
  select(all_of(c(
    "20", "2", "29", "5", "34", "4", "33", "27", "28", "35", "30",
    "31", "32", "9", "10", "11", "8", "21", "22", "25", "26",
    "source_file"
  )))

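#FYI, here are descriptions of the columns kept above (the numbers presumably follow the "# name N = ..." entries of the .cnv header; confirm against a cast file):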
# name 20 = timeJ: Julian Days
# name 2 = prdM: Pressure, Strain Gauge [db]
# name 29 = depSM: Depth [salt water, m]
# name 5 = c0mS/cm: Conductivity [mS/cm]
# name 34 = sal00: Salinity, Practical [PSU]
# name 4 = tv290C: Temperature [ITS-90, deg C]
# name 33 = potemp090C: Potential Temperature [ITS-90, deg C]
# name 27 = density00: Density [density, kg/m^3]
# name 28 = sigma-é00: Density [sigma-theta, kg/m^3] (NOTE: the "é" is presumably a mis-encoded theta, i.e. sigma-θ00)
# name 35 = sva: Specific Volume Anomaly [10^-8 * m^3/kg]
# name 30 = sbeox0ML/L: Oxygen, SBE 43 [ml/l], WS = 2
# name 31 = sbox0Mm/Kg: Oxygen, SBE 43 [umol/kg], WS = 2
# name 32 = sbeox0PS: Oxygen, SBE 43 [% saturation], WS = 2
# name 9 = wetCDOM: Fluorescence, WET Labs CDOM [mg/m^3]
# name 10 = flECO-AFL: Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]
# name 11 = flECO-AFL1: Fluorescence, WET Labs ECO-AFL/FL, 2 [mg/m^3]
# name 8 = par: PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]
# name 21 = spar: SPAR, Biospherical/Licor [umol photons/m^2/sec]
# name 22 = cpar: CPAR/Corrected Irradiance [%]
# name 25 = latitude: Latitude [deg]
# name 26 = longitude: Longitude [deg]

#Convert time column
# Column "20" holds the Julian day; it is renamed to timej, made numeric, and
# turned into a UTC timestamp counted from 2021-01-01 00:00:00 (day 1).
# Conversion as per SBE instructions, https://blog.seabird.com/tech-tip-julian-days-format/
ctd_data <- ctd_data %>%
  rename(timej = "20") %>%
  mutate(
    timej = as.numeric(timej),
    datetime = ISOdatetime(2021, 1, 1, 0, 0, 0, tz = "UTC") + (timej - 1) * 86400
  )

#Read in the logbook
# Only rows whose `code` contains "ROS" (CTD rows) are kept, and only the
# columns needed for the merge. cast_id is coerced to numeric so that it has
# the same type as ctd_data$source_file regardless of how the spreadsheet
# stores it; otherwise the join below fails on incompatible key types.
logbook <- read_excel("C:/Users/katey/Desktop/CEOS Data Processing/ros/2021_wk_ros_ctd_sn7798/logbooks/wk21_ship_log.xlsx", sheet = "Sheet1", na = "NA") %>%
  filter(grepl("ROS", code)) %>% #keep only CTD rows
  select(station_id, cast_id, bottom_depth_m) %>%
  mutate(
    cast_id = as.numeric(cast_id),
    #Add a missing STN ID (%in% never yields NA, so rows with a missing
    #cast_id are left untouched)
    station_id = replace(station_id, cast_id %in% 9, "STN2")
  )

# A cast listed more than once in the logbook would duplicate every CTD row
# of that cast in the join, so make that visible instead of silent.
dup_casts <- unique(logbook$cast_id[duplicated(logbook$cast_id) & !is.na(logbook$cast_id)])
if (length(dup_casts) > 0) {
  warning(
    "Logbook has more than one row for cast(s): ",
    paste(dup_casts, collapse = ", "),
    "; CTD rows for these casts will be duplicated in the merge.",
    call. = FALSE
  )
}

#Merge CTD data with logbook
merged <- left_join(ctd_data, logbook, by = c("source_file" = "cast_id"))

#Prepare the Excel CTD file
# Add the cruise label and split the UTC timestamp into its components, then
# put the columns in export order. `merged` keeps its working column names
# because the ODV section below starts from it as well.
merged <- merged %>%
  mutate(
    Cruise = "2021 JBE",
    year = as.numeric(format(datetime, "%Y")),
    month = as.numeric(format(datetime, "%m")),
    day = as.numeric(format(datetime, "%d")),
    hour = as.numeric(format(datetime, "%H")),
    minute = as.numeric(format(datetime, "%M")),
    second = as.numeric(format(datetime, "%S"))
  ) %>%
  select(all_of(c(
    "Cruise", "station_id", "source_file", "datetime",
    "year", "month", "day", "hour", "minute", "second",
    "25", "26", "bottom_depth_m",
    "2", "29", "5", "34", "4", "33", "27", "28", "35",
    "30", "31", "32", "9", "10", "11", "8", "21", "22"
  )))

# Excel copy: same data, with human-readable column headers (new = old).
excel <- merged %>%
  rename(
    "Station" = "station_id",
    "Cast" = "source_file",
    "Timestamp (UTC)" = "datetime",
    "Year (UTC)" = "year",
    "Month (UTC)" = "month",
    "Day (UTC)" = "day",
    "Hour (UTC)" = "hour",
    "Minute (UTC)" = "minute",
    "Second (UTC)" = "second",
    "Latitude [Degrees North]" = "25",
    "Longitude [Degrees East]" = "26",
    "Bot. Depth [m]" = "bottom_depth_m",
    "Pressure, Strain Gauge [db]" = "2",
    "Depth [salt water, m]" = "29",
    "Conductivity [mS/cm]" = "5",
    "Salinity, Practical [PSU]" = "34",
    "Temperature [ITS-90, deg C]" = "4",
    "Potential Temperature [ITS-90, deg C]" = "33",
    "Density [kg/m^3]" = "27",
    "Density [sigma-theta, kg/m^3]" = "28",
    "Specific Volume Anomaly [10^-8 m^3/kg]" = "35",
    "Oxygen, SBE 43 [ml/l], WS = 2" = "30",
    "Oxygen, SBE 43 [umol/kg], WS = 2" = "31",
    "Oxygen, SBE 43 [% saturation], WS = 2" = "32",
    "Fluorescence, WET Labs CDOM [mg/m^3]" = "9",
    "Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]" = "10",
    "Fluorescence, WET Labs ECO-AFL/FL, 2 [mg/m^3]" = "11",
    "PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]" = "8",
    "SPAR, Biospherical/Licor [umol photons/m^2/sec]" = "21",
    "CPAR/Corrected Irradiance [%]" = "22"
  )

#Prepare ODV file
# Starts from `merged` (working column names), adds the ODV sample type and a
# 0-360 degrees-East longitude, orders the columns, and applies ODV-style
# headers (the "~^", "~$", "~#" sequences are kept exactly as authored).
odv <- merged %>%
  mutate(
    Type = "C", #Type C for CTD
    # Column "26" is the longitude as text. `%% 360` maps western (negative)
    # longitudes to 360 + lon, exactly as before, and leaves eastern
    # (non-negative) longitudes unchanged instead of pushing them above 360.
    dec_long_east = as.numeric(`26`) %% 360
  ) %>%
  select(all_of(c(
    "Cruise", "Type", "station_id", "source_file",
    "year", "month", "day", "hour", "minute", "second",
    "25", "dec_long_east", "bottom_depth_m",
    "2", "29", "5", "34", "4", "33", "27", "28", "35",
    "30", "31", "32", "9", "10", "11", "8", "21", "22"
  ))) %>%
  rename(
    "Station" = "station_id",
    "Cast" = "source_file",
    "Year" = "year",
    "Month" = "month",
    "Day" = "day",
    "Hour" = "hour",
    "Minute" = "minute",
    "Second" = "second",
    "Latitude [Degrees North]" = "25",
    "Longitude [Degrees East]" = "dec_long_east",
    "Bot. Depth [m]" = "bottom_depth_m",
    "Pressure, Strain Gauge [db]" = "2",
    "Depth [salt water, m]" = "29",
    "Conductivity [mS/cm]" = "5",
    "Salinity, Practical [PSU]" = "34",
    "Temperature [ITS-90,  ~^oC]" = "4",
    "Potential Temperature [ITS-90, ~^oC]" = "33",
    "Density [kg/m~^3]" = "27",
    "Density [sigma-theta, kg/m~^3]" = "28",
    "Specific Volume Anomaly [10~^-~^8 m~^3/kg]" = "35",
    "Oxygen, SBE 43 [ml/l], WS = 2" = "30",
    "Oxygen, SBE 43 [~$m~#mol/kg], WS = 2" = "31",
    "Oxygen, SBE 43 [% saturation], WS = 2" = "32",
    "Fluorescence, WET Labs CDOM [mg/m~^3]" = "9",
    "Fluorescence, WET Labs ECO-AFL/FL [mg/m~^3]" = "10",
    "Fluorescence, WET Labs ECO-AFL/FL, 2 [mg/m~^3]" = "11",
    "PAR/Irradiance, Biospherical/Licor [~$m~#mol photons/m~^2/sec]" = "8",
    "SPAR, Biospherical/Licor [~$m~#mol photons/m~^2/sec]" = "21",
    "CPAR/Corrected Irradiance [%]" = "22"
  )

#Export
# Excel-friendly CSV; missing values are written as "NA".
write_excel_csv(
  x = excel,
  file = "C:/Users/katey/Desktop/CEOS Data Processing/ros/2021_wk_ros_ctd_sn7798/data/11_final/excel_2021_rosette_ctd_downcasts.csv",
  na = "NA"
)
# Tab-separated text for ODV; missing values are written as "-999".
write_tsv(
  x = odv,
  file = "C:/Users/katey/Desktop/CEOS Data Processing/ros/2021_wk_ros_ctd_sn7798/data/11_final/odv_2021_rosette_ctd_downcasts.txt",
  na = "-999"
)