library(tidyverse)
library(here)
library(fs)
library(readxl)
library(janitor)

#This script merges the CTD casts with the logbook
#Read in the casts
cast_files <- fs::dir_ls(here("C:/Users/katey/Desktop/2023_wk_auto_ctd_sn7783/data/09_split")) #list of files to be imported

#Import one cast file without a header row and tag every row with the path it came from
#(the path is used later to cross-reference the logbook)
read_cast <- function(cast_path) {
  read_tsv(cast_path, col_names = FALSE) %>%
    mutate(source_file = as.character(cast_path))
}

ctd_data_protected <- cast_files %>%
  map(read_cast) %>% #one table per file
  bind_rows() %>% #stacked into a single table
  filter(
    !grepl("#", X1), #drop rows containing # (metadata)
    !grepl("\\*", X1), #drop rows containing * (metadata)
    !grepl("C:/Users/katey/Desktop/2023_wk_auto_ctd_sn7783/data/09_split/uSBE19plus", source_file) #drop all upcasts
  )

#Create a working copy of the data so that you don't have to import again if a mistake happens
ctd_data <- ctd_data_protected

#Turn the file path into the merge key:
# - strip the directory and the downcast/instrument prefix
# - strip the ".cnv" extension (dot escaped and anchored to the end, so only a real extension is removed)
#then rename the column to "key"
ctd_data <- ctd_data %>%
  mutate(
    source_file = str_remove_all(source_file, "C:/Users/katey/Desktop/2023_wk_auto_ctd_sn7783/data/09_split/dSBE19plus_01907783_"),
    source_file = str_remove(source_file, "\\.cnv$")
  ) %>%
  rename(key = source_file)

#Split the single text column on runs of spaces into 40 pieces named "0" to "39",
#then keep only the needed pieces (plus the merge key) in the order used downstream
piece_names <- as.character(0:39)
needed_columns <- c("28","2","31","5","36","4","35","29","30","37","32","33","34","14","15","13","16","17","key")

ctd_data <- ctd_data %>%
  separate(col = X1, sep = " +", into = piece_names) %>%
  select(all_of(needed_columns))

#FYI, here are descriptions of what the columns are:
# 28 = timeJ: Julian Days
# 2 = prdM: Pressure, Strain Gauge [db]
# 31 = depSM: Depth [salt water, m], lat = 56.3555
# 5 = c0mS/cm: Conductivity [mS/cm]
# 36 = sal00: Salinity, Practical [PSU]
# 4 = tv290C: Temperature [ITS-90, deg C]
# 35 = potemp090C: Potential Temperature [ITS-90, deg C]
# 29 = density00: Density [density, kg/m^3]
# 30 = sigma-é00 (i.e. sigma-theta00; the theta symbol is mis-encoded as "é"): Density [sigma-theta, kg/m^3]
# 37 = sva: Specific Volume Anomaly [10^-8 * m^3/kg]
# 32 = sbeox0ML/L: Oxygen, SBE 43 [ml/l], WS = 2
# 33 = sbox0Mm/Kg: Oxygen, SBE 43 [umol/kg], WS = 2
# 34 = sbeox0PS: Oxygen, SBE 43 [% saturation], WS = 2
# 14 = wetCDOM: Fluorescence, WET Labs CDOM [mg/m^3]
# 15 = flECO-AFL: Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]
# 13 = par: PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]
# 16 = CStarTr0: Beam Transmission, WET Labs C-Star [%]
# 17 = CStarAt0: Beam Attenuation, WET Labs C-Star [1/m]

#Convert time column
ctd_data <- ctd_data %>%
  rename(timej = `28`) %>% #column 28 holds the Julian day
  mutate(
    timej = as.numeric(timej), #made column numeric
    #the first few casts (Julian day < 220.7) were in the wrong timezone: shift them by 7 hours
    timejfixed = ifelse(timej < 220.7, timej + (7/24), timej),
    #Julian day 1 is 2023-01-01 00:00:00 UTC; converted as per SBE instructions,
    #https://blog.seabird.com/tech-tip-julian-days-format/
    datetime = ISOdatetime(2023, 1, 1, 0, 0, 0, tz="UTC") + (timejfixed - 1)*86400
  )

#Read in the logbook
#Keeps only the CTD rows and the columns needed downstream, then builds the merge key
#"<yyyy>_<mm>_<dd>_<station>" so it can be matched against the key in ctd_data
logbook <- read_excel("C:/Users/katey/Desktop/2023_wk_auto_ctd_sn7783/logbooks/Ship_logbook_WK2023.xlsx", sheet = "Sheet1", skip = 2, na = "NA") %>%
  clean_names() %>%
  filter(grepl("CTD", code)) %>% #keep only CTD rows
  select(station, date_yyyy_mm_dd, latitude_deg_7, longitude_deg_8, latitude_deg_9, longitude_deg_10, bottom_depth_m, notes) %>%
  mutate(
    date_yyyy_mm_dd = str_replace_all(date_yyyy_mm_dd, "-", "_"), #every hyphen in the date becomes an underscore
    key = paste(date_yyyy_mm_dd, station, sep = "_") #create a key for merging
  )
#Tidy notes for individual casts, looked up by merge key; casts not listed here get NA
timezone_note <- "Internal time in CTD was UTC−07:00, corrected here to be UTC"
hit_bottom_note <- "CTD hit bottom during this cast"
cast_notes <- c(
  "2023_08_03_CMO-A" = timezone_note,
  "2023_08_01_CMO-B" = timezone_note,
  "2023_08_08_CE1-a" = timezone_note,
  "2023_08_08_CE1-b" = timezone_note,
  "2023_08_23_BI-09-a" = hit_bottom_note,
  "2023_08_26_CTD-89" = "Associated with station BI-15",
  "2023_08_26_BI-16-a" = hit_bottom_note,
  "2023_08_28_BI-04" = hit_bottom_note,
  "2023_09_05_CTD-103" = hit_bottom_note,
  "2023_09_05_CTD-107" = hit_bottom_note,
  "2023_09_05_CTD-109" = hit_bottom_note,
  "2023_09_05_CTD-98" = "Associated with Churchill Field Course STN-1",
  "2023_09_06_CTD-130" = "Associated with Churchill Field Course STN-4",
  "2023_09_06_CTD-132" = "Associated with Churchill Field Course STN-5"
)
logbook$logbook_notes <- unname(cast_notes[match(logbook$key, names(cast_notes))])

#Merge CTD data with logbook
#A cast whose key has no logbook row is kept by the left join but gets NA for every
#logbook column, so list such keys in a warning instead of letting them pass unnoticed
unmatched_keys <- setdiff(unique(ctd_data$key), logbook$key)
if (length(unmatched_keys) > 0) {
  warning(
    "CTD casts without a matching logbook entry: ",
    paste(unmatched_keys, collapse = ", "),
    call. = FALSE
  )
}
merged <- left_join(ctd_data, logbook, by = "key")

#Prepare the Excel CTD file
#Add the cruise name and the individual date/time parts of the timestamp,
#then keep the output columns in their final order
output_columns <- c("Cruise","station", "datetime", "year", "month", "day", "hour", "minute", "second", "latitude_deg_7", "longitude_deg_8", "latitude_deg_9", "longitude_deg_10", "bottom_depth_m", "2","31","5","36","4","35","29","30","37","32","33","34","14","15","13","16","17", "logbook_notes")

merged <- merged %>%
  mutate(
    Cruise = "James Bay and Belcher Islands Expedition 2023",
    year = as.numeric(format(datetime,'%Y')),
    month = as.numeric(format(datetime,'%m')),
    day = as.numeric(format(datetime,'%d')),
    hour = as.numeric(format(datetime,'%H')),
    minute = as.numeric(format(datetime,'%M')),
    second = as.numeric(format(datetime,'%S'))
  ) %>%
  select(all_of(output_columns))
#Human-readable column headers for the Excel/CSV export, written as new name = current name
excel_labels <- c(
  "Station" = "station",
  "Timestamp (UTC)" = "datetime",
  "Year (UTC)" = "year",
  "Month (UTC)" = "month",
  "Day (UTC)" = "day",
  "Hour (UTC)" = "hour",
  "Minute (UTC)" = "minute",
  "Second (UTC)" = "second",
  "Latitude In [Degrees North]" = "latitude_deg_7",
  "Longitude In [Degrees East]" = "longitude_deg_8",
  "Latitude Out [Degrees North]" = "latitude_deg_9",
  "Longitude Out [Degrees East]" = "longitude_deg_10",
  "Bot. Depth [m]" = "bottom_depth_m",
  "Pressure, Strain Gauge [db]" = "2",
  "Depth [salt water, m], using lat = 56.3555" = "31",
  "Conductivity [mS/cm]" = "5",
  "Salinity, Practical [PSU]" = "36",
  "Temperature [ITS-90, deg C]" = "4",
  "Potential Temperature [ITS-90, deg C]" = "35",
  "Density [kg/m^3]" = "29",
  "Density [sigma-theta, kg/m^3]" = "30",
  "Specific Volume Anomaly [10^-8 m^3/kg]" = "37",
  "Oxygen, SBE 43 [ml/l], WS = 2" = "32",
  "Oxygen, SBE 43 [umol/kg], WS = 2" = "33",
  "Oxygen, SBE 43 [% saturation], WS = 2" = "34",
  "Fluorescence, WET Labs CDOM [mg/m^3]" = "14",
  "Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]" = "15",
  "PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]" = "13",
  "Beam Transmission, WET Labs C-Star [%]" = "16",
  "Beam Attenuation, WET Labs C-Star [1/m]" = "17",
  "Logbook Notes" = "logbook_notes"
)

#Rename in place (column order is kept); any_of() skips a column that is not present
excel <- merged %>%
  rename(any_of(excel_labels))

#Prepare ODV file
#Column headers for the ODV export, written as new name = current name
#(the ~^, ~$ and ~# sequences are kept exactly as they must appear in the file)
odv_labels <- c(
  "Station" = "station",
  "Year" = "year",
  "Month" = "month",
  "Day" = "day",
  "Hour" = "hour",
  "Minute" = "minute",
  "Second" = "second",
  "Latitude [Degrees North]" = "latitude_deg_7",
  "Longitude [Degrees East]" = "dec_long_east",
  "Bot. Depth [m]" = "bottom_depth_m",
  "Pressure, Strain Gauge [db]" = "2",
  "Depth [salt water, m], using lat = 56.3555~^oN" = "31",
  "Conductivity [mS/cm]" = "5",
  "Salinity, Practical [PSU]" = "36",
  "Temperature [ITS-90, ~^oC]" = "4",
  "Potential Temperature [ITS-90, ~^oC]" = "35",
  "Density [kg/m~^3]" = "29",
  "Density [sigma-theta, kg/m~^3]" = "30",
  "Specific Volume Anomaly [10~^-~^8 m~^3/kg]" = "37",
  "Oxygen, SBE 43 [ml/l], WS = 2" = "32",
  "Oxygen, SBE 43 [~$m~#mol/kg], WS = 2" = "33",
  "Oxygen, SBE 43 [% saturation], WS = 2" = "34",
  "Fluorescence, WET Labs CDOM [mg/m~^3]" = "14",
  "Fluorescence, WET Labs ECO-AFL/FL [mg/m~^3]" = "15",
  "PAR/Irradiance, Biospherical/Licor [~$m~#mol photons/m~^2/sec]" = "13",
  "Beam Transmission, WET Labs C-Star [%]" = "16",
  "Beam Attenuation, WET Labs C-Star [1/m]" = "17"
)
odv_columns <- c("Cruise", "Type", "station", "year", "month", "day", "hour", "minute", "second", "latitude_deg_7", "dec_long_east", "bottom_depth_m", "2","31","5","36","4","35","29","30","37","32","33","34","14","15","13","16","17")

odv <- merged %>%
  mutate(
    Type = "C",
    #express the "in" longitude as degrees east in [0, 360): negative (west) values
    #become 360 + longitude, values that are already non-negative are left unchanged
    dec_long_east = (longitude_deg_8 + 360) %% 360
  ) %>%
  select(all_of(odv_columns)) %>%
  rename(any_of(odv_labels))

#Export
#CSV (Excel-friendly) with missing values written as "NA"
excel_csv_path <- "C:/Users/katey/Desktop/2023_wk_auto_ctd_sn7783/data/10_final/2023_autonomous_ctd_downcasts.csv"
write_excel_csv(excel, excel_csv_path, na = "NA")

#Tab-separated text for ODV with missing values written as "-999"
odv_txt_path <- "C:/Users/katey/Desktop/2023_wk_auto_ctd_sn7783/data/10_final/odv_2023_autonomous_ctd_downcasts.txt"
write_tsv(odv, odv_txt_path, na = "-999")