library(tidyverse)
library(here)
library(fs)
library(readxl)
library(janitor)
library(stringr) 

# This script merges the CTD casts with the ship logbook.
# Step 1: read every split cast file and stack them into one table.
ctd_data_protected <- here("C:/Users/katey/Desktop/2021_wk_auto_ctd_sn7783/data/09_split") %>%
  fs::dir_ls() %>% # list of files to be imported
  map(function(cast_path) {
    # Import the file without a header row and stamp every row with the file
    # path; the path is used later to cross-reference the logbook.
    read_tsv(cast_path, col_names = FALSE) %>%
      mutate(source_file = as.character(cast_path))
  }) %>%
  bind_rows() %>% # join all per-file tables into one
  filter(
    !grepl("#", X1), # drop rows containing # (metadata)
    !grepl("\\*", X1) # drop rows containing * (metadata)
  )
  
# Create a working copy of the data so that the casts do not have to be
# imported again if a mistake happens further down.
ctd_data <- ctd_data_protected

# Folder the split casts were read from. The source_file matching below is
# built from this single value instead of repeating the full path each time.
split_dir <- "C:/Users/katey/Desktop/2021_wk_auto_ctd_sn7783/data/09_split"
downcast_33 <- file.path(split_dir, "dWK21_CTD_0033.cnv")
upcast_33 <- file.path(split_dir, "uWK21_CTD_0033.cnv")

# All path matching is literal (fixed = TRUE / fixed()), so the "." characters
# in the paths are not interpreted as regular-expression wildcards.
ctd_data <- ctd_data %>%
  # For cast 33 the upcast has to be used instead of the downcast due to pump
  # issues: delete the downcast, then relabel the upcast as the downcast so the
  # rest of the script can treat it like every other cast.
  filter(!grepl(downcast_33, source_file, fixed = TRUE)) %>%
  mutate(source_file = ifelse(source_file == upcast_33, downcast_33, source_file)) %>%
  # Remove the (remaining) upcasts.
  filter(!grepl(paste0(split_dir, "/u"), source_file, fixed = TRUE)) %>%
  # Delete all MR/EM data (see Kate's 2021 CTD Cookbook for explanation).
  filter(!grepl(paste0(split_dir, "/dWK21_CTD_MR"), source_file, fixed = TRUE)) %>%
  filter(!grepl(paste0(split_dir, "/dWK21_CTD_EM"), source_file, fixed = TRUE)) %>%
  # Reduce the key column to the numeric cast id: strip the leading path and
  # file prefix, strip the trailing ".cnv" extension, then convert to a number.
  mutate(
    source_file = str_remove(source_file, fixed(paste0(split_dir, "/dWK21_CTD_"))),
    source_file = str_remove(source_file, "\\.cnv$"),
    source_file = as.numeric(source_file)
  )

# Split the single text column into 40 columns labelled "0" to "39" (splitting
# on runs of spaces), then keep only the needed variables plus the cast id.
ctd_data <- ctd_data %>%
  separate(col = X1, into = as.character(0:39), sep = " +") %>%
  select(all_of(c(
    "28", "2", "31", "5", "36", "4", "35", "29", "30", "37",
    "32", "33", "34", "14", "15", "13", "16", "17",
    "source_file"
  )))

#FYI, here are descriptions of what the columns are:
# 28 = timeJ: Julian Days
# 2 = prdM: Pressure, Strain Gauge [db]
# 31 = depSM: Depth [salt water, m], lat = 53.3114
# 5 = c0mS/cm: Conductivity [mS/cm]
# 36 = sal00: Salinity, Practical [PSU]
# 4 = tv290C: Temperature [ITS-90, deg C]
# 35 = potemp090C: Potential Temperature [ITS-90, deg C]
# 29 = density00: Density [density, kg/m^3]
# 30 = sigma-é00: Density [sigma-theta, kg/m^3]
# 37 = sva: Specific Volume Anomaly [10^-8 * m^3/kg]
# 32 = sbeox0ML/L: Oxygen, SBE 43 [ml/l], WS = 2
# 33 = sbox0Mm/Kg: Oxygen, SBE 43 [umol/kg], WS = 2
# 34 = sbeox0PS: Oxygen, SBE 43 [% saturation], WS = 2
# 14 = wetCDOM: Fluorescence, WET Labs CDOM [mg/m^3]
# 15 = flECO-AFL: Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]
# 13 = par: PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]
# 16 = CStarTr0: Beam Transmission, WET Labs C-Star [%]
# 17 = CStarAt0: Beam Attenuation, WET Labs C-Star [1/m]

# Convert the time column and fix the first seven casts, which were recorded
# in the wrong timezone.
ctd_data <- ctd_data %>%
  rename(timej = "28") %>% # column 28 holds the Julian-day time
  mutate(
    timej = as.numeric(timej), # text -> number
    # Casts 1-7 get 7 hours (7/24 of a day) added; later casts are unchanged.
    timejfixed = ifelse(source_file <= 7, timej + (7/24), timej),
    # Julian day 1 is 1 January 2021 00:00 UTC; conversion as per SBE
    # instructions, https://blog.seabird.com/tech-tip-julian-days-format/
    datetime = ISOdatetime(2021, 1, 1, 0, 0, 0, tz="UTC") + (timejfixed - 1)*86400
  )

# Read in the ship logbook, keeping only the CTD entries and the columns that
# are needed for the merge.
logbook <- "C:/Users/katey/Desktop/2021_wk_auto_ctd_sn7783/logbooks/wk21_ship_log.xlsx" %>%
  read_excel(sheet = "Sheet1", na = "NA") %>%
  filter(grepl("CTD", code)) %>% # keep only rows whose code contains "CTD"
  select(all_of(c(
    "station_id", "cast_id",
    "year_utc", "month_utc", "day_utc",
    "latitude_in_deg", "longitude_in_deg",
    "latitude_out_deg", "longitude_out_deg",
    "bottom_depth_m", "notes"
  )))

# Add a column of tidy notes. Conditions are checked top to bottom and the
# first match wins, so the cast-specific notes are listed first: they take
# priority over the station notes. Rows matching nothing stay NA.
logbook <- logbook %>%
  mutate(
    logbook_notes = case_when(
      cast_id == "137" ~ "Station CMO-B",
      cast_id == "136" ~ "Station CMO-A",
      cast_id == "33" ~ "Upcast data.",
      station_id == "STN9" ~ "Station 9",
      station_id == "STN8" ~ "Station 8",
      station_id == "STN7" ~ "Station 7",
      station_id == "STN6" ~ "Station 6",
      station_id == "M4" ~ "Station M4",
      station_id == "STN2" ~ "Station 2",
      station_id == "STN3" ~ "Station 3",
      station_id == "STN5" ~ "Station 5",
      TRUE ~ NA_character_
    )
  )

# Merge the CTD data with the logbook (every CTD row is kept and receives the
# logbook fields of its cast), then add the cruise label and the individual
# timestamp components used by both output files.
# NOTE(review): source_file is numeric at this point, so cast_id is presumably
# read from the workbook as a number too - confirm.
merged <- ctd_data %>%
  left_join(logbook, by = c("source_file" = "cast_id")) %>%
  mutate(
    Cruise = "2021 James Bay Expedition",
    year = as.numeric(format(datetime, '%Y')),
    month = as.numeric(format(datetime, '%m')),
    day = as.numeric(format(datetime, '%d')),
    hour = as.numeric(format(datetime, '%H')),
    minute = as.numeric(format(datetime, '%M')),
    second = as.numeric(format(datetime, '%S'))
  ) %>%
  # Keep and order the columns shared by the Excel and ODV outputs.
  select(all_of(c(
    "Cruise", "source_file", "datetime",
    "year", "month", "day", "hour", "minute", "second",
    "latitude_in_deg", "longitude_in_deg",
    "latitude_out_deg", "longitude_out_deg",
    "bottom_depth_m",
    "2", "31", "5", "36", "4", "35", "29", "30", "37",
    "32", "33", "34", "14", "15", "13", "16", "17",
    "logbook_notes"
  )))
# Excel output: same columns as the merged table, with human-readable headers
# (new header on the left, current column name on the right).
excel <- merged %>%
  rename(
    "Cast ID" = "source_file",
    "Timestamp (UTC)" = "datetime",
    "Year (UTC)" = "year",
    "Month (UTC)" = "month",
    "Day (UTC)" = "day",
    "Hour (UTC)" = "hour",
    "Minute (UTC)" = "minute",
    "Second (UTC)" = "second",
    "Latitude In [Degrees North]" = "latitude_in_deg",
    "Longitude In [Degrees East]" = "longitude_in_deg",
    "Latitude Out [Degrees North]" = "latitude_out_deg",
    "Longitude Out [Degrees East]" = "longitude_out_deg",
    "Bot. Depth [m]" = "bottom_depth_m",
    "Pressure, Strain Gauge [db]" = "2",
    "Depth [salt water, m], using lat = 53.3114" = "31",
    "Conductivity [mS/cm]" = "5",
    "Salinity, Practical [PSU]" = "36",
    "Temperature [ITS-90, deg C]" = "4",
    "Potential Temperature [ITS-90, deg C]" = "35",
    "Density [kg/m^3]" = "29",
    "Density [sigma-theta, kg/m^3]" = "30",
    "Specific Volume Anomaly [10^-8 m^3/kg]" = "37",
    "Oxygen, SBE 43 [ml/l], WS = 2" = "32",
    "Oxygen, SBE 43 [umol/kg], WS = 2" = "33",
    "Oxygen, SBE 43 [% saturation], WS = 2" = "34",
    "Fluorescence, WET Labs CDOM [mg/m^3]" = "14",
    "Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]" = "15",
    "PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]" = "13",
    "Beam Transmission, WET Labs C-Star [%]" = "16",
    "Beam Attenuation, WET Labs C-Star [1/m]" = "17",
    "Notes" = "logbook_notes"
  )

# Prepare the ODV file: add the ODV sample type, express longitude as positive
# degrees East, keep the ODV columns in order, and apply the ODV headers
# (new header on the left, current column name on the right).
odv <- merged %>%
  mutate(
    Type = "C", # Type C for CTD
    longitude_in_deg = as.numeric(longitude_in_deg), # make column numeric
    # Make longitude positive degrees East for ODV. Only negative (western)
    # values are shifted by 360; a value that is already positive is left
    # alone instead of being pushed above 360.
    dec_long_east = ifelse(
      longitude_in_deg < 0,
      360 + longitude_in_deg,
      longitude_in_deg
    )
  ) %>%
  select(all_of(c(
    "Cruise", "Type", "source_file",
    "year", "month", "day", "hour", "minute", "second",
    "latitude_in_deg", "dec_long_east", "bottom_depth_m",
    "2", "31", "5", "36", "4", "35", "29", "30", "37",
    "32", "33", "34", "14", "15", "13", "16", "17",
    "logbook_notes"
  ))) %>%
  rename(
    "Station" = "source_file",
    "Year" = "year",
    "Month" = "month",
    "Day" = "day",
    "Hour" = "hour",
    "Minute" = "minute",
    "Second" = "second",
    "Latitude [Degrees North]" = "latitude_in_deg",
    "Longitude [Degrees East]" = "dec_long_east",
    "Bot. Depth [m]" = "bottom_depth_m",
    "Pressure, Strain Gauge [db]" = "2",
    "Depth [salt water, m], using lat = 53.3114~^oN" = "31",
    "Conductivity [mS/cm]" = "5",
    "Salinity, Practical [PSU]" = "36",
    "Temperature [ITS-90, ~^oC]" = "4",
    "Potential Temperature [ITS-90, ~^oC]" = "35",
    "Density [kg/m~^3]" = "29",
    "Density [sigma-theta, kg/m~^3]" = "30",
    "Specific Volume Anomaly [10~^-~^8 m~^3/kg]" = "37",
    "Oxygen, SBE 43 [ml/l], WS = 2" = "32",
    "Oxygen, SBE 43 [~$m~#mol/kg], WS = 2" = "33",
    "Oxygen, SBE 43 [% saturation], WS = 2" = "34",
    "Fluorescence, WET Labs CDOM [mg/m~^3]" = "14",
    "Fluorescence, WET Labs ECO-AFL/FL [mg/m~^3]" = "15",
    "PAR/Irradiance, Biospherical/Licor [~$m~#mol photons/m~^2/sec]" = "13",
    "Beam Transmission, WET Labs C-Star [%]" = "16",
    "Beam Attenuation, WET Labs C-Star [1/m]" = "17",
    "Notes" = "logbook_notes"
  )

# Export both products to the final data folder.
# Excel-friendly CSV; missing values are written as "NA".
write_excel_csv(
  excel,
  "C:/Users/katey/Desktop/2021_wk_auto_ctd_sn7783/data/10_final/excel_2021_autonomous_ctd_downcasts.csv",
  na = "NA"
)
# Tab-separated text file for ODV; missing values are written as "-999".
write_tsv(
  odv,
  "C:/Users/katey/Desktop/2021_wk_auto_ctd_sn7783/data/10_final/odv_2021_autonomous_ctd_downcasts.txt",
  na = "-999"
)