library(tidyverse)
library(here)
library(fs)
library(readxl)
library(janitor)
library(stringr) 

#This script merges the CTD casts with the ship logbooks
#Read in the casts: each file in 09_split is imported without a header row (so readr names the columns X1, X2, ...)
ctd_data_protected <- fs::dir_ls(here("C:/Users/katey/Desktop/2022 CTD Data Processing/2022_wk_auto_ctd_sn7783/data/09_split")) %>% #list the files to be imported
  map(function(cast_path) {
    read_tsv(cast_path, col_names = FALSE) %>% #import one file
      mutate(source_file = as.character(cast_path)) #tag every row with its file name, used later to cross-reference the logbook
  }) %>%
  bind_rows() %>% #stack all casts into one table
  filter(!grepl("#", X1), !grepl("\\*", X1)) #drop rows containing # or * (metadata)
  
#Create a working copy of the data so that the casts don't have to be imported again if a mistake happens
ctd_data <- ctd_data_protected

#Folder prefix shared by every imported cast (paths are stored exactly as returned by fs::dir_ls)
split_dir <- "C:/Users/katey/Desktop/2022 CTD Data Processing/2022_wk_auto_ctd_sn7783/data/09_split/"

#For three casts, the upcast has to be used instead of the downcast due to improper cast procedure.
#The bad downcasts were deleted at the source, so these three upcasts are relabelled as downcasts
#to let them flow through the rest of the script like every other cast.
upcasts_used_as_downcasts <- paste0(
  split_dir,
  "uSBE19plus_01907783_20220811-",
  c("CTD_STN_13_Z2_B", "CTD_STN_13_Z3", "CTD_STN_13_Z4"),
  ".cnv"
)

ctd_data <- ctd_data %>%
  mutate(source_file = if_else(
    source_file %in% upcasts_used_as_downcasts,
    str_replace(source_file, fixed("/uSBE19plus_"), "/dSBE19plus_"),
    source_file
  )) %>%
  #Remove the remaining upcasts; fixed = TRUE so the path is matched literally, not as a regex
  filter(!grepl(paste0(split_dir, "uSBE19plus"), source_file, fixed = TRUE)) %>%
  #Remove unnecessary text from the key column: the folder/instrument prefix and the file extension.
  #The extension pattern is escaped and anchored; a bare ".cnv" is a regex where "." matches any character.
  mutate(source_file = str_remove(source_file, fixed(paste0(split_dir, "dSBE19plus_01907783_")))) %>%
  mutate(source_file = str_remove(source_file, "\\.cnv$"))

#Split source_file name into date and cast ID, then derive numeric year, month, day for merging with logbook
#source_file looks like "YYYYMMDD-<cast id>" at this point; extra = "merge" keeps any further hyphens
#inside the cast ID instead of silently dropping everything after the second hyphen
ctd_data <- ctd_data %>%
  separate(col = source_file, into = c("date", "ctd_id"), sep = "-", extra = "merge") %>%
  mutate(
    year_utc = as.numeric(str_sub(date, 1, 4)), #characters 1-4 of date = year
    month_utc = as.numeric(str_sub(date, 5, 6)), #characters 5-6 of date = month
    day_utc = as.numeric(str_sub(date, 7, 8)) #characters 7-8 of date = day
  )

#Split the single text column X1 on runs of spaces into 40 columns named "0" to "39",
#then keep only the needed measurement columns plus the merge keys
ctd_data <- ctd_data %>%
  separate(col = X1, into = as.character(0:39), sep = " +") %>%
  select(all_of(c(
    "28", "2", "31", "5", "36", "4", "35", "29", "30", "37",
    "32", "33", "34", "14", "15", "13", "16", "17",
    "ctd_id", "year_utc", "month_utc", "day_utc"
  )))

#FYI, here are descriptions of what the columns are:
# 28 = timeJ: Julian Days
# 2 = prdM: Pressure, Strain Gauge [db]
# 31 = depSM: Depth [salt water, m], lat = 54.1684
# 5 = c0mS/cm: Conductivity [mS/cm]
# 36 = sal00: Salinity, Practical [PSU]
# 4 = tv290C: Temperature [ITS-90, deg C]
# 35 = potemp090C: Potential Temperature [ITS-90, deg C]
# 29 = density00: Density [density, kg/m^3]
# 30 = sigma-é00: Density [sigma-theta, kg/m^3]
# 37 = sva: Specific Volume Anomaly [10^-8 * m^3/kg]
# 32 = sbeox0ML/L: Oxygen, SBE 43 [ml/l], WS = 2
# 33 = sbox0Mm/Kg: Oxygen, SBE 43 [umol/kg], WS = 2
# 34 = sbeox0PS: Oxygen, SBE 43 [% saturation], WS = 2
# 14 = wetCDOM: Fluorescence, WET Labs CDOM [mg/m^3]
# 15 = flECO-AFL: Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]
# 13 = par: PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]
# 16 = CStarTr0: Beam Transmission, WET Labs C-Star [%]
# 17 = CStarAt0: Beam Attenuation, WET Labs C-Star [1/m]

#Convert time column
ctd_data <- ctd_data %>%
  rename(timej = "28") %>% #column "28" holds the Julian day (timeJ)
  mutate(
    timej = as.numeric(timej), #make column numeric
    #Julian day 1 = 2022-01-01 00:00:00 UTC, so offset by (timej - 1) days expressed in seconds
    #(as per SBE instructions, https://blog.seabird.com/tech-tip-julian-days-format/)
    datetime = ISOdatetime(2022, 1, 1, 0, 0, 0, tz = "UTC") + (timej - 1) * 86400
  )

#Tidy notes, keyed by station_id; stations not listed here get NA
note_small_boat <- "Small boat station."
note_freshwater <- "Cast processed following seawater guidelines, however, this cast captured freshwater samples (samples with conductivity less than or equal to 6 mS/cm)."
note_upcast_freshwater <- "Upcast data. Cast processed following seawater guidelines, however, this cast captured freshwater samples (samples with conductivity less than or equal to 6 mS/cm)."
note_pump_off <- "Minimum conductivity frequency was never met during this cast, therefore, the pump never turned on. All pumped measurements (DO, temp, cond) are removed. Cast processed following seawater guidelines, however, this cast captured freshwater samples (samples with conductivity less than or equal to 6 mS/cm)."

station_notes <- c(
  CTD_Z1 = "Small boat station. This station was about 25 m away from CTD_Z2, location from CTD_Z2 used here.",
  CTD_Z2 = note_small_boat,
  CTD_Z3 = note_small_boat,
  CTD_STN_13_Z2_B = note_upcast_freshwater,
  CTD_STN_13_Z3 = note_upcast_freshwater,
  CTD_STN_13_Z4 = note_upcast_freshwater,
  CTD105 = "Small boat station. Cast processed following seawater guidelines, however, this cast captured freshwater samples (samples with conductivity less than or equal to 6 mS/cm).",
  CTD106 = note_small_boat,
  CTD107 = note_small_boat,
  CTD108 = note_small_boat,
  CTD109 = note_small_boat,
  CTD110 = note_small_boat,
  CTD111 = note_small_boat,
  CTD_STN_06_ZA = note_small_boat,
  CTD_STN_06_ZB = note_small_boat,
  CTD_STN_06_ZC = note_small_boat,
  CTD_STN_09_ZA = note_small_boat,
  CTD_STN_09_ZB = note_small_boat,
  CTD_STN_09_ZC = note_small_boat,
  CTD138 = "STN-10.",
  CTD148 = "Mooring location M1.",
  CTD124 = "STN-17.",
  CTD082 = note_pump_off,
  CTD083 = note_pump_off,
  CTD085 = note_freshwater,
  CTD086 = note_freshwater,
  CTD088 = note_freshwater
)

#Read in the logbook, keep only the CTD rows and the columns needed for the merge,
#then add a clean logbook_notes column looked up by station_id
logbook <- read_excel("C:/Users/katey/Desktop/2022 CTD Data Processing/2022_wk_auto_ctd_sn7783/logbooks/jbe2022_ship_logbook.xlsx", sheet = "Sheet1", na = "NA") %>%
  filter(grepl("CTD", code)) %>% #keep only CTD rows
  select(station_id, year_utc, month_utc, day_utc, latitude_in_deg, longitude_in_deg, latitude_out_deg, longitude_out_deg, bottom_depth_m, notes) %>%
  mutate(logbook_notes = unname(station_notes[station_id]))

#Give the columns that are edited or exported below word names instead of their numeric positions
#(later steps refer to these word names)
ctd_data <- ctd_data %>%
  rename(
    five = "5",
    thirtysix = "36",
    four = "4",
    thirtyfive = "35",
    twentynine = "29",
    thirty = "30",
    thirtyseven = "37",
    thirtytwo = "32",
    thirtythree = "33",
    thirtyfour = "34",
    thirteen = "13"
  )

#Columns blanked for casts where the pump never turned on
pumped_columns <- c(
  "five", "thirtysix", "four", "thirtyfive", "twentynine",
  "thirty", "thirtyseven", "thirtytwo", "thirtythree", "thirtyfour"
)

ctd_data <- ctd_data %>%
  mutate(
    #Remove pumped data from CTD082 and CTD083
    across(all_of(pumped_columns), ~ replace(.x, ctd_id %in% c("CTD082", "CTD083"), NA)),
    #Remove PAR data from CTD221 and CTD222 (cap was left on sensor)
    thirteen = replace(thirteen, ctd_id %in% c("CTD221", "CTD222"), NA)
  )

#Load Yurkowski's logbook, keeping the same columns as the JBE logbook (with logbook_notes already tidy)
dfo_logbook <- read_excel(
  "C:/Users/katey/Desktop/2022 CTD Data Processing/2022_wk_auto_ctd_sn7783/logbooks/yurkowski2022_ctd_log.xlsx",
  sheet = "Event Log",
  na = "NA"
) %>%
  select(all_of(c(
    "station_id", "year_utc", "month_utc", "day_utc",
    "latitude_in_deg", "longitude_in_deg", "latitude_out_deg", "longitude_out_deg",
    "bottom_depth_m", "logbook_notes"
  )))

#Append Yurkowski's logbook to the JBE logbook
#Positions and bottom depth in the JBE logbook are forced to numeric before stacking the two logbooks
logbook <- logbook %>%
  mutate(across(
    c(latitude_in_deg, longitude_in_deg, latitude_out_deg, longitude_out_deg, bottom_depth_m),
    as.numeric
  )) %>%
  bind_rows(dfo_logbook)

#Merge CTD data with logbook: every CTD row picks up the logbook entry with the same date and station
dfo_cruise_stations <- c("CTD221", "CTD222", "CTD223", "CTD224")

merged <- ctd_data %>%
  left_join(logbook, by = c("year_utc" = "year_utc", "month_utc" = "month_utc", "day_utc" = "day_utc", "ctd_id" = "station_id")) %>%
  #Prepare the Excel CTD file
  mutate(
    #Casts belong to the JBE expedition unless they are one of the listed stations,
    #were taken on day 31, or were taken in month 9
    #NOTE(review): the day-31 rule does not check the month - confirm no JBE cast falls on a 31st
    Cruise = if_else(
      ctd_id %in% dfo_cruise_stations | day_utc %in% 31 | month_utc %in% 9,
      "DFO's 2022 Churchill Expedition",
      "2022 James Bay and Belcher Islands Expedition"
    ),
    #Break the timestamp into numeric parts
    year = as.numeric(format(datetime, "%Y")),
    month = as.numeric(format(datetime, "%m")),
    day = as.numeric(format(datetime, "%d")),
    hour = as.numeric(format(datetime, "%H")),
    minute = as.numeric(format(datetime, "%M")),
    second = as.numeric(format(datetime, "%S"))
  ) %>%
  select(all_of(c(
    "Cruise", "ctd_id", "datetime", "year", "month", "day", "hour", "minute", "second",
    "latitude_in_deg", "longitude_in_deg", "latitude_out_deg", "longitude_out_deg", "bottom_depth_m",
    "2", "31", "five", "thirtysix", "four", "thirtyfive", "twentynine", "thirty", "thirtyseven",
    "thirtytwo", "thirtythree", "thirtyfour", "14", "15", "thirteen", "16", "17", "logbook_notes"
  )))
#Human-readable column headers for the Excel export, written as new name = current name
excel_headers <- c(
  "Station" = "ctd_id",
  "Timestamp (UTC)" = "datetime",
  "Year (UTC)" = "year",
  "Month (UTC)" = "month",
  "Day (UTC)" = "day",
  "Hour (UTC)" = "hour",
  "Minute (UTC)" = "minute",
  "Second (UTC)" = "second",
  "Latitude In [Degrees North]" = "latitude_in_deg",
  "Longitude In [Degrees East]" = "longitude_in_deg",
  "Latitude Out [Degrees North]" = "latitude_out_deg",
  "Longitude Out [Degrees East]" = "longitude_out_deg",
  "Bot. Depth [m]" = "bottom_depth_m",
  "Pressure, Strain Gauge [db]" = "2",
  "Depth [salt water, m], using lat = 54.1684" = "31",
  "Conductivity [mS/cm]" = "five",
  "Salinity, Practical [PSU]" = "thirtysix",
  "Temperature [ITS-90, deg C]" = "four",
  "Potential Temperature [ITS-90, deg C]" = "thirtyfive",
  "Density [kg/m^3]" = "twentynine",
  "Density [sigma-theta, kg/m^3]" = "thirty",
  "Specific Volume Anomaly [10^-8 m^3/kg]" = "thirtyseven",
  "Oxygen, SBE 43 [ml/l], WS = 2" = "thirtytwo",
  "Oxygen, SBE 43 [umol/kg], WS = 2" = "thirtythree",
  "Oxygen, SBE 43 [% saturation], WS = 2" = "thirtyfour",
  "Fluorescence, WET Labs CDOM [mg/m^3]" = "14",
  "Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]" = "15",
  "PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]" = "thirteen",
  "Beam Transmission, WET Labs C-Star [%]" = "16",
  "Beam Attenuation, WET Labs C-Star [1/m]" = "17",
  "Notes" = "logbook_notes"
)

#The Excel table is the merged table with its columns relabelled; columns not present are left alone
excel <- merged %>% rename(any_of(excel_headers))

#Prepare ODV file
#ODV column headers, written as new name = current name (~^ and ~$...~# are ODV formatting codes)
odv_headers <- c(
  "Station" = "ctd_id",
  "Year" = "year",
  "Month" = "month",
  "Day" = "day",
  "Hour" = "hour",
  "Minute" = "minute",
  "Second" = "second",
  "Latitude [Degrees North]" = "latitude_in_deg",
  "Longitude [Degrees East]" = "dec_long_east",
  "Bot. Depth [m]" = "bottom_depth_m",
  "Pressure, Strain Gauge [db]" = "2",
  "Depth [salt water, m], using lat = 54.1684~^oN" = "31",
  "Conductivity [mS/cm]" = "five",
  "Salinity, Practical [PSU]" = "thirtysix",
  "Temperature [ITS-90, ~^oC]" = "four",
  "Potential Temperature [ITS-90, ~^oC]" = "thirtyfive",
  "Density [kg/m~^3]" = "twentynine",
  "Density [sigma-theta, kg/m~^3]" = "thirty",
  "Specific Volume Anomaly [10~^-~^8 m~^3/kg]" = "thirtyseven",
  "Oxygen, SBE 43 [ml/l], WS = 2" = "thirtytwo",
  "Oxygen, SBE 43 [~$m~#mol/kg], WS = 2" = "thirtythree",
  "Oxygen, SBE 43 [% saturation], WS = 2" = "thirtyfour",
  "Fluorescence, WET Labs CDOM [mg/m~^3]" = "14",
  "Fluorescence, WET Labs ECO-AFL/FL [mg/m~^3]" = "15",
  "PAR/Irradiance, Biospherical/Licor [~$m~#mol photons/m~^2/sec]" = "thirteen",
  "Beam Transmission, WET Labs C-Star [%]" = "16",
  "Beam Attenuation, WET Labs C-Star [1/m]" = "17",
  "Notes" = "logbook_notes"
)

odv <- merged %>%
  mutate(
    #CTD 109-111 are missing IN coordinates; substitute OUT coordinates for IN.
    #The replacements are numeric on purpose: assigning quoted values would turn the
    #whole coordinate column into text.
    latitude_in_deg = case_when(
      ctd_id == "CTD109" ~ 52.9764,
      ctd_id == "CTD110" ~ 52.9702,
      ctd_id == "CTD111" ~ 52.9593,
      TRUE ~ as.numeric(latitude_in_deg)
    ),
    longitude_in_deg = case_when(
      ctd_id == "CTD109" ~ -78.8753,
      ctd_id == "CTD110" ~ -78.9814,
      ctd_id == "CTD111" ~ -78.9133,
      TRUE ~ as.numeric(longitude_in_deg)
    ),
    Type = "C", #Type C for CTD
    #Make longitude positive degrees East for ODV
    #NOTE(review): adding 360 assumes every longitude is negative (west) - confirm
    dec_long_east = 360 + longitude_in_deg
  ) %>%
  select(all_of(c(
    "Cruise", "Type", "ctd_id", "year", "month", "day", "hour", "minute", "second",
    "latitude_in_deg", "dec_long_east", "bottom_depth_m",
    "2", "31", "five", "thirtysix", "four", "thirtyfive", "twentynine", "thirty", "thirtyseven",
    "thirtytwo", "thirtythree", "thirtyfour", "14", "15", "thirteen", "16", "17", "logbook_notes"
  ))) %>%
  rename(all_of(odv_headers))

#Export: the Excel-friendly table as CSV (missing values written as NA) and the ODV table as
#tab-separated text (missing values written as -999)
excel_out_path <- "C:/Users/katey/Desktop/2022 CTD Data Processing/2022_wk_auto_ctd_sn7783/data/10_final/excel_2022_autonomous_ctd_downcasts.csv"
odv_out_path <- "C:/Users/katey/Desktop/2022 CTD Data Processing/2022_wk_auto_ctd_sn7783/data/10_final/odv_2022_autonomous_ctd_downcasts.txt"

write_excel_csv(excel, excel_out_path, na = "NA")
write_tsv(odv, odv_out_path, na = "-999")