library(tidyverse)
library(here)
library(fs)
library(readxl)
library(janitor)
library(stringr) 
library(lubridate)
library(tidyr)
library(dplyr)
library(hms)

# This script merges the CTD casts with the logbook.
# Step 1: read every file in the split-cast directory. Each file is read as
# headerless tab-separated text and tagged with the path it came from, so the
# rows can later be cross-referenced against the logbook.
ctd_data <- tibble(
  files = fs::dir_ls(here("C:/Users/katey/Desktop/CEOS Data Processing/auto/2018_wk_auto_ctd_sn7783/data/09_split"))
) %>%
  mutate(
    data = map(
      files,
      function(cast_path) {
        read_tsv(cast_path, col_names = FALSE, show_col_types = FALSE) %>%
          mutate(source_file = as.character(cast_path))
      }
    )
  ) %>%
  pull(data) %>%
  bind_rows() %>% # stack all casts into one table
  filter(
    !grepl("#", X1), # drop metadata rows containing #
    !grepl("\\*", X1) # drop metadata rows containing *
  )

# Keep only the downcasts: drop every row whose source path points at a file
# starting with "u" in the split directory, then strip the directory and the
# leading "d" from the remaining paths so that source_file can serve as the
# join key.
ctd_data <- ctd_data %>%
  filter(
    !grepl("C:/Users/katey/Desktop/CEOS Data Processing/auto/2018_wk_auto_ctd_sn7783/data/09_split/u", source_file)
  ) %>%
  mutate(
    source_file = str_remove_all(
      source_file,
      "C:/Users/katey/Desktop/CEOS Data Processing/auto/2018_wk_auto_ctd_sn7783/data/09_split/d"
    )
  )

# Split the single raw text column on runs of spaces into 40 pieces named
# "0" through "39", then keep only the pieces that are needed (plus the join
# key), in output order.
ctd_data <- ctd_data %>%
  separate(col = X1, into = as.character(0:39), sep = " +") %>%
  select(all_of(c(
    "28", "2", "31", "5", "36", "4", "35", "29", "30", "37",
    "32", "33", "34", "14", "15", "13", "16", "17",
    "source_file"
  )))

#FYI, here are descriptions of what the columns are:
# 28 = timeJ: Julian Days
# 2 = prdM: Pressure, Strain Gauge [db]
# 31 = depSM: Depth [salt water, m], lat = 63.4851
# 5 = c0mS/cm: Conductivity [mS/cm]
# 36 = sal00: Salinity, Practical [PSU]
# 4 = tv290C: Temperature [ITS-90, deg C]
# 35 = potemp090C: Potential Temperature [ITS-90, deg C]
# 29 = density00: Density [density, kg/m^3]
# 30 = sigma-theta00 (shown as "sigma-é00" when the theta character is mis-encoded): Density [sigma-theta, kg/m^3]
# 37 = sva: Specific Volume Anomaly [10^-8 * m^3/kg]
# 32 = sbeox0ML/L: Oxygen, SBE 43 [ml/l], WS = 2
# 33 = sbox0Mm/Kg: Oxygen, SBE 43 [umol/kg], WS = 2
# 34 = sbeox0PS: Oxygen, SBE 43 [% saturation], WS = 2
# 14 = wetCDOM: Fluorescence, WET Labs CDOM [mg/m^3]
# 15 = flECO-AFL: Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]
# 13 = par: PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]
# 16 = CStarTr0: Beam Transmission, WET Labs C-Star [%]
# 17 = CStarAt0: Beam Attenuation, WET Labs C-Star [1/m]

# Convert the Julian-day column to a UTC timestamp.
# Column "28" becomes numeric `timej`; day 1.0 corresponds to
# 2018-01-01 00:00:00 UTC, hence the (timej - 1) offset, as per SBE
# instructions: https://blog.seabird.com/tech-tip-julian-days-format/
ctd_data <- ctd_data %>%
  rename(timej = `28`) %>%
  mutate(
    timej = as.numeric(timej),
    datetime = ISOdatetime(2018, 1, 1, 0, 0, 0, tz = "UTC") + (timej - 1) * 86400
  )

# Read the science logbook and reduce it to the CTD entries and the columns
# needed for the merge. Result: one row per logbook entry with station,
# event_no, time_code, bottom_depth_m and decimal-degree coordinates
# (plus any other columns not explicitly dropped below).
logbook <- read_excel(
  "C:/Users/katey/Desktop/CEOS Data Processing/auto/2018_wk_auto_ctd_sn7783/logbooks/SIMEP Science Log Book 2018.xlsx",
  sheet = "Sheet1"
) %>%
  clean_names() %>% # make column names tidy
  # Mark the failed CTD 38 cast (bottom depth 44.7) so the next filter drops
  # it; it carries an accidentally duplicated event number that would cause
  # issues in the joins later on.
  within(event_code[station == "CTD 38" & bottom_depth_m == "44.7"] <- "remove") %>%
  filter(grepl("CTD", event_code)) %>% # keep only CTD entries
  # Split "deg°min" coordinates into degrees and minutes.
  separate(latitude, into = c("lat_deg", "lat_min"), sep = "[°]", extra = "merge") %>%
  separate(longitude, into = c("long_deg", "long_min"), sep = "[°]", extra = "merge") %>%
  mutate(across(c(lat_deg, lat_min, long_deg, long_min), as.numeric)) %>%
  mutate(
    lat_dec_deg = lat_deg + (lat_min / 60), # decimal degrees
    long_dec_deg = -abs(long_deg + (long_min / 60)) # decimal degrees, forced negative
  ) %>%
  # Events #170 and #171 are both labelled CTD 64; rename one of them.
  within(station[event_no == "171"] <- "CTD 64B") %>%
  # Drop columns that are not needed in the final file (nothing in the notes
  # column is needed). time_cst is dropped here as well, so the earlier
  # in-place patches of its "00:25", "00:41" and "00:47" values had no effect
  # on the output and have been removed.
  select(-c(date, time_cst, lat_deg, lat_min, long_deg, long_min, target_depth_m, logger, event_code, notes)) %>%
  # Add the missing time code for CTD 1. Only rows whose time_code is actually
  # missing are filled, so any BO/EN codes already recorded for that station
  # are not overwritten with "BE".
  within(time_code[station %in% "CTD 1" & is.na(time_code)] <- "BE")

# Pivot the logbook wider by hand (rather than with pivot_wider) so that
# column formatting is unaffected: take the BE, BO and EN entries separately,
# suffix every column with the time code, and join them on event number.
logbook_be <- logbook %>%
  filter(grepl("BE", time_code)) %>%
  rename_with(function(nm) paste0(nm, "_be"))
logbook_bo <- logbook %>%
  filter(grepl("BO", time_code)) %>%
  rename_with(function(nm) paste0(nm, "_bo"))
logbook_en <- logbook %>%
  filter(grepl("EN", time_code)) %>%
  rename_with(function(nm) paste0(nm, "_en"))

logbook_pivoted <- logbook_be %>%
  left_join(logbook_bo, by = c("event_no_be" = "event_no_bo")) %>% # BE + BO by event number
  left_join(logbook_en, by = c("event_no_be" = "event_no_en")) %>% # + EN by event number
  select(-c(time_code_be, time_code_bo, time_code_en, station_bo, station_en)) # columns no longer needed

rm(logbook, logbook_be, logbook_bo, logbook_en) # intermediate data frames are no longer needed

# The merge key was built by checking CTD cast start times against the start
# times listed in the logbook, so that each cast is matched to its metadata.
key <- read_excel(
  "C:/Users/katey/Desktop/CEOS Data Processing/auto/2018_wk_auto_ctd_sn7783/logbooks/merge_key.xlsx",
  sheet = "Clean"
)

# Attach the pivoted logbook to the key, matching on station and event number.
logbook <- key %>%
  left_join(logbook_pivoted, by = c("station_be", "event_no_be"))
rm(key, logbook_pivoted) # intermediate data frames are no longer needed

# Add an (empty) column for tidy notes.
logbook <- logbook %>%
  mutate(logbook_notes = NA)

# Merge the CTD data with the logbook on source_file, then drop every CTD row
# that found no logbook entry (no event number after the join).
merged <- ctd_data %>%
  left_join(logbook, by = "source_file") %>%
  filter(!is.na(event_no_be))

# Prepare the Excel CTD file.
# First add the cruise label and the timestamp components to `merged` and
# reduce it to the output columns, in output order.
merged <- merged %>%
  mutate(
    Cruise = "2018 SIMEP",
    year = as.numeric(format(datetime, "%Y")),
    month = as.numeric(format(datetime, "%m")),
    day = as.numeric(format(datetime, "%d")),
    hour = as.numeric(format(datetime, "%H")),
    minute = as.numeric(format(datetime, "%M")),
    second = as.numeric(format(datetime, "%S"))
  ) %>%
  select(all_of(c(
    "Cruise", "station_be", "event_no_be", "datetime",
    "year", "month", "day", "hour", "minute", "second",
    "lat_dec_deg_be", "long_dec_deg_be", "lat_dec_deg_bo", "long_dec_deg_bo",
    "bottom_depth_m_be",
    "2", "31", "5", "36", "4", "35", "29", "30", "37",
    "32", "33", "34", "14", "15", "13", "16", "17",
    "logbook_notes"
  )))

# The Excel table is `merged` with human-readable headers
# (new name = existing column).
excel <- merged %>%
  rename(all_of(c(
    "Station" = "station_be",
    "Event Number" = "event_no_be",
    "Timestamp (UTC)" = "datetime",
    "Year (UTC)" = "year",
    "Month (UTC)" = "month",
    "Day (UTC)" = "day",
    "Hour (UTC)" = "hour",
    "Minute (UTC)" = "minute",
    "Second (UTC)" = "second",
    "Latitude In [Degrees North]" = "lat_dec_deg_be",
    "Longitude In [Degrees East]" = "long_dec_deg_be",
    "Latitude Bottom [Degrees North]" = "lat_dec_deg_bo",
    "Longitude Bottom [Degrees East]" = "long_dec_deg_bo",
    "Bot. Depth [m]" = "bottom_depth_m_be",
    "Pressure, Strain Gauge [db]" = "2",
    "Depth [salt water, m], using lat = 63.4851" = "31",
    "Conductivity [mS/cm]" = "5",
    "Salinity, Practical [PSU]" = "36",
    "Temperature [ITS-90, deg C]" = "4",
    "Potential Temperature [ITS-90, deg C]" = "35",
    "Density [kg/m^3]" = "29",
    "Density [sigma-theta, kg/m^3]" = "30",
    "Specific Volume Anomaly [10^-8 m^3/kg]" = "37",
    "Oxygen, SBE 43 [ml/l], WS = 2" = "32",
    "Oxygen, SBE 43 [umol/kg], WS = 2" = "33",
    "Oxygen, SBE 43 [% saturation], WS = 2" = "34",
    "Fluorescence, WET Labs CDOM [mg/m^3]" = "14",
    "Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]" = "15",
    "PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]" = "13",
    "Beam Transmission, WET Labs C-Star [%]" = "16",
    "Beam Attenuation, WET Labs C-Star [1/m]" = "17",
    "Notes" = "logbook_notes"
  )))

# Prepare the ODV file from `merged`.
# Adds the cast type ("C" for CTD) and a positive degrees-East longitude
# (360 + the negative longitude), keeps the ODV columns in output order, and
# renames them to the ODV headers (new name = existing column). The "~^",
# "~$" and "~#" sequences in the headers are kept exactly as written.
odv <- merged %>%
  mutate(
    Type = "C",
    long_dec_deg_be = as.numeric(long_dec_deg_be),
    dec_long_east = 360 + long_dec_deg_be
  ) %>%
  select(all_of(c(
    "Cruise", "Type", "station_be", "event_no_be",
    "year", "month", "day", "hour", "minute", "second",
    "lat_dec_deg_be", "dec_long_east", "bottom_depth_m_be",
    "2", "31", "5", "36", "4", "35", "29", "30", "37",
    "32", "33", "34", "14", "15", "13", "16", "17",
    "logbook_notes"
  ))) %>%
  rename(all_of(c(
    "Station" = "station_be",
    "Event Number" = "event_no_be",
    "Year" = "year",
    "Month" = "month",
    "Day" = "day",
    "Hour" = "hour",
    "Minute" = "minute",
    "Second" = "second",
    "Latitude [Degrees North]" = "lat_dec_deg_be",
    "Longitude [Degrees East]" = "dec_long_east",
    "Bot. Depth [m]" = "bottom_depth_m_be",
    "Pressure, Strain Gauge [db]" = "2",
    "Depth [salt water, m], using lat = 63.4851~^oN" = "31",
    "Conductivity [mS/cm]" = "5",
    "Salinity, Practical [PSU]" = "36",
    "Temperature [ITS-90, ~^oC]" = "4",
    "Potential Temperature [ITS-90, ~^oC]" = "35",
    "Density [kg/m~^3]" = "29",
    "Density [sigma-theta, kg/m~^3]" = "30",
    "Specific Volume Anomaly [10~^-~^8 m~^3/kg]" = "37",
    "Oxygen, SBE 43 [ml/l], WS = 2" = "32",
    "Oxygen, SBE 43 [~$m~#mol/kg], WS = 2" = "33",
    "Oxygen, SBE 43 [% saturation], WS = 2" = "34",
    "Fluorescence, WET Labs CDOM [mg/m~^3]" = "14",
    "Fluorescence, WET Labs ECO-AFL/FL [mg/m~^3]" = "15",
    "PAR/Irradiance, Biospherical/Licor [~$m~#mol photons/m~^2/sec]" = "13",
    "Beam Transmission, WET Labs C-Star [%]" = "16",
    "Beam Attenuation, WET Labs C-Star [1/m]" = "17",
    "Notes" = "logbook_notes"
  )))

# Export both tables to the final-output directory:
# the Excel-friendly CSV writes missing values as "NA",
# the tab-separated ODV text file writes them as "-999".
write_excel_csv(
  x = excel,
  file = "C:/Users/katey/Desktop/CEOS Data Processing/auto/2018_wk_auto_ctd_sn7783/data/10_final/excel_2018_autonomous_ctd_downcasts.csv",
  na = "NA"
)
write_tsv(
  x = odv,
  file = "C:/Users/katey/Desktop/CEOS Data Processing/auto/2018_wk_auto_ctd_sn7783/data/10_final/odv_2018_autonomous_ctd_downcasts.txt",
  na = "-999"
)