library(tidyverse)
library(here)
library(fs)
library(dplyr)
library(purrr)
library(readr)
library(readxl)

#This script fixes errors in the pre-final rosette bottle data file and adds the missing Station 15 Cast 2 bottle data, which is extracted from the upcast
#See "Kate's 2019 Rosette CTD Cookbook.docx" for explanations of the errors and solutions 

#Load the pre-final rosette bottle data file
bottle_data_path <- "C:/Users/katey/Desktop/CEOS Data Processing/ros/2019_wk_ros_ctd_sn7798/data/11_pre_final/2019_rosette_bottle_data.csv" #location of the pre-final bottle data file
bottle_data <- read_csv(bottle_data_path, show_col_types = FALSE)

#Station IDs arrive as text; keep the first run of digits in each ID and store it as a number
bottle_data <- mutate(
  bottle_data,
  Station = as.double(str_extract(Station, "\\d+"))
)

#Station 12 Cast 1
#Bottle 4 is missing from the file: insert a copy of bottle 3 (row 102) directly after the original,
#so the copy becomes row 103 and every later row moves down by one.
bottle_data <- bottle_data[c(1:102, 102, 103:nrow(bottle_data)), ]

#Rows 103-111 are the new copy of bottle 3 followed by bottles 4-11; raise each bottle position by 1
bottle_data[["Bottle Position"]][103:111] <- 1 + bottle_data[["Bottle Position"]][103:111]

#Notes: one for the inserted copy (row 103), one shared by the renumbered bottles (rows 104-111)
bottle_data[["Notes"]][103] <- "It appears that one of the two bottles fired at 60 m depth did not register in the software, information for the missing bottle 4 was copied from bottle 3"
bottle_data[["Notes"]][104:111] <- "Paper log indicates all 12 bottles were fired, but the bottle file only had record of 11 bottles fired (appears that one of the two bottles fired at 60m depth did not register in the software). No notes in logbooks about rosette coming back with an empty bottle. Assuming paper log is correct, added +1 to bottle position numbers for bottles fired after bottle 3."


#Station 15 Cast 2
#Bottle 4 is missing from the file: insert a copy of bottle 3 (row 169) directly after the original,
#so the copy becomes row 170 and the bottle that followed it becomes row 171.
bottle_data <- bottle_data[c(1:169, 169, 170:nrow(bottle_data)), ]

#Rows 170-171 are the new copy of bottle 3 and the bottle recorded after it; raise both positions by 1
bottle_data[["Bottle Position"]][170:171] <- 1 + bottle_data[["Bottle Position"]][170:171]

#Notes: one for the inserted copy (row 170), one for the renumbered bottle (row 171)
bottle_data[["Notes"]][170] <- "It appears that one of the four bottles fired at 40m depth did not register in the software, information for the missing bottle 4 was copied from bottle 3"
bottle_data[["Notes"]][171] <- "Paper log indicates all 12 bottles were fired, but the bottle file only had record of 4 bottles fired (appears that one of the four bottles fired at 40m depth did not register in the software and the software stopped recording after the first bottle fired at 30m). No notes in logbooks about rosette coming back with any empty bottles. Assuming paper log is correct, added +1 to bottle position number for the bottle fired after bottle 3."


#Station 20 Cast 2
#The third record at 20m (row 262) is an extra bottle; drop it
bottle_data <- bottle_data %>% slice(-262)

#After the removal rows 262-265 hold bottles 8-11; lower each bottle position by 1
bottle_data[["Bottle Position"]][262:265] <- bottle_data[["Bottle Position"]][262:265] - 1

#Record the correction on every renumbered bottle
bottle_data[["Notes"]][262:265] <- "Paper log indicates 10 bottles were fired, bottle file indicates 11 bottles were fired. The extra (third) record at 20m was removed, bottle numbers were adjusted accordingly"


#Station 23 Cast 2
#The fifth record at 10m (row 316) is an extra bottle; drop it
bottle_data <- bottle_data %>% slice(-316)

#After the removal rows 316-317 hold bottles 12-13; lower each bottle position by 1
bottle_data[["Bottle Position"]][316:317] <- bottle_data[["Bottle Position"]][316:317] - 1

#Record the correction on every renumbered bottle
bottle_data[["Notes"]][316:317] <- "Bottle file indicates 13 bottles were fired (rosette only has 12). The extra (fifth) record at 10m was removed, bottle numbers were adjusted accordingly"

#Station 26 Cast 1
#Two extra records are dropped one after the other: first the third bottle fired at 30m (row 328),
#then the third bottle fired at 10m, which sits at row 334 once the first removal has been made.
bottle_data <- bottle_data %>%
  slice(-328) %>%
  slice(-334)

#Rows 328-333 hold bottles 6-11, which follow one removed record: lower their positions by 1
bottle_data[["Bottle Position"]][328:333] <- bottle_data[["Bottle Position"]][328:333] - 1

#Rows 334-335 hold bottles 13-14, which follow both removed records: lower their positions by 2
bottle_data[["Bottle Position"]][334:335] <- bottle_data[["Bottle Position"]][334:335] - 2

#Record the correction on every renumbered bottle
bottle_data[["Notes"]][328:335] <- "Bottle file indicates 14 bottles were fired (rosette only has 12). The extra (third) records at 30m and 10m were removed, bottle numbers were adjusted accordingly"


#Returning to Station 15 Cast 2
#Bottle 6 is missing from the file: insert a copy of bottle 5 (row 171) directly after the original,
#so the copy becomes row 172 and every later row moves down by one.
bottle_data <- bottle_data[c(1:171, 171, 172:nrow(bottle_data)), ]

#Row 172 is the new copy of bottle 5; raise its bottle position by 1
bottle_data[["Bottle Position"]][172] <- 1 + bottle_data[["Bottle Position"]][172]

#Replace the note carried over from bottle 5 with one describing the inserted copy
bottle_data[["Notes"]][172] <- "It appears that one of the two bottles fired at 30m depth did not register in the software, information for the missing bottle 6 was copied from bottle 5"

#Adding Station 15 Cast 2 missing bottle data
#The .btl file is read once. Rows whose text contains "avg", "sdev", "min" or "max" are then pulled out
#separately, numbered in order of appearance (rowid), and matched to each other by that rowid.
btl_path <- "C:/Users/katey/Desktop/CEOS Data Processing/ros/2019_wk_ros_ctd_sn7798/data/13_stn15_cast2/SIMEP_08182019_15_002.btl"
btl_lines <- read_tsv(btl_path, col_names = FALSE, show_col_types = FALSE) #every line of the file ends up in column X1

#Column names shared by the sdev, min and max rows (the last name, "value", takes whatever piece follows TimeJ)
stat_col_names <- c("Scan","Dz_dtM","PrdM","DepSM","Tv290C","C0mS_cm","Sal00","Sbeox0V","Par","WetCDOM","FlECO_AFL","FlECO_AFL1","V0","V1","Wl0","Wl1","Wl2","F0","F1","F2","TimeJ","value")

#The avg rows carry extra leading columns (bottle position, date, derived variables) before the shared ones
avg_col_names <- c("bottle_position","Month","Day","Year","Density00","Sigma_00","Sbeox0ML_L","Sbox0Mm_Kg","Sbeox0PS","Potemp090C","Sva", stat_col_names)

#Variables for which the (sdev, min, max) summary is kept
kept_stat_cols <- c("Dz_dtM","PrdM","DepSM","Tv290C","C0mS_cm","Sal00","WetCDOM","FlECO_AFL","FlECO_AFL1","Par")

#Helper: keep the lines containing `row_tag`, add a row ID, and split each line at runs of spaces into `into_names`
extract_btl_rows <- function(lines, row_tag, into_names) {
  lines %>%
    filter(str_detect(X1, row_tag)) %>%
    rowid_to_column() %>%
    separate(X1, into = into_names, sep = " +")
}

#Get sdev, min and max data (the sdev rows start with the time reading)
sdev_rows <- extract_btl_rows(btl_lines, "sdev", c("Time", stat_col_names))
min_rows <- extract_btl_rows(btl_lines, "min", stat_col_names)
max_rows <- extract_btl_rows(btl_lines, "max", stat_col_names)

#The three tables are combined row by row, so they must describe the same number of bottles
stopifnot(
  nrow(min_rows) == nrow(sdev_rows),
  nrow(max_rows) == nrow(sdev_rows)
)

#Get time data from sdev as a separate table
time_rows <- sdev_rows %>%
  select(rowid, Time)

#Combine sdev, min, max data into one "(sdev, min, max)" text column per kept variable
combined_stats <- kept_stat_cols %>%
  set_names(paste0(kept_stat_cols, "_sdevminmax")) %>% #added clarification to column headings
  map(\(col) paste0("(", sdev_rows[[col]], ", ", min_rows[[col]], ", ", max_rows[[col]], ")", recycle0 = TRUE)) %>%
  as_tibble() %>%
  mutate(rowid = sdev_rows$rowid, .before = 1) #attach row IDs for the join below

#Get bottle position, date, and avg data
avg_rows <- extract_btl_rows(btl_lines, "avg", avg_col_names) %>%
  mutate(source_file = "SIMEP_08182019_15_002") %>% #add source_file
  select(
    source_file, rowid, bottle_position, Month, Day, Year,
    Density00, Sigma_00, Sbeox0ML_L, Sbox0Mm_Kg, Sbeox0PS, Potemp090C, Sva,
    all_of(kept_stat_cols)
  ) %>% #remove unnecessary columns
  mutate(Month = match(toupper(Month), toupper(month.abb))) #changed month from MMM (e.g., AUG or Aug) to M (e.g., 8), ignoring letter case

#Join data frames
stn15cast2 <- avg_rows %>%
  left_join(time_rows, by = "rowid") %>%
  left_join(combined_stats, by = "rowid")

#Drop the intermediate objects so only stn15cast2 is left behind
remove(
  btl_path, btl_lines, stat_col_names, avg_col_names, kept_stat_cols, extract_btl_rows,
  sdev_rows, min_rows, max_rows, time_rows, combined_stats, avg_rows
)

#Read in the merge key, which was made by checking CTD casts start times against start times listed in the logbook to properly match casts with their metadata
logbook <- read_excel("C:/Users/katey/Desktop/CEOS Data Processing/ros/2019_wk_ros_ctd_sn7798/logbooks/merge_key.xlsx", sheet = "Sheet1") %>% #read in the merge key
  mutate(
    #fixed() makes ".cnv" a literal match; as a regex the dot would match any character before "cnv"
    source_file = str_remove_all(source_file, fixed(".cnv")),
    #convert station IDs from character to numbers
    Station = as.double(str_extract(Station, "\\d+"))
  )

#Merge bottle data with logbook, matching on the source file name
stn15cast2 <- left_join(stn15cast2, logbook, by = "source_file")
remove(logbook)

#Re-arrange columns and give them their final headings in a single step:
#select() keeps the listed columns in this order, and each "new name" = old_name pair renames as it selects.
#Station, Cast and Notes keep their existing names.
stn15cast2 <- stn15cast2 %>%
  select(
    Station,
    Cast,
    "Year (UTC)" = Year,
    "Month (UTC)" = Month,
    "Day (UTC)" = Day,
    "Time (UTC)" = Time,
    "Bottle Position" = bottle_position,
    "Pressure, Strain Gauge [db]" = PrdM,
    "Pressure, Strain Gauge [db] (sdev, min, max)" = PrdM_sdevminmax,
    "Depth [salt water, m]" = DepSM,
    "Depth [salt water, m] (sdev, min, max)" = DepSM_sdevminmax,
    "Descent Rate [m/s]" = Dz_dtM,
    "Descent Rate [m/s] (sdev, min, max)" = Dz_dtM_sdevminmax,
    "Temperature [ITS-90, deg C]" = Tv290C,
    "Temperature [ITS-90, deg C] (sdev, min, max)" = Tv290C_sdevminmax,
    "Potential Temperature [ITS-90, deg C]" = Potemp090C,
    "Conductivity [mS/cm]" = C0mS_cm,
    "Conductivity [mS/cm] (sdev, min, max)" = C0mS_cm_sdevminmax,
    "Salinity, Practical [PSU]" = Sal00,
    "Salinity, Practical [PSU] (sdev, min, max)" = Sal00_sdevminmax,
    "Density [density, kg/m^3]" = Density00,
    "Density [sigma-theta, kg/m^3]" = Sigma_00,
    "Specific Volume Anomaly [10^-8 * m^3/kg]" = Sva,
    "Oxygen, SBE 43 [ml/l]" = Sbeox0ML_L,
    "Oxygen, SBE 43 [umol/kg]" = Sbox0Mm_Kg,
    "Oxygen, SBE 43 [% saturation]" = Sbeox0PS,
    "Fluorescence, WET Labs CDOM [mg/m^3]" = WetCDOM,
    "Fluorescence, WET Labs CDOM [mg/m^3] (sdev, min, max)" = WetCDOM_sdevminmax,
    "Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]" = FlECO_AFL,
    "Fluorescence, WET Labs ECO-AFL/FL [mg/m^3] (sdev, min, max)" = FlECO_AFL_sdevminmax,
    "Fluorescence, WET Labs ECO-AFL/FL, 2 [mg/m^3]" = FlECO_AFL1,
    "Fluorescence, WET Labs ECO-AFL/FL, 2 [mg/m^3] (sdev, min, max)" = FlECO_AFL1_sdevminmax,
    "PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]" = Par,
    "PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec] (sdev, min, max)" = Par_sdevminmax,
    Notes
  )

#Bottle positions were split out of text, so convert them to numbers before doing arithmetic on them
stn15cast2 <- stn15cast2 %>%
  mutate(`Bottle Position` = as.numeric(`Bottle Position`))

#Duplicate rows since there were two bottles at each depth.
#Each duplicate gets the bottle position of its original + 1; this is done per row rather than by
#fixed row numbers, so it stays correct whatever number of rows the upcast file produced.
stn15cast2 <- bind_rows(
  stn15cast2,
  stn15cast2 %>% mutate(`Bottle Position` = `Bottle Position` + 1)
)

#Adding a note to every row extracted from the upcast (originals and duplicates)
stn15cast2 <- stn15cast2 %>%
  mutate(Notes = "Software did not register these bottles firing, information for the missing bottles 7-12 was extracted from the upcast")

#Merge Station 15 Cast 2 data with the rest of bottle data and re-sort by station, cast, bottle position.
#Both tables are converted to text first so that columns holding different types can be stacked.
bottle_data <- bind_rows(
  bottle_data %>% mutate(across(everything(), as.character)),
  stn15cast2 %>% mutate(across(everything(), as.character))
)

#Turn the three sort keys back into numbers so the sort is numeric rather than alphabetical.
#They are picked by name (not by column position) so they always match the columns used in arrange().
bottle_data <- bottle_data %>%
  mutate(across(c(Station, Cast, `Bottle Position`), as.numeric)) %>%
  arrange(Station, Cast, `Bottle Position`)

#Blank out the note on row 60 of the sorted table, which is not needed
bottle_data[["Notes"]][60] <- NA

#Save the corrected bottle data as a .CSV file in the final data folder
final_bottle_data_path <- "C:/Users/katey/Desktop/CEOS Data Processing/ros/2019_wk_ros_ctd_sn7798/data/14_final/2019_rosette_bottle_data.csv"
write.csv(bottle_data, file = final_bottle_data_path, row.names = FALSE)