library(tidyverse)
library(here)
library(fs)
library(dplyr)
library(purrr)
library(readxl)

# This script outputs a more user-friendly rosette bottle data file.
# Bottle-summary files are read from two folders (Stn 3 and the rest of the
# casts), reshaped to one row per "avg" line, and stacked into `all`.

# Build one bottle table from every file found in `btl_dir`.
#
# Arguments:
#   btl_dir    - folder holding the bottle-summary files.
#   extra_cols - names of the sensor columns that follow "TimeJ" in these
#                files (c("Spar", "Cpar") for Stn 3, none for other casts).
#
# Returns a tibble with one row per "avg" line: source_file, rowid, the date
# parts, the avg readings, the Time taken from the "sdev" line with the same
# rowid, and one "<sensor>_sdevminmax" column per kept sensor formatted as
# "(sdev, min, max)".
#
# The avg/sdev/min/max tables are matched purely by row order (rowid), so
# every file must contribute the same number of each kind of line.
build_bottle_table <- function(btl_dir, extra_cols = character()) {
  # Fields at the start of an "avg" line, ahead of the sensor readings
  lead_cols <- c(
    "bottle_position", "Month", "Day", "Year", "Density00", "Sigma_00",
    "Sbeox0ML_L", "Sbox0Mm_Kg", "Sbeox0PS", "Potemp090C", "Sva"
  )
  # Sensor fields in file order; "value" receives the trailing field
  sensor_cols <- c(
    "Scan", "Dz_dtM", "PrdM", "DepSM", "Tv290C", "C0mS_cm", "Sal00",
    "Sbeox0V", "Par", "WetCDOM", "FlECO_AFL", "FlECO_AFL1", "V0", "V1",
    "Wl0", "Wl1", "Wl2", "F0", "F1", "F2", "TimeJ", extra_cols, "value"
  )
  # Sensor fields that are kept in the output
  keep_cols <- c(
    "Dz_dtM", "PrdM", "DepSM", "Tv290C", "C0mS_cm", "Sal00", "WetCDOM",
    "FlECO_AFL", "FlECO_AFL1", "Par", extra_cols
  )

  # Read every file once; each line of a file lands in a single column X1.
  # NOTE(review): btl_dir is absolute, so here() presumably returns it
  # unchanged - confirm with the installed here/rprojroot version.
  btl_lines <- tibble(files = fs::dir_ls(here(btl_dir))) %>%
    mutate(data = map(files, ~ read_tsv(.x, col_names = FALSE, show_col_types = FALSE)))

  # Keep the lines containing `tag`, number them across all files, and split
  # them at runs of spaces into the columns named by `into`.
  # NOTE(review): `tag` is matched anywhere in the line; if header lines can
  # contain "min", "max" or "avg" a stricter pattern is needed - confirm.
  tagged_rows <- function(tag, into) {
    btl_lines %>%
      mutate(filtered_data = map2(data, files, ~ filter(.x, str_detect(X1, tag)) %>%
                                    mutate(source_file = .y))) %>%
      select(filtered_data) %>%
      unnest(filtered_data) %>%
      rowid_to_column() %>%
      separate(X1, into = into, sep = " +")
  }

  sdev_rows <- tagged_rows("sdev", c("Time", sensor_cols)) # sdev lines start with the time
  min_rows <- tagged_rows("min", sensor_cols)
  max_rows <- tagged_rows("max", sensor_cols)
  avg_rows <- tagged_rows("avg", c(lead_cols, sensor_cols))

  # The three statistics are paired row by row, so their counts must agree
  stopifnot(
    nrow(min_rows) == nrow(sdev_rows),
    nrow(max_rows) == nrow(sdev_rows)
  )

  # One "(sdev, min, max)" string per kept sensor and row
  stats <- keep_cols %>%
    set_names(paste0(keep_cols, "_sdevminmax")) %>%
    map(~ sprintf("(%s, %s, %s)", sdev_rows[[.x]], min_rows[[.x]], max_rows[[.x]])) %>%
    as_tibble() %>%
    mutate(rowid = sdev_rows$rowid, .before = 1)

  avg_rows %>%
    select(all_of(c("source_file", "rowid", lead_cols, keep_cols))) %>%
    # month abbreviation (any letter case) to month number, e.g. Aug -> 8
    mutate(Month = match(str_to_title(Month), month.abb)) %>%
    # reduce the path to the bare file name without the literal ".btl"
    mutate(source_file = str_remove_all(fs::path_file(source_file), fixed(".btl"))) %>%
    left_join(select(sdev_rows, all_of(c("rowid", "Time"))), by = "rowid") %>%
    left_join(stats, by = "rowid")
}

# For Stn 3 (parsed with the two extra columns Spar and Cpar)
stn3 <- build_bottle_table(
  "C:/Users/katey/Desktop/CEOS Data Processing/ros/2019_wk_ros_ctd_sn7798/data/12_stn3/10_bottlesum",
  extra_cols = c("Spar", "Cpar")
)

# For the rest of the casts, with the Stn 3 rows appended after them
all <- build_bottle_table(
  "C:/Users/katey/Desktop/CEOS Data Processing/ros/2019_wk_ros_ctd_sn7798/data/10_bottlesum"
) %>%
  bind_rows(stn3)

# Remove unnecessary data frames
remove(stn3)

# Read in the merge key, which was made by checking CTD cast start times
# against the start times listed in the logbook so that each cast can be
# matched with its metadata.
logbook <- read_excel(
  "C:/Users/katey/Desktop/CEOS Data Processing/ros/2019_wk_ros_ctd_sn7798/logbooks/merge_key.xlsx",
  sheet = "Sheet1"
) %>%
  # strip the literal ".cnv" so the key matches the bottle-file names
  mutate(source_file = str_remove_all(source_file, fixed(".cnv")))

# Merge CTD data with logbook
all <- left_join(all, logbook, by = "source_file")

# Tidy two logbook notes. The patterns are plain text, so they are matched
# literally (fixed = TRUE) rather than as regular expressions.
all$Notes <- gsub(
  ", but the downcast was still usable.",
  "",
  all$Notes,
  fixed = TRUE
)
all$Notes <- gsub(
  "No acclimation period, pump likely was not working until after bottle 5 and before bottle 6 were fired.",
  "No acclimation period. CTD pump was likely not working during the firing of bottles 1 through 5 for this cast",
  all$Notes,
  fixed = TRUE
)

# Re-arrange columns, then give them descriptive headings
all <- all %>%
  select(c(
    "Station", "Cast", "Year", "Month", "Day", "Time", "bottle_position",
    "PrdM", "PrdM_sdevminmax",
    "DepSM", "DepSM_sdevminmax",
    "Dz_dtM", "Dz_dtM_sdevminmax",
    "Tv290C", "Tv290C_sdevminmax",
    "Potemp090C",
    "C0mS_cm", "C0mS_cm_sdevminmax",
    "Sal00", "Sal00_sdevminmax",
    "Density00", "Sigma_00", "Sva",
    "Sbeox0ML_L", "Sbox0Mm_Kg", "Sbeox0PS",
    "WetCDOM", "WetCDOM_sdevminmax",
    "FlECO_AFL", "FlECO_AFL_sdevminmax",
    "FlECO_AFL1", "FlECO_AFL1_sdevminmax",
    "Par", "Par_sdevminmax",
    "Spar", "Spar_sdevminmax",
    "Cpar", "Cpar_sdevminmax",
    "Notes"
  )) %>%
  rename(
    "Year (UTC)" = Year,
    "Month (UTC)" = Month,
    "Day (UTC)" = Day,
    "Time (UTC)" = Time,
    "Bottle Position" = bottle_position,
    "Pressure, Strain Gauge [db]" = PrdM,
    "Pressure, Strain Gauge [db] (sdev, min, max)" = PrdM_sdevminmax,
    "Depth [salt water, m]" = DepSM,
    "Depth [salt water, m] (sdev, min, max)" = DepSM_sdevminmax,
    "Descent Rate [m/s]" = Dz_dtM,
    "Descent Rate [m/s] (sdev, min, max)" = Dz_dtM_sdevminmax,
    "Temperature [ITS-90, deg C]" = Tv290C,
    "Temperature [ITS-90, deg C] (sdev, min, max)" = Tv290C_sdevminmax,
    "Potential Temperature [ITS-90, deg C]" = Potemp090C,
    "Conductivity [mS/cm]" = C0mS_cm,
    "Conductivity [mS/cm] (sdev, min, max)" = C0mS_cm_sdevminmax,
    "Salinity, Practical [PSU]" = Sal00,
    "Salinity, Practical [PSU] (sdev, min, max)" = Sal00_sdevminmax,
    "Density [density, kg/m^3]" = Density00,
    "Density [sigma-theta, kg/m^3]" = Sigma_00,
    "Specific Volume Anomaly [10^-8 * m^3/kg]" = Sva,
    "Oxygen, SBE 43 [ml/l]" = Sbeox0ML_L,
    "Oxygen, SBE 43 [umol/kg]" = Sbox0Mm_Kg,
    "Oxygen, SBE 43 [% saturation]" = Sbeox0PS,
    "Fluorescence, WET Labs CDOM [mg/m^3]" = WetCDOM,
    "Fluorescence, WET Labs CDOM [mg/m^3] (sdev, min, max)" = WetCDOM_sdevminmax,
    "Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]" = FlECO_AFL,
    "Fluorescence, WET Labs ECO-AFL/FL [mg/m^3] (sdev, min, max)" = FlECO_AFL_sdevminmax,
    "Fluorescence, WET Labs ECO-AFL/FL, 2 [mg/m^3]" = FlECO_AFL1,
    "Fluorescence, WET Labs ECO-AFL/FL, 2 [mg/m^3] (sdev, min, max)" = FlECO_AFL1_sdevminmax,
    "PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec]" = Par,
    "PAR/Irradiance, Biospherical/Licor [umol photons/m^2/sec] (sdev, min, max)" = Par_sdevminmax,
    "SPAR, Biospherical/Licor [umol photons/m^2/sec]" = Spar,
    "SPAR, Biospherical/Licor [umol photons/m^2/sec] (sdev, min, max)" = Spar_sdevminmax,
    "CPAR/Corrected Irradiance [%]" = Cpar,
    "CPAR/Corrected Irradiance [%] (sdev, min, max)" = Cpar_sdevminmax
  )

# Save bottle data as a .CSV file
write.csv(
  all,
  "C:/Users/katey/Desktop/CEOS Data Processing/ros/2019_wk_ros_ctd_sn7798/data/11_final/2019_rosette_bottle_data.csv",
  row.names = FALSE
)