library(tidyverse)
library(here)
library(fs)
library(readxl)
library(janitor)

#This script checks whether sectioning was done correctly by comparing the span ranges in each sectioned file's header with the Excel logbook

# Read in the casts.
# Every file in the section folder is parsed as tab-separated text without a
# header row, each row is tagged with the path of the file it came from
# (`source_file`), and the per-file tables are stacked into one table.
data <- fs::dir_ls(here("C:/Users/katey/Desktop/CEOS Data Processing/ros/2021_wk_ros_ctd_sn7798/data/02_section")) %>%
  map(function(cast_path) {
    read_tsv(cast_path, col_names = FALSE, show_col_types = FALSE) %>%
      mutate(source_file = as.character(cast_path)) # remember the origin of each row
  }) %>%
  bind_rows() # one long table holding the lines of every file

# Subset needed rows
# The header lines of interest have the form "# span <n> = <min>, <max>".

# Extract the "# span <span_index> = ..." header line of every file.
#   header_lines: table with the raw line text in `X1` plus a `source_file` column
#   span_index:   the number that follows "# span" in the header line
# Returns one row per matching line with three columns:
#   e           - first value of the range, as text with commas removed
#   f           - second value of the range, as text
#   source_file - file the line was read from
extract_span <- function(header_lines, span_index) {
  # Match the whole "# span <n> =" prefix literally, so that e.g. span 3 can
  # never pick up span 30-39 in files that carry many columns.
  span_prefix <- paste0("# span ", span_index, " =")
  header_lines %>%
    filter(grepl(span_prefix, X1, fixed = TRUE)) %>%
    # splitting on runs of spaces yields: "#", "span", "<n>", "=", "<min>,", "<max>"
    separate(col = X1, sep = " +", into = c("a", "b", "c", "d", "e", "f")) %>%
    mutate(e = str_remove_all(e, ",")) %>% # drop the comma that trails the first value
    select(e, f, source_file)
}

data_filtered_span0 <- extract_span(data, 0) # span 0: scan range (compared with the logbook below)

data_filtered_span2 <- extract_span(data, 2) # span 2: pressure range

data_filtered_span3 <- extract_span(data, 3) # span 3: depth range

# Check that the scan ranges found in the file headers are what they should
# have been according to the Excel sheet
excel <- read_excel(
  "C:/Users/katey/Desktop/CEOS Data Processing/ros/2021_wk_ros_ctd_sn7798/logbooks/section.xlsx",
  sheet = "Sheet1",
  na = "NA"
) # read in the excel sheet

# Create the key column to match with the excel sheet: the file name without
# its directory and without ".cnv". The directory is removed with basename()
# rather than by stripping a hard-coded folder prefix, so the key stays correct
# no matter how the path to the section folder is spelled.
data_filtered_span0 <- data_filtered_span0 %>%
  mutate(source_file = str_remove_all(basename(source_file), "\\.cnv"))

excel <- excel %>%
  left_join(data_filtered_span0, by = c("cast_id" = "source_file")) %>% # logbook rows keep their order; unmatched casts get NA in e/f
  mutate(
    # convert columns to numeric
    start_scan = as.numeric(start_scan),
    end_scan = as.numeric(end_scan),
    e = as.numeric(e),
    f = as.numeric(f),
    # difference between the logbook value and the header value; 0 means they agree
    start_check = start_scan - e,
    end_check = end_scan - f
  ) %>%
  select(-notes) # removed unneeded column

# Prepare to check that none of the pressure and depth values are below 0
data_filtered_span2 <- data_filtered_span2 %>%
  mutate(e = as.numeric(e), f = as.numeric(f)) # convert columns to numeric
data_filtered_span3 <- data_filtered_span3 %>%
  mutate(e = as.numeric(e), f = as.numeric(f)) # convert columns to numeric

# After the join the span 2 values are in e.x / f.x and the span 3 values are
# in e.y / f.y.
depth_check <- data_filtered_span2 %>%
  left_join(data_filtered_span3, by = "source_file") %>%
  # Count the negative values in each row. Only numeric columns are tested:
  # comparing the character `source_file` column with 0 would be a string
  # comparison whose outcome depends on the path text and the locale.
  # rowSums() already works row by row, so no rowwise() grouping is needed.
  mutate(has_negatives = rowSums(across(where(is.numeric), ~ .x < 0)))

# CHECK THAT ALL FOUR VALUES PRINTED BELOW ARE ZERO!
# 1) number of NA cells anywhere in the joined logbook table
sum(is.na(excel))
# 2) number of casts whose start value in the file header differs from the Excel sheet
#    (rows where the difference is NA are not counted here)
sum(excel$start_check != 0, na.rm = TRUE)
# 3) number of casts whose end value in the file header differs from the Excel sheet
#    (rows where the difference is NA are not counted here)
sum(excel$end_check != 0, na.rm = TRUE)
# 4) number of casts flagged as containing a negative pressure/depth value (in the air)
sum(depth_check$has_negatives != 0, na.rm = TRUE)