# LOAD THE LIBRARIES------------------------------------------------------------
# install.package('tidyverse')
# install.package('modelr')
# install.package('ranger')
library(tidyverse)
library(modelr)
library(ranger)

# READ THE DATA-----------------------------------------------------------------
options <-  read_csv("relevantoptions2",
                     col_types = cols(
                       todaydate = col_date(format = "%Y%m%d"),
                       settledate = col_date(format = "%Y%m%d"),
                       daystosettle = col_integer(),
                       today_sp_price = col_double(),
                       settle_sp_price = col_double(),
                       optiontype = col_character(),
                       optionstrike = col_double(),
                       optionclosingprice = col_double(),
                       optionhighprice = col_double(),
                       optionlowprice = col_double(),
                       optionvol = col_integer(),
                       optionopenint = col_integer()
                     )
)

# ADD FEATURES------------------------------------------------------------------
# buy_gain: how much we will gain at settleday if we buy 1 unit of this option on todaydate
# bet_price: what the option writer (and market) implies (bets) about sp500 price at the settle date
options <- options %>% 
  mutate(optiontype = as.double(optiontype == "put")) %>%
  mutate(buy_gain = (optionstrike - settle_sp_price)*(optiontype*2 - 1)) %>%
  mutate(bet_price = optionstrike - optionclosingprice*(optiontype*2 - 1))

# lagged (1-3-5 investment days(?)) prices for each option
prices <- options %>% 
  select(todaydate, today_sp_price) %>%
  unique() %>%
  mutate(lag_one = lag(today_sp_price), lag_three = lag(today_sp_price, 3), lag_five = lag(today_sp_price, 5) ) %>%
  select(-(today_sp_price))
options <- left_join(options, prices, by='todaydate') %>%
  filter(!is.na(lag_five))

# summaries of the betted prices for sp500 at todaydate. 
# bets are weighted according to openinterest or optionvol. (names are self-explatory)
# (Anil's impression: so far these seem useless, but I leave them for future reference!)
summary_options <- function(x,y){
  back_bets <- options %>%
    filter(todaydate == x, settledate <= y)
  forward_bets <- options %>%
    filter(todaydate == x, settledate >= y)
  return(c(sum(back_bets$bet_price * back_bets$optionopenint) / sum(back_bets$optionopenint), 
           sum(back_bets$bet_price * back_bets$optionvol) / sum(back_bets$optionvol),
           sum(forward_bets$bet_price * forward_bets$optionopenint) / sum(forward_bets$optionopenint), 
           sum(forward_bets$bet_price * forward_bets$optionvol) / sum(forward_bets$optionvol),
           sum(back_bets$optionvol), sum(back_bets$optionopenint),
           sum(forward_bets$optionvol), sum(forward_bets$optionopenint)
  )
  )
}
options <- options %>% mutate(openint_weighted_closer_bets = summary_options(todaydate, settledate)[1], 
                              optionvol_weighted_closer_bets = summary_options(todaydate, settledate)[2], 
                              openint_weighted_further_bets = summary_options(todaydate, settledate)[3], 
                              optionvol_weighted_further_bets = summary_options(todaydate, settledate)[4], 
                              closer_optionvol = summary_options(todaydate, settledate)[5],
                              closer_openint = summary_options(todaydate, settledate)[6],
                              further_optionvol = summary_options(todaydate, settledate)[7],
                              further_openint = summary_options(todaydate, settledate)[8]
)



# WARNING: DO NOT INCLUDE FOLLOWING FEATURES/PREDICTORS FOR THE PREDICTION:
# settle_sp_price 
# buy_gain


# CREATE THE PREDICTIONS -------------------------------------------------------
# At each day, make a prediction for the expected gain for each option if we buy
# one unit of that option. To make the predictions, we trained a random forest
# on the already expired options. 
# Below, I re-trained the RF once at each train_epoch on the data of the last 
# train_window days.
# These two might be crucial parameters to tune.

#(Note to myself: this part of the code can be cleaned a little before giving to 
# the French students.)
N = length(prices$todaydate) - 20
preds <- list()
ix <- 1
last_train_day <- 0
train_epoch <- 30
train_windows <- Inf
for(today in tail(prices$todaydate, N)){
  if(today > (last_train_day + train_epoch)) {
    training_data <- filter(options, settledate <= today, settledate >= (today - train_windows) ) %>% select(-todaydate, -settledate, -settle_sp_price)
    mod_rf <- ranger(buy_gain~., data = training_data)
    print(ix/N*100)
    last_train_day <- today
  }
  target_data <- filter(options, todaydate == today) %>% select(-todaydate, -settledate, -settle_sp_price)
  preds[[ix]] <- predict(mod_rf, target_data) 
  ix <- ix + 1
}

preds2 <- list()
for(i in 1:length(preds)){
  preds2[[i]] <- preds[[i]]$predictions
}
preds2 <- unlist(preds2) #%>% tail(length(preds2)) 
options2 <- options %>%
  tail(length(preds2)) %>% 
  mutate(pred_buy_gain = preds2) #, estimated_vars = vars2) 

## May want to avoid recomputing these two
write_rds(options2, "options2.rds")
#read_rds("options.rds")


# TRADING STRATEGY -------------------------------------------------------------
# Our simple strategy is characterized by gamma (how conservartive/aggressive we are).

# We first compute the variance of our predictions as the MSE for the options settled 
# within last var_est_period days, we call this variance as estimated_vars.
var_est_period <- 21
est_var <- function(currentdate, var_est_period){
  res <- options2 %>% 
    filter(settledate< currentdate, settledate > (currentdate - var_est_period)) 
  mean((res$pred_buy_gain - res$buy_gain)^2)
}
prices <- prices %>%
  mutate(estimated_vars = 0)
for(ix in 1:length(prices$todaydate)){
  prices$estimated_vars[[ix]] <- est_var(prices$todaydate[[ix]], var_est_period)
}
prices <- prices %>%
  select(-lag_one, -lag_three, -lag_five)
options2 <- options2 %>%
  left_join(prices, by = "todaydate") %>%
  filter(!is.na(estimated_vars))

# We simply buy K units of each option such that
# K = C1 predicted_gain /  (estimated_vars)^C2
# Where negative K correspond to selling options, C1 and C2 are characterizing
# how agressive/conservative our strategy is.

C1 <- 1
C2 <- 1/2
options2 <- options2 %>%
  mutate(buy_units = C1* pred_buy_gain/(estimated_vars^C2)) %>%
  mutate(actual_gain = buy_units * buy_gain, cost = buy_units*optionclosingprice) 

daily <- options2 %>% 
  group_by(todaydate) %>%
  summarise(daily_gain = sum(actual_gain), daily_cost = sum(cost), daily_transaction_buy = sum(max(buy_units,0)), daily_transaction_sell = -sum(min(buy_units,0))) %>%
  mutate(cumulative_gain = cumsum(daily_gain), cumulative_cost = cumsum(daily_cost), cum_transactions_buy = cumsum(daily_transaction_buy), cum_transactions_sell = cumsum(daily_transaction_sell))

ggplot(daily) + geom_smooth(aes(todaydate, cumulative_cost, color = "Cumulative Cost")) + geom_smooth(aes(todaydate, cumulative_gain, color = "Cumulative Profit"))
ggplot(daily) + geom_smooth(aes(todaydate, daily_cost, color = "Daily Cost")) + geom_smooth(aes(todaydate, daily_gain, color = "Daily Profit"))
ggplot(daily) + geom_line(aes(todaydate, daily_cost, color = "Daily Cost")) + geom_line(aes(todaydate, daily_gain, color = "Daily Profit"))
ggplot(daily) + geom_line(aes(todaydate, cumulative_cost, color = "Cumulative Cost")) + geom_line(aes(todaydate, cumulative_gain, color = "Cumulative Profit"))



####### CRUCIAL PARAMETERS TO TUNE
## Prediction: Features, train_epoch, train_window -- Tune according to the prediction performance, not the profits.
## Variance estimation: var_est_period -- I believe this is the most important one. S
# marter ways of estimating uncertainty should be investigated.
## Trading: C1, C2 -- by looking to the cost and profits

# Overall I believe the crucial steps are Variance Estimation and Trading steps - I would focus on these first.
# Also, the huge missing step is evaluation. (Which I hope will add some on Monday.)

#Note: Some parts of the code are a bit clumsy, but I can re-write them again before French students arrive.
