## Kaggle: Exploring Kobe's Shots
library(dplyr)
library(ggplot2)

# Load the raw shot log, keeping text columns as plain character vectors.
data <- read.csv("data.csv", stringsAsFactors = FALSE)

# Rows with a recorded shot_made_flag form the training set; rows where the
# flag is missing (NA) form the test set.
train <- data[!is.na(data[["shot_made_flag"]]), ]
test <- data[is.na(data[["shot_made_flag"]]), ]

# Store the outcome as a factor so it is treated as a categorical variable.
train[["shot_made_flag"]] <- as.factor(train[["shot_made_flag"]])

# List the columns available for exploration.
colnames(train)

#' Plot shooting accuracy by a feature
#'
#' Draws one bar per value of `feat`, filled by `shot_made_flag` and stacked to
#' a height of 1 (`position = "fill"`), so each bar shows the proportion of
#' each `shot_made_flag` level for that value.
#'
#' @param feat Unquoted column name (or expression over columns) of `data` to
#'   put on the x axis.
#' @param data Data frame to plot; it must contain a `shot_made_flag` column.
#'   Defaults to the script-level `train` data frame, looked up when the
#'   function is called.
#' @return A ggplot object.
pplot <- function(feat, data = train) {
  feat <- substitute(feat)

  # as.character() on a call splits it into its components, which would give
  # paste() several titles; deparse anything that is not a bare column name.
  feat_label <- if (is.symbol(feat)) {
    as.character(feat)
  } else {
    paste(deparse(feat), collapse = " ")
  }

  # aes_q() is deprecated since ggplot2 3.0.0; unquote the captured
  # expression inside aes() with `!!` instead.
  ggplot(data = data, aes(x = !!feat)) +
    geom_bar(aes(fill = shot_made_flag), stat = "count", position = "fill") +
    scale_fill_brewer(palette = "Set1", direction = +1) +
    ggtitle(paste("accuracy by", feat_label))
}

#' Plot shot locations coloured by a feature
#'
#' Scatter-plots every row of `data` at (`lon`, `lat`) and colours the points
#' by `feat` using the "Set1" brewer palette on a void theme.
#'
#' @param feat Unquoted column name (or expression over columns) of `data` used
#'   for the point colour. `scale_color_brewer()` is a discrete scale, so
#'   `feat` should be categorical.
#' @param data Data frame with `lon` and `lat` columns. Defaults to the
#'   script-level `train` data frame, looked up when the function is called.
#' @param lat_limits Numeric vector of length 2 passed to `ylim()`; it sets the
#'   range of the `lat` axis.
#' @return A ggplot object.
courtplot <- function(feat, data = train, lat_limits = c(33.7, 34.0883)) {
  feat <- substitute(feat)

  # as.character() on a call splits it into its components, which would give
  # several titles; deparse anything that is not a bare column name.
  feat_label <- if (is.symbol(feat)) {
    as.character(feat)
  } else {
    paste(deparse(feat), collapse = " ")
  }

  # aes_q() is deprecated since ggplot2 3.0.0; unquote the captured
  # expression inside aes() with `!!` instead.
  ggplot(data, aes(x = lon, y = lat)) +
    geom_point(aes(color = !!feat), alpha = 0.7, size = 3) +
    ylim(lat_limits) +
    scale_color_brewer(palette = "Set1") +
    theme_void() +
    ggtitle(feat_label)
}

# Locations of the various combined shot types.
courtplot(combined_shot_type) # hard to read; see the layered version below

# Same data drawn in two layers: jump shots as a faint grey backdrop, every
# other shot type on top and coloured by type.
ggplot() +
  geom_point(
    data = train %>% filter(combined_shot_type == "Jump Shot"),
    mapping = aes(x = lon, y = lat),
    color = "grey", alpha = 0.3, size = 2
  ) +
  geom_point(
    data = train %>% filter(combined_shot_type != "Jump Shot"),
    mapping = aes(x = lon, y = lat, color = combined_shot_type),
    alpha = 0.7, size = 3
  ) +
  scale_color_brewer(palette = "Set1") +
  ylim(33.7, 34.0883) +
  theme_void() +
  labs(title = "Shot Types")

# Non-jump shots with shot_distance below 5, coloured by outcome.
# The X marker sits at loc_x = 0, loc_y = 0 (presumably the basket -- confirm).
ggplot() +
  geom_point(
    data = train %>%
      filter(combined_shot_type != "Jump Shot" & shot_distance < 5),
    mapping = aes(x = loc_x, y = loc_y, color = shot_made_flag),
    alpha = 0.7, size = 3
  ) +
  geom_point(aes(x = 0, y = 0), size = 5, shape = 4) +
  scale_color_brewer(palette = "Set1") +
  theme_void() +
  labs(title = "Shots from up close")

# Every training shot at its court position, one panel per outcome, coloured
# by whether the shot was made.
ggplot(data = train, mapping = aes(x = loc_x, y = loc_y)) +
  geom_point(aes(color = shot_made_flag), size = 0.5, alpha = 0.5) +
  facet_grid(. ~ shot_made_flag) +
  scale_color_brewer(palette = "Set1") +
  ylim(-50, 400) +
  theme_void() +
  ggtitle("Shots Made(Blue) vs. Shots Missed(Red)")

# Bin loc_x into 25 equal-width intervals.
train[["x_bins"]] <- cut(train[["loc_x"]], breaks = 25)

# Shot volume per bin: the extra geom_bar() layer draws raw counts on top of
# the accuracy bars. Bin labels are hidden from the x axis.
pplot(x_bins) +
  geom_bar() +
  theme(axis.text.x = element_blank()) +
  ggtitle("Shot Distribution by x_bins")

# Accuracy per bin, again without the bin labels.
pplot(x_bins) +
  theme(axis.text.x = element_blank())

# Action types that occur fewer than 20 times in the training data, most
# frequent first.
actions <- train %>%
  count(action_type) %>%
  arrange(desc(n)) %>%
  filter(n < 20)

# Fold those rare action types into a single "Other" category.
train$action_type <- replace(
  train$action_type,
  train$action_type %in% actions$action_type,
  "Other"
)

# Row-wise proportions of each shot_made_flag level per action type (each row
# sums to 1), converted to a data frame with the action type as a column.
temp <- as.data.frame.matrix(
  prop.table(table(train$action_type, train$shot_made_flag), margin = 1)
)
temp$shot <- rownames(temp)

# Dot plot of column `1` (the share of shots with shot_made_flag == 1) per
# action type, ordered by that share; coord_flip() puts the action types on
# the vertical axis.
ggplot(temp, aes(x = reorder(shot, `1`), y = `1`)) +
  geom_point(size = 3, color = " dark blue", stat = "identity") +
  coord_flip() +
  labs(title = "Accuracy by Shot_type", x = "", y = "Accuracy")

# Shot locations coloured by each of the three shot-zone features.
courtplot(shot_zone_area)
courtplot(shot_zone_basic) 
courtplot(shot_zone_range)

# Accuracy (proportion of each shot_made_flag level) by the time-related
# columns.
pplot(minutes_remaining)
pplot(period)
pplot(seconds_remaining)

# Shot counts by seconds_remaining: the added geom_bar() layer draws raw
# counts on top of the accuracy bars produced by pplot().
pplot(seconds_remaining) + geom_bar() + ggtitle("Histogram of Shots by second_remaining")
# Accuracy by the remaining features. coord_flip() swaps the axes so the
# feature values run along the vertical axis.
pplot(season) + coord_flip()
# xlim(0, 60) limits the x axis to shot_distance values between 0 and 60.
pplot(shot_distance) + xlim(0, 60)
pplot(combined_shot_type)
pplot(shot_type)
pplot(shot_zone_area) + coord_flip()
pplot(shot_zone_basic) + coord_flip()
pplot(opponent) + coord_flip()

