#Importing functions:
source("~/R/software-history/scripts/utils.R")
# Result loading ----

# List every file found (recursively) below `dir`, returned as full paths.
list_result_files <- function(dir) {
  list.files(path = dir, full.names = TRUE, recursive = TRUE)
}

# Read each path with read.csv() (strings kept as character, not factors)
# and stack the resulting data frames row-wise.
# Returns NULL when `paths` is empty, because rbind() of nothing is NULL.
read_result_files <- function(paths) {
  do.call(rbind, lapply(paths, read.csv, stringsAsFactors = FALSE))
}

# Rqs results
filenames <- list_result_files("~/R/analysis/results_rqs")
results_frame <- read_result_files(filenames)

# Insertions
filenames <- list_result_files("~/R/analysis/results_insertions")
methods_insertions <- read_result_files(filenames)

# Total insertions
# `filenames` is left holding this last listing, as in the original script.
filenames <- list_result_files("~/R/analysis/results")
methods_total <- read_result_files(filenames)
# Final data frame ----
# Aggregate the raw results per (Project, NoCommits), then express every
# aggregate as a percentage of its project's total.
data_plot <- results_frame %>%
  dplyr::group_by(Project, NoCommits) %>%
  dplyr::summarise(
    noTimeSeries = max(noTimeSeries),
    noChanges = sum(noChanges),
    noBugsIns = sum(noBugsIns),
    BugsByTs = sum(noBugsIns) / max(noTimeSeries),
    BugsByChanges = sum(noBugsIns) / sum(noChanges),
    ChangesByTs = sum(noChanges) / max(noTimeSeries)
  ) %>%
  # summarise() drops the innermost grouping level (NoCommits), so the sums
  # below run over all NoCommits rows of one Project.
  dplyr::mutate(
    PercBugsByTs = (BugsByTs / sum(BugsByTs)) * 100,
    PercBugsByCommit = (noBugsIns / sum(noBugsIns)) * 100,
    PercBugsByChanges = (BugsByChanges / sum(BugsByChanges)) * 100,
    PercTs = (noTimeSeries / sum(noTimeSeries)) * 100,
    PercChangesByTs = (ChangesByTs / sum(ChangesByTs)) * 100,
    PercChanges = (noChanges / sum(noChanges)) * 100
  ) %>%
  dplyr::ungroup() %>%
  as.data.frame()

# Shorten long project names so they fit in the facet strips.
# NOTE(review): "Material Drawer" is spelled with a space while the other
# keys use hyphens -- confirm it matches the value found in the CSV files.
project_short_names <- c(
  "Elasticsearch-hadoop" = "Es. Hadoop",
  "Signal-Android" = "S. Android",
  "Material Drawer" = "M. Drawer",
  "Material-Dialogs" = "M. Dialogs"
)
# match() yields NA for projects without a short name (and for NA projects),
# so only rows with a known long name are rewritten.
short_name_idx <- match(data_plot$Project, names(project_short_names))
has_short_name <- !is.na(short_name_idx)
data_plot$Project[has_short_name] <-
  unname(project_short_names[short_name_idx[has_short_name]])

projects <- unique(data_plot$Project)

(G1) To investigate the occurrence of commits throughout the history.

(Q1.1) How often are commits performed?

# Percentage of time series (PercTs) per number of commits, one facet per
# project. Projects 1-4 go on the top row and projects 5-7 on the bottom row;
# only rows with NoCommits <= 10 are drawn.
# NOTE(review): NoCommits is put on a discrete x scale with numeric limits;
# recent ggplot2 releases may warn about this -- confirm.
perc_ts_panels <- lapply(
  list(projects[1:4], projects[5:7]),
  function(project_subset) {
    panel_data <- dplyr::filter(
      data_plot,
      Project %in% project_subset,
      NoCommits <= 10
    )
    ggplot(panel_data) +
      geom_bar(
        aes(x = NoCommits, y = PercTs, group = Project),
        position = "dodge",
        stat = "identity"
      ) +
      facet_grid(. ~ Project, scales = "free") +
      ylab("Percentual") +
      scale_x_discrete(name = "Number of Commits", limits = seq(1, 10)) +
      theme(
        legend.position = c(0.7, 0.95),
        legend.direction = "horizontal",
        legend.title = element_blank(),
        legend.text = element_text(size = 7),
        legend.background = element_rect(fill = "transparent", colour = NA),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        plot.background = element_rect(fill = "transparent", colour = NA),
        axis.text.x = element_text(
          colour = "grey20", size = 5, hjust = .5, vjust = .5, face = "plain"
        )
      )
  }
)
p1 <- perc_ts_panels[[1]]
p2 <- perc_ts_panels[[2]]
ggarrange(p1, p2, ncol = 1, nrow = 2)

(Q1.2) How often are changes performed among commits?

# Percentage of changes (PercChanges) per number of commits, one facet per
# project. Projects 1-4 go on the top row and projects 5-7 on the bottom row;
# only rows with NoCommits <= 10 are drawn.
# NOTE(review): NoCommits is put on a discrete x scale with numeric limits;
# recent ggplot2 releases may warn about this -- confirm.
perc_changes_panels <- lapply(
  list(projects[1:4], projects[5:7]),
  function(project_subset) {
    panel_data <- dplyr::filter(
      data_plot,
      Project %in% project_subset,
      NoCommits <= 10
    )
    ggplot(panel_data) +
      geom_bar(
        aes(x = NoCommits, y = PercChanges, group = Project),
        position = "dodge",
        stat = "identity"
      ) +
      facet_grid(. ~ Project, scales = "free") +
      ylab("Percentual") +
      scale_x_discrete(name = "Number of Commits", limits = seq(1, 10)) +
      theme(
        legend.position = c(0.7, 0.95),
        legend.direction = "horizontal",
        legend.title = element_blank(),
        legend.text = element_text(size = 7),
        legend.background = element_rect(fill = "transparent", colour = NA),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        plot.background = element_rect(fill = "transparent", colour = NA),
        axis.text.x = element_text(
          colour = "grey20", size = 5, hjust = .5, vjust = .5, face = "plain"
        )
      )
  }
)
p1 <- perc_changes_panels[[1]]
p2 <- perc_changes_panels[[2]]
ggarrange(p1, p2, ncol = 1, nrow = 2)

(Q1.3) How often are changes performed among time series?

# Percentage of changes per time series (PercChangesByTs) per number of
# commits, one facet per project. Projects 1-4 go on the top row and projects
# 5-7 on the bottom row; only rows with NoCommits <= 10 are drawn.
# NOTE(review): NoCommits is put on a discrete x scale with numeric limits;
# recent ggplot2 releases may warn about this -- confirm.
perc_changes_by_ts_panels <- lapply(
  list(projects[1:4], projects[5:7]),
  function(project_subset) {
    panel_data <- dplyr::filter(
      data_plot,
      Project %in% project_subset,
      NoCommits <= 10
    )
    ggplot(panel_data) +
      geom_bar(
        aes(x = NoCommits, y = PercChangesByTs, group = Project),
        position = "dodge",
        stat = "identity"
      ) +
      facet_grid(. ~ Project, scales = "free") +
      ylab("Percentual") +
      scale_x_discrete(name = "Number of Commits", limits = seq(1, 10)) +
      theme(
        legend.position = c(0.7, 0.95),
        legend.direction = "horizontal",
        legend.title = element_blank(),
        legend.text = element_text(size = 7),
        legend.background = element_rect(fill = "transparent", colour = NA),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        plot.background = element_rect(fill = "transparent", colour = NA),
        axis.text.x = element_text(
          colour = "grey20", size = 5, hjust = .5, vjust = .5, face = "plain"
        )
      )
  }
)
p1 <- perc_changes_by_ts_panels[[1]]
p2 <- perc_changes_by_ts_panels[[2]]
ggarrange(p1, p2, ncol = 1, nrow = 2)

(G2) To assess the introduction of bugs throughout the history.

(Q2.1) How often are bugs introduced among the commits?

# Percentage of bug insertions (PercBugsByCommit) per number of commits, one
# facet per project. Projects 1-4 go on the top row and projects 5-7 on the
# bottom row; only rows with NoCommits <= 10 are drawn.
# NOTE(review): NoCommits is put on a discrete x scale with numeric limits;
# recent ggplot2 releases may warn about this -- confirm.
perc_bugs_by_commit_panels <- lapply(
  list(projects[1:4], projects[5:7]),
  function(project_subset) {
    panel_data <- dplyr::filter(
      data_plot,
      Project %in% project_subset,
      NoCommits <= 10
    )
    ggplot(panel_data) +
      geom_bar(
        aes(x = NoCommits, y = PercBugsByCommit, group = Project),
        position = "dodge",
        stat = "identity"
      ) +
      facet_grid(. ~ Project, scales = "free") +
      ylab("Percentual") +
      scale_x_discrete(name = "Number of Commits", limits = seq(1, 10)) +
      theme(
        legend.position = c(0.7, 0.95),
        legend.direction = "horizontal",
        legend.title = element_blank(),
        legend.text = element_text(size = 7),
        legend.background = element_rect(fill = "transparent", colour = NA),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        plot.background = element_rect(fill = "transparent", colour = NA),
        axis.text.x = element_text(
          colour = "grey20", size = 5, hjust = .5, vjust = .5, face = "plain"
        )
      )
  }
)
p1 <- perc_bugs_by_commit_panels[[1]]
p2 <- perc_bugs_by_commit_panels[[2]]
ggarrange(p1, p2, ncol = 1, nrow = 2)

(Q2.2) How often are bugs introduced among the time series?

# Percentage of bug insertions per time series (PercBugsByTs) per number of
# commits, one facet per project. Projects 1-4 go on the top row and projects
# 5-7 on the bottom row; only rows with NoCommits <= 10 are drawn.
# NOTE(review): NoCommits is put on a discrete x scale with numeric limits;
# recent ggplot2 releases may warn about this -- confirm.
perc_bugs_by_ts_panels <- lapply(
  list(projects[1:4], projects[5:7]),
  function(project_subset) {
    panel_data <- dplyr::filter(
      data_plot,
      Project %in% project_subset,
      NoCommits <= 10
    )
    ggplot(panel_data) +
      geom_bar(
        aes(x = NoCommits, y = PercBugsByTs, group = Project),
        position = "dodge",
        stat = "identity"
      ) +
      facet_grid(. ~ Project, scales = "free") +
      ylab("Percentual") +
      scale_x_discrete(name = "Number of Commits", limits = seq(1, 10)) +
      theme(
        legend.position = c(0.7, 0.95),
        legend.direction = "horizontal",
        legend.title = element_blank(),
        legend.text = element_text(size = 7),
        legend.background = element_rect(fill = "transparent", colour = NA),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        plot.background = element_rect(fill = "transparent", colour = NA),
        axis.text.x = element_text(
          colour = "grey20", size = 5, hjust = .5, vjust = .5, face = "plain"
        )
      )
  }
)
p1 <- perc_bugs_by_ts_panels[[1]]
p2 <- perc_bugs_by_ts_panels[[2]]
ggarrange(p1, p2, ncol = 1, nrow = 2)

(Q2.3) How often are bugs introduced among the changes?

# Percentage of bug insertions per change (PercBugsByChanges) per number of
# commits, one facet per project. Projects 1-4 go on the top row and projects
# 5-7 on the bottom row; only rows with NoCommits <= 10 are drawn.
# NOTE(review): NoCommits is put on a discrete x scale with numeric limits;
# recent ggplot2 releases may warn about this -- confirm.
perc_bugs_by_changes_panels <- lapply(
  list(projects[1:4], projects[5:7]),
  function(project_subset) {
    panel_data <- dplyr::filter(
      data_plot,
      Project %in% project_subset,
      NoCommits <= 10
    )
    ggplot(panel_data) +
      geom_bar(
        aes(x = NoCommits, y = PercBugsByChanges, group = Project),
        position = "dodge",
        stat = "identity"
      ) +
      facet_grid(. ~ Project, scales = "free") +
      ylab("Percentual") +
      scale_x_discrete(name = "Number of Commits", limits = seq(1, 10)) +
      theme(
        legend.position = c(0.7, 0.95),
        legend.direction = "horizontal",
        legend.title = element_blank(),
        legend.text = element_text(size = 7),
        legend.background = element_rect(fill = "transparent", colour = NA),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        plot.background = element_rect(fill = "transparent", colour = NA),
        axis.text.x = element_text(
          colour = "grey20", size = 5, hjust = .5, vjust = .5, face = "plain"
        )
      )
  }
)
p1 <- perc_bugs_by_changes_panels[[1]]
p2 <- perc_bugs_by_changes_panels[[2]]
ggarrange(p1, p2, ncol = 1, nrow = 2)

(G4) To analyse the relation between the history of changes and bugs;

(Q4.1) Is a high number of changes indicative of bug introduction?

# Changes chart ----
# Keep only the rows where change type, metric group and metric are all "All".
methods_changes <- dplyr::filter(
  methods_total,
  changeType == "All",
  groupMetric == "All",
  Metric == "All"
)

# Per-row total derived from the bugsInsertion column. getTotalBugsIns is not
# defined in this part of the file (presumably it comes from utils.R).
methods_changes[, "totalofBugsIns"] <-
  plyr::maply(methods_changes$bugsInsertion, getTotalBugsIns)

# Keep only time series that have bug insertions and more than one commit.
methods_changes <- dplyr::filter(
  methods_changes,
  totalofBugsIns > 0,
  NtimeofCommits > 1
)

# genId is defined elsewhere; presumably it adds the `id` column that is
# iterated over below -- confirm.
methods_changes <- genId(methods_changes)

# Lists of changes and bug insertions. They are written with `<<-` so they
# land in an enclosing/global environment even if this code is evaluated in a
# local one; presumably getStatsProcess reads them -- confirm before changing.
listofPeaks <<- methods_changes$elementsValue
listofBugsIns <<- methods_changes$bugsInsertion

# Create the statistics columns first, then fill them with one row of results
# per id as returned by getStatsProcess (defined elsewhere).
stats_columns <- c(
  "peaks", "tp", "fp", "fn", "tn", "Precision", "Recall", "Fmeasure"
)
methods_changes[, stats_columns] <- 0
methods_changes[, stats_columns] <-
  plyr::ldply(methods_changes$id, getStatsProcess)
# Per-project medians of Precision, Recall and F-measure (NA values ignored).
confusion_matrix <- methods_changes %>%
  dplyr::group_by(Project) %>%
  dplyr::summarise(
    Precision = median(Precision, na.rm = TRUE),
    Recall = median(Recall, na.rm = TRUE),
    Fmeasure = median(Fmeasure, na.rm = TRUE)
  ) %>%
  as.data.frame()

# Reshape to long format: one row per (Project, measure) pair.
# `id.vars` is spelled out instead of relying on partial matching of `id`.
# Note that this overwrites the earlier `data_plot` object.
data_plot <- melt(confusion_matrix, id.vars = "Project")
colnames(data_plot) <- c("Project", "Measure", "Values")
# Plot the results: one dodged bar per measure and project, each bar labelled
# with its value rounded to the nearest integer.
ggplot(data_plot) +
  geom_bar(
    aes(x = Project, y = Values, fill = Measure, group = Measure),
    position = "dodge",
    stat = "identity"
  ) +
  geom_text(
    aes(x = Project, y = Values, label = round(Values), group = Measure),
    check_overlap = TRUE,
    position = position_dodge(width = 1),
    vjust = -0.5,
    size = 3
  ) +
  # cbPalette is not defined in this part of the file (presumably utils.R).
  scale_fill_manual(values = cbPalette) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_blank(),
    legend.direction = "vertical",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  )

Results

# Display the current contents of `confusion_matrix`.
confusion_matrix

(Q4.1) Is a high number of changes indicative of bug introduction in Granger-positive cases?

# Granger ----
# Keep only the rows where P1..P5 and GrangerPos are all equal to 1.
data_granger <- dplyr::filter(
  methods_changes,
  P1 == 1,
  P2 == 1,
  P3 == 1,
  P4 == 1,
  P5 == 1,
  GrangerPos == 1
)

# Per-project medians of Precision, Recall and F-measure (NA values ignored),
# computed on the Granger subset only.
confusion_matrix <- data_granger %>%
  dplyr::group_by(Project) %>%
  dplyr::summarise(
    Precision = median(Precision, na.rm = TRUE),
    Recall = median(Recall, na.rm = TRUE),
    Fmeasure = median(Fmeasure, na.rm = TRUE)
  ) %>%
  as.data.frame()

# Display the resulting table.
confusion_matrix
