Project Signal-Android

# Load the helper functions shared by the analysis scripts.
source("~/R/software-history/scripts/utils.R")

# Name of the analysed project; it prefixes every input file name.
project <- "Signal-Android"

# Input CSVs. Every path is rooted at the home directory ("~") so that the
# three reads agree with each other and do not depend on one user account.
results_frame <-
  read.csv(
    paste0("~/R/analysis/results/", project, "_resultsRqsBugsIns.csv"),
    stringsAsFactors = FALSE
  )
methods_insertions <-
  read.csv(
    paste0("~/R/analysis/results_insertions/", project, "_insertionTs.csv"),
    stringsAsFactors = FALSE
  )
methods_total <-
  read.csv(
    paste0("~/R/analysis/results/", project, "_changesTsWithBugsIns.csv"),
    stringsAsFactors = FALSE
  )

# Tag every row with the project name; later steps group and facet by it.
results_frame$Project <- project

Raw Dataset:

Aqui é apresentado o dataset que foi discutido na última reunião, conforme a planilha que o Biel gerou no Google Drive. Tal dataset consiste na relação entre os valores de mudanças e de inserção de bugs por commit. Segue uma descrição das respectivas colunas:

NoCommits : A Quantidade de commits (k) que a time serie possui;

commit : O respectivo commit dentro da time series com k commits;

NoTimeSeries : Quantidade de time series no commit;

NoBugsIns : Quantidade de bugs inseridos no respectivo commit;

noChanges : Quantidade de mudanças;

percBugs : Percentual de bugs inseridos [Formula noBugsIns/sum(noBugsIns)];

percChanges : Percentual de mudanças [Formula: noChanges/sum(noChanges) ];

Obs: Ressaltando que os dados no dataset encontram-se desnormalizados, portanto há uma possibilidade de repetição de valores em algumas colunas, ex: na coluna NoTimeSeries, na linha 2 e 3 existe o valor 1681 de maneira repetida, mas no cálculo das respectivas metricas é utilizado somente um valor, visto que existem somente 1681 time series com 2 commits e não o dobro (soma das linhas 2 e 3).

# Print the raw per-commit results table loaded above.
results_frame

Final Dataset:

Aqui é apresentado o dataset com as métricas calculadas com o objetivo de gerar os respectivos gráficos presentes nas Goals 1 e 2. Segue uma descrição das colunas e suas respectivas fórmulas:

NoCommits : A Quantidade de commits (k) que a time serie possui;

NoTimeSeries : Quantidade de time series no commit;

noChanges : Quantidade de mudanças;

NoBugsIns : Quantidade de bugs inseridos no respectivo commit;

BugsByTs : Quantidade de bugs por time series [Formula : sum(noBugsIns)/max(noTimeSeries) ];

BugsByChanges : Quantidade de bugs por mudança [Formula: sum(noBugsIns) / sum(noChanges)];

ChangesByTs: Quantidade de mudanças por time series [Formula: sum(noChanges) / max(noTimeSeries)];

PercBugsByTs: Percentual de bugs por time series [Formula: (BugsByTs/sum(BugsByTs)) * 100];

PercBugsByCommit: Percentual de Bugs por commits [Formula: (noBugsIns/sum(noBugsIns)) * 100];

PercBugsByChanges: Percentual de Bugs por mudanças [Formula: (BugsByChanges/sum(BugsByChanges)) * 100];

PercTs: Percentual de time series [Formula: (noTimeSeries / sum(noTimeSeries)) * 100];

PercChangesByTs: Percentual de mudanças por time series [Formula: (ChangesByTs / sum(ChangesByTs)) * 100];

PercChanges: Percentual de mudanças [Formula: (noChanges / sum(noChanges)) * 100];

Obs: Conforme explicado, os dados anteriores (raw dataset) têm algumas colunas desnormalizadas (NoTimeSeries), portanto algumas fórmulas contêm o valor máximo da respectiva coluna dentro do agrupamento definido.

# Final data frame: aggregate the raw rows per (Project, NoCommits), then
# express each aggregate as a percentage of the totals of its project.
data_plot <- dplyr::group_by(results_frame, Project, NoCommits)
data_plot <- dplyr::summarise(
  data_plot,
  # noTimeSeries is repeated on every row of a group (denormalised input),
  # so it is taken once with max() instead of being summed.
  noTimeSeries = max(noTimeSeries),
  noChanges = sum(noChanges),
  noBugsIns = sum(noBugsIns),
  BugsByTs = sum(noBugsIns) / max(noTimeSeries),
  BugsByChanges = sum(noBugsIns) / sum(noChanges),
  ChangesByTs = sum(noChanges) / max(noTimeSeries)
)
# After summarise() the data is still grouped by Project, so every sum()
# below is a per-project total.
data_plot <- dplyr::mutate(
  data_plot,
  PercBugsByTs = (BugsByTs / sum(BugsByTs)) * 100,
  PercBugsByCommit = (noBugsIns / sum(noBugsIns)) * 100,
  PercBugsByChanges = (BugsByChanges / sum(BugsByChanges)) * 100,
  PercTs = (noTimeSeries / sum(noTimeSeries)) * 100,
  PercChangesByTs = (ChangesByTs / sum(ChangesByTs)) * 100,
  PercChanges = (noChanges / sum(noChanges)) * 100
)
data_plot <- as.data.frame(dplyr::ungroup(data_plot))

# Display the final dataset.
data_plot

(G1)To investigate the occurrence of commits along the time series;

(Q1.1) How often are commits performed along the time series?

# Frequency: number of time series (y) for each commit count (x).
ggplot(data_plot, aes(x = NoCommits, y = noTimeSeries, group = Project)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Frequency of Time Series by Commits") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

# Percentage: share of time series (PercTs) for each commit count.
ggplot(data_plot, aes(x = NoCommits, y = PercTs, group = Project)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Percentual of Time Series by Commits") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

(Q1.2) How often are changes performed among commits?

# Frequency: total number of changes (noChanges) for each commit count.
ggplot(data_plot, aes(x = NoCommits, y = noChanges)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Frequency of Changes by Commits") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

# Percentage: share of changes (PercChanges) for each commit count.
ggplot(data_plot, aes(x = NoCommits, y = PercChanges)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Percentual of Changes by Commits") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

(Q1.3) How often are changes performed among time series?

# Frequency: changes per time series (ChangesByTs) for each commit count.
# The y label previously read "Time Seris"; the typo is fixed here.
ggplot(data_plot, aes(x = NoCommits, y = ChangesByTs)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Frequency of Changes by Time Series") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

# Percentage: share of changes per time series (PercChangesByTs).
ggplot(data_plot, aes(x = NoCommits, y = PercChangesByTs)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Percentual of Changes by Time Series") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

(G2)To assess the introduction of bugs along the history;

(Q2.1) How often are bugs introduced among the commits?

# Frequency: number of inserted bugs (noBugsIns) for each commit count.
# This plot shows absolute counts, so its y label says "Frequency"; it used
# to carry the "Percentual" label copied from the plot below.
ggplot(data_plot, aes(x = NoCommits, y = noBugsIns)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Frequency of Bugs by Commits") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

# Percentage: share of inserted bugs (PercBugsByCommit) per commit count.
ggplot(data_plot, aes(x = NoCommits, y = PercBugsByCommit)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Percentual of Bugs by Commits") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

(Q2.2) How often are bugs introduced among the time series?

# Frequency: bugs per time series (BugsByTs) for each commit count.
# This plot shows the raw ratio, so its y label says "Frequency"; it used to
# carry the "Percentual" label copied from the plot below.
ggplot(data_plot, aes(x = NoCommits, y = BugsByTs)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Frequency of Bugs by Time Series") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

# Percentage: share of bugs per time series (PercBugsByTs).
ggplot(data_plot, aes(x = NoCommits, y = PercBugsByTs)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Percentual of Bugs by Time Series") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

(Q2.3) How often are bugs introduced among the changes?

# Frequency: bugs per change (BugsByChanges) for each commit count.
# This plot shows the raw ratio, so its y label says "Frequency"; it used to
# carry the "Percentual" label copied from the plot below.
ggplot(data_plot, aes(x = NoCommits, y = BugsByChanges)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Frequency of Bugs by Changes") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

# Percentage: share of bugs per change (PercBugsByChanges).
ggplot(data_plot, aes(x = NoCommits, y = PercBugsByChanges)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Percentual of Bugs by Changes") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

(G4) To analyse the relation between the history of changes and bugs;

(Q4.1) Is a high number of changes an indicative of bugs introduction?

# Changes chart: keep only the fully aggregated rows (all change types,
# all metric groups, all metrics).
methods_changes <- dplyr::filter(
  methods_total,
  changeType == "All", groupMetric == "All", Metric == "All"
)

# Total number of bug insertions for each row. getTotalBugsIns is not
# defined in this file (presumably provided by utils.R).
methods_changes[, "totalofBugsIns"] <-
  maply(methods_changes$bugsInsertion, getTotalBugsIns)

# Keep only the time series with at least one bug insertion and more than
# one commit.
methods_changes <- dplyr::filter(
  methods_changes,
  totalofBugsIns > 0, NtimeofCommits > 1
)

# genId is an external helper; the `id` column used below presumably comes
# from it.
methods_changes <- genId(methods_changes)

# Publish the change and bug-insertion series as globals.
# NOTE(review): getStatsProcess presumably looks these up by row id; confirm
# in utils.R before replacing `<<-` with a plain assignment.
listofPeaks <<- methods_changes$elementsValue
listofBugsIns <<- methods_changes$bugsInsertion

# Create the statistic columns, then fill them with one row of results per
# id returned by getStatsProcess.
stat_columns <- c(
  "peaks", "tp", "fp", "fn", "tn", "precision", "recall", "fmeasure"
)
methods_changes[, stat_columns] <- 0
methods_changes[, stat_columns] <-
  plyr::ldply(methods_changes$id, getStatsProcess)

# Median precision, recall and F-measure per project.
confusion_matrix <- as.data.frame(
  methods_changes %>%
    dplyr::group_by(Project) %>%
    dplyr::summarise(
      precision = median(precision, na.rm = TRUE),
      recall = median(recall, na.rm = TRUE),
      fmeasure = median(fmeasure, na.rm = TRUE)
    )
)

# Pivot to long format (one row per project and measure). The summary built
# just above is the input; the previous code melted an undefined `dfPeaks`.
confusion_matrix <- melt(confusion_matrix, id.vars = "Project")
colnames(confusion_matrix) <- c("Project", "Measure", "Values")

# Plot the three measures as labelled bars. The dangling empty
# `axis.ticks.x = ` argument was dropped from theme().
ggplot(confusion_matrix, aes(x = Measure, y = Values)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = round(Values, 2)),
    check_overlap = TRUE,
    position = position_dodge(width = 1),
    vjust = -0.5,
    size = 3
  ) +
  theme(
    legend.direction = "vertical",
    legend.title = element_blank(),
    legend.text = element_text(size = 8),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  ) +
  ggtitle(project) +
  ylab("Percentual")

---
title: "R Notebook"
output: html_notebook
---

### Project Signal-Android

```{r}
# Load the helper functions shared by the analysis scripts.
source("~/R/software-history/scripts/utils.R")

# Name of the analysed project; it prefixes every input file name.
project <- "Signal-Android"

# Input CSVs. Every path is rooted at the home directory ("~") so that the
# three reads agree with each other and do not depend on one user account.
results_frame <-
  read.csv(
    paste0("~/R/analysis/results/", project, "_resultsRqsBugsIns.csv"),
    stringsAsFactors = FALSE
  )

methods_insertions <-
  read.csv(
    paste0("~/R/analysis/results_insertions/", project, "_insertionTs.csv"),
    stringsAsFactors = FALSE
  )

methods_total <-
  read.csv(
    paste0("~/R/analysis/results/", project, "_changesTsWithBugsIns.csv"),
    stringsAsFactors = FALSE
  )

# Tag every row with the project name; later steps group and facet by it.
results_frame$Project <- project

```


### Raw Dataset:

Aqui é apresentado o dataset que foi discutido na última reunião, conforme a planilha que o Biel gerou no Google Drive. Tal dataset consiste na relação entre os valores de mudanças e de inserção de bugs por commit. Segue uma descrição das respectivas colunas:

**NoCommits** : A Quantidade de commits (k) que a time serie possui;

**commit** : O respectivo commit dentro da time series com k commits;

**NoTimeSeries** : Quantidade de time series no commit;

**NoBugsIns** : Quantidade de bugs inseridos no respectivo commit;

**noChanges** : Quantidade de mudanças;

**percBugs** : Percentual de bugs inseridos [Formula noBugsIns/sum(noBugsIns)];

**percChanges** : Percentual de mudanças [Formula: noChanges/sum(noChanges) ];


Obs: Ressaltando que os dados no dataset encontram-se desnormalizados, portanto há uma possibilidade de repetição  de valores em algumas colunas, ex: na coluna NoTimeSeries, na linha 2 e 3 existe o valor 1681 de maneira repetida, mas no cálculo das respectivas metricas é utilizado somente um valor, visto que existem somente 1681 time series com 2 commits e não o dobro (soma das linhas 2 e 3).

```{r}
# Print the raw per-commit results table loaded above.
results_frame

```

### Final Dataset:

Aqui é apresentado o dataset com as métricas calculadas com o objetivo de gerar os respectivos gráficos presentes nas Goals 1 e 2. Segue uma descrição das colunas e suas respectivas fórmulas:

**NoCommits** : A Quantidade de commits (k) que a time serie possui;

**NoTimeSeries** : Quantidade de time series no commit;

**noChanges** : Quantidade de mudanças;

**NoBugsIns** : Quantidade de bugs inseridos no respectivo commit;

**BugsByTs** : Quantidade de bugs por time series [Formula : sum(noBugsIns)/max(noTimeSeries) ];

**BugsByChanges** : Quantidade de bugs por mudança [Formula: sum(noBugsIns) / sum(noChanges)];

**ChangesByTs**: Quantidade de mudanças por time series [Formula: sum(noChanges) / max(noTimeSeries)];

**PercBugsByTs**: Percentual de bugs por time series [Formula: (BugsByTs/sum(BugsByTs)) * 100];

**PercBugsByCommit**: Percentual de Bugs por commits [Formula: (noBugsIns/sum(noBugsIns)) * 100];

**PercBugsByChanges**: Percentual de Bugs por mudanças [Formula: (BugsByChanges/sum(BugsByChanges)) * 100];

**PercTs**: Percentual de time series [Formula: (noTimeSeries / sum(noTimeSeries)) * 100];

**PercChangesByTs**: Percentual de mudanças por time series [Formula: (ChangesByTs / sum(ChangesByTs)) * 100];

**PercChanges**: Percentual de mudanças [Formula:  (noChanges / sum(noChanges)) * 100];

Obs: Conforme explicado, os dados anteriores (raw dataset) têm algumas colunas desnormalizadas (NoTimeSeries), portanto algumas fórmulas contêm o valor máximo da respectiva coluna dentro do agrupamento definido.

```{r}
# Final data frame: aggregate the raw rows per (Project, NoCommits), then
# express each aggregate as a percentage of the totals of its project.
data_plot <- dplyr::group_by(results_frame, Project, NoCommits)
data_plot <- dplyr::summarise(
  data_plot,
  # noTimeSeries is repeated on every row of a group (denormalised input),
  # so it is taken once with max() instead of being summed.
  noTimeSeries = max(noTimeSeries),
  noChanges = sum(noChanges),
  noBugsIns = sum(noBugsIns),
  BugsByTs = sum(noBugsIns) / max(noTimeSeries),
  BugsByChanges = sum(noBugsIns) / sum(noChanges),
  ChangesByTs = sum(noChanges) / max(noTimeSeries)
)
# After summarise() the data is still grouped by Project, so every sum()
# below is a per-project total.
data_plot <- dplyr::mutate(
  data_plot,
  PercBugsByTs = (BugsByTs / sum(BugsByTs)) * 100,
  PercBugsByCommit = (noBugsIns / sum(noBugsIns)) * 100,
  PercBugsByChanges = (BugsByChanges / sum(BugsByChanges)) * 100,
  PercTs = (noTimeSeries / sum(noTimeSeries)) * 100,
  PercChangesByTs = (ChangesByTs / sum(ChangesByTs)) * 100,
  PercChanges = (noChanges / sum(noChanges)) * 100
)
data_plot <- as.data.frame(dplyr::ungroup(data_plot))

# Display the final dataset.
data_plot
```


### (G1) To investigate the occurrence of commits along the time series;

#### (Q1.1) How often are commits performed along the time series?
```{r}

# Frequency: number of time series (y) for each commit count (x).
ggplot(data_plot, aes(x = NoCommits, y = noTimeSeries, group = Project)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Frequency of Time Series by Commits") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

# Percentage: share of time series (PercTs) for each commit count.
ggplot(data_plot, aes(x = NoCommits, y = PercTs, group = Project)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Percentual of Time Series by Commits") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )


```



#### (Q1.2) How often are changes performed among commits?
```{r}

# Frequency: total number of changes (noChanges) for each commit count.
ggplot(data_plot, aes(x = NoCommits, y = noChanges)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Frequency of Changes by Commits") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

# Percentage: share of changes (PercChanges) for each commit count.
ggplot(data_plot, aes(x = NoCommits, y = PercChanges)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Percentual of Changes by Commits") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

```

#### (Q1.3) How often are changes performed among time series?
```{r}

# Frequency: changes per time series (ChangesByTs) for each commit count.
# The y label previously read "Time Seris"; the typo is fixed here.
ggplot(data_plot, aes(x = NoCommits, y = ChangesByTs)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Frequency of Changes by Time Series") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

# Percentage: share of changes per time series (PercChangesByTs).
ggplot(data_plot, aes(x = NoCommits, y = PercChangesByTs)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Percentual of Changes by Time Series") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

```

### (G2)To assess the introduction of bugs along the history; 

#### (Q2.1) How often are bugs introduced among the commits?
```{r}

# Frequency: number of inserted bugs (noBugsIns) for each commit count.
# This plot shows absolute counts, so its y label says "Frequency"; it used
# to carry the "Percentual" label copied from the plot below.
ggplot(data_plot, aes(x = NoCommits, y = noBugsIns)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Frequency of Bugs by Commits") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

# Percentage: share of inserted bugs (PercBugsByCommit) per commit count.
ggplot(data_plot, aes(x = NoCommits, y = PercBugsByCommit)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Percentual of Bugs by Commits") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )
```

#### (Q2.2) How often are bugs introduced among the time series?
```{r}

# Frequency: bugs per time series (BugsByTs) for each commit count.
# This plot shows the raw ratio, so its y label says "Frequency"; it used to
# carry the "Percentual" label copied from the plot below.
ggplot(data_plot, aes(x = NoCommits, y = BugsByTs)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Frequency of Bugs by Time Series") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

# Percentage: share of bugs per time series (PercBugsByTs).
ggplot(data_plot, aes(x = NoCommits, y = PercBugsByTs)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Percentual of Bugs by Time Series") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )
```

#### (Q2.3) How often are bugs introduced among the changes?
```{r}

# Frequency: bugs per change (BugsByChanges) for each commit count.
# This plot shows the raw ratio, so its y label says "Frequency"; it used to
# carry the "Percentual" label copied from the plot below.
ggplot(data_plot, aes(x = NoCommits, y = BugsByChanges)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Frequency of Bugs by Changes") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )

# Percentage: share of bugs per change (PercBugsByChanges).
ggplot(data_plot, aes(x = NoCommits, y = PercBugsByChanges)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(. ~ Project, scales = "free") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = seq(1, max(data_plot$NoCommits))
  ) +
  ylab("Percentual of Bugs by Changes") +
  theme(
    legend.position = c(0.7, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    axis.text.x = element_text(
      colour = "grey20", size = 5, angle = 90,
      hjust = .5, vjust = .5, face = "plain"
    )
  )
```

### (G3) To analyse which kinds of changes are most related to bug insertion;

#### (Q3.1) Which types of changes (Add/Del) are most frequent in bug insertions?


```{r}

# Changes chart: keep the per-change-type rows (everything except the "All"
# aggregate), aggregated over all metric groups and metrics.
methods_changes <- dplyr::filter(
  methods_insertions,
  changeType != "All", groupMetric == "All", Metric == "All"
)

# Replace the raw change-type codes with short labels for the legend.
methods_changes[
  methods_changes$changeType == "changeMetricsAddition", c("changeType")
] <- "Addition"
methods_changes[
  methods_changes$changeType == "changeMetricsDeletion", c("changeType")
] <- "Deletion"

# Fill colours, one per change type.
cbPalette <- c("#009E73", "#F15854")

# Total changes per (Project, changeType) and their share within the project.
# mutate()/ungroup() are namespace-qualified like the rest of the file: plyr
# is also in use, and plyr::mutate would ignore the grouping.
methods_changes <- as.data.frame(
  methods_changes %>%
    dplyr::group_by(Project, changeType) %>%
    dplyr::summarise(totalChanges = sum(Count)) %>%
    dplyr::mutate(Percentual = totalChanges / sum(totalChanges) * 100) %>%
    dplyr::ungroup()
)

# Dodged bars per project, one bar per change type, labelled with the
# rounded percentage.
ggplot(methods_changes, aes(x = Project, y = Percentual, group = changeType)) +
  geom_bar(aes(fill = changeType), position = "dodge", stat = "identity") +
  geom_text(
    aes(label = round(Percentual)),
    check_overlap = TRUE,
    position = position_dodge(width = 1),
    vjust = -0.5,
    size = 3
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = c(0.82, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    axis.title.x = element_blank(),
    legend.text = element_text(size = 7),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  ) +
  scale_fill_manual(values = cbPalette)

```

#### (Q3.2) Which metric groups are most frequent in bug insertions?

```{r}
# Filter: one row set per metric group, excluding the "RM" group and the
# "All" aggregate, for all change types and all metrics.
methods_groups <- filter(
  methods_insertions,
  groupMetric != "RM", changeType == "All", groupMetric != "All",
  Metric == "All"
)

# Total changes per (Project, groupMetric) and their share in the project.
methods_groups <- dplyr::group_by(methods_groups, Project, groupMetric)
methods_groups <- dplyr::summarise(methods_groups, Changes = sum(Count))
methods_groups <- dplyr::mutate(
  methods_groups,
  Percentual = Changes / sum(Changes) * 100
)
methods_groups <- as.data.frame(dplyr::ungroup(methods_groups))

# Bars ordered from the largest to the smallest share, labelled with the
# rounded percentage.
ggplot(
  methods_groups,
  aes(x = reorder(groupMetric, -Percentual), y = Percentual)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = round(Percentual)),
    check_overlap = TRUE,
    position = position_dodge(width = 1),
    vjust = -0.5,
    size = 3
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    legend.position = c(0.82, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    axis.title.x = element_blank(),
    legend.text = element_text(size = 7),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  )

```



#### (Q3.3) Which change metrics are most frequent in bug insertions?

```{r}
# Q3.3 chart: percentage of changes for the 15 most frequent change metrics.

# Individual-metric rows: changeType is "All", Metric is not "All", and
# groupMetric is neither "All" nor "RM".
methods_ind <- methods_insertions %>%
  filter(
    groupMetric != "RM",
    changeType == "All",
    groupMetric != "All",
    Metric != "All"
  )

# Occurrences of each metric among those rows, most frequent first; the first
# 15 entries are the metrics that get plotted.
# NOTE(review): this ranks metrics by number of rows, not by summed Count --
# confirm that is the intended notion of "top".
freqs_metrics <- plyr::count(methods_ind$Metric)
freqs_metrics <- freqs_metrics[order(-freqs_metrics$freq), ]
top_metrics <- head(freqs_metrics, 15)

# Sum Count per project and metric and convert to a percentage of the
# project's total. Percentages are computed over all metrics, before the
# top-15 restriction is applied.
methods_ind <- methods_ind %>%
  dplyr::group_by(Project, Metric) %>%
  dplyr::summarise(Changes = sum(Count)) %>%
  dplyr::mutate(Percentual = Changes / sum(Changes) * 100) %>%
  dplyr::ungroup() %>%
  as.data.frame()

is_top_metric <- methods_ind$Metric %in% top_metrics$x
methods_ind <- methods_ind[is_top_metric, ]

# Bars ordered from the largest to the smallest percentage (value labels are
# disabled for this chart).
ggplot(methods_ind, aes(x = reorder(Metric, -Percentual), y = Percentual)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 7),
    axis.title.x = element_blank(),
    legend.position = c(0.82, 0.95),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(size = 7),
    legend.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  )
```



### (G4) To analyse the relation between the history of changes and bugs

#### (Q4.1) Is a high number of changes an indicator of bug introduction?


```{r}
# Q4.1 chart: per-project median precision, recall and F-measure, computed by
# getStatsProcess() for every time series that has bug insertions.

# Overall rows only: changeType, groupMetric and Metric are all "All".
methods_changes <- filter(
  methods_total,
  changeType == "All", groupMetric == "All", Metric == "All"
)

# Total of bug insertions per row, obtained by applying getTotalBugsIns()
# (presumably defined in the sourced utils.R) to each bugsInsertion value.
methods_changes[, "totalofBugsIns"] <-
  plyr::maply(methods_changes$bugsInsertion, getTotalBugsIns)

# Keep only time series with at least one bug insertion and more than one
# commit.
methods_changes <- filter(
  methods_changes,
  totalofBugsIns > 0, NtimeofCommits > 1
)

# genId() is expected to add the `id` column that is iterated over below.
methods_changes <- genId(methods_changes)

# Assigned with `<<-` so the vectors are visible as globals; presumably
# getStatsProcess() looks them up by row id -- confirm in utils.R before
# changing this.
listofPeaks <<- methods_changes$elementsValue
listofBugsIns <<- methods_changes$bugsInsertion

# Create the statistics columns, then fill them with one row of results per
# id returned by getStatsProcess().
stats_columns <- c(
  "peaks", "tp", "fp", "fn", "tn", "precision", "recall", "fmeasure"
)
methods_changes[, stats_columns] <- 0
methods_changes[, stats_columns] <-
  plyr::ldply(methods_changes$id, getStatsProcess)

# Median of each measure per project, ignoring NA values.
confusion_matrix <- as.data.frame(
  methods_changes %>%
    dplyr::group_by(Project) %>%
    dplyr::summarise(
      precision = median(precision, na.rm = TRUE),
      recall = median(recall, na.rm = TRUE),
      fmeasure = median(fmeasure, na.rm = TRUE)
    )
)

# Reshape the summary computed above to long format: one row per
# (Project, Measure) pair, which is the layout the plot expects.
confusion_matrix <- melt(confusion_matrix, id = c("Project"))
colnames(confusion_matrix) <- c("Project", "Measure", "Values")

# One bar per measure, labelled with the value rounded to two decimals.
ggplot(confusion_matrix) +
  geom_bar(
    aes(x = Measure, y = Values),
    position = "dodge", stat = "identity"
  ) +
  geom_text(
    aes(x = Measure, y = Values, label = round(Values, 2)),
    check_overlap = TRUE, position = position_dodge(width = 1),
    vjust = -0.5, size = 3
  ) +
  theme(
    legend.direction = "vertical",
    legend.title = element_blank(),
    legend.text = element_text(size = 8),
    legend.background = element_rect(fill = "transparent", colour = NA)
  ) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  ) +
  ggtitle(project) + ylab("Percentual")

```


