RQ1: How often do changes and bugs occur in the history of methods?

# Distinct project names; drives the per-project plots below.
projects <- unique(methods$Project)

# Total Count per (Project, NtimeofCommits, typeMethod). summarise() drops the
# innermost grouping variable (typeMethod), so Percentual is each method
# type's share within its (Project, NtimeofCommits) group.
typeMethodsData <- methods %>%
  group_by(Project, NtimeofCommits, typeMethod) %>%
  summarise(Changes = sum(Count)) %>%
  mutate(Percentual = Changes / sum(Changes) * 100) %>%
  ungroup()

# One dodged bar chart per project, restricted to NtimeofCommits <= 10.
for (project in projects) {
  projectData <- filter(
    typeMethodsData,
    Project == project,
    NtimeofCommits <= 10
  )

  projectPlot <- ggplot(
    projectData,
    aes(x = NtimeofCommits, y = Percentual, group = typeMethod)
  ) +
    geom_bar(aes(fill = typeMethod), position = "dodge", stat = "identity") +
    # Dodge width 0.9 matches the default bar width used by
    # position = "dodge", so every label sits centred above its own bar.
    geom_text(
      aes(label = round(Percentual, 2)),
      check_overlap = TRUE,
      position = position_dodge(width = 0.9),
      vjust = -0.5,
      size = 3
    ) +
    theme(
      legend.direction = "vertical",
      legend.title = element_blank(),
      legend.text = element_text(size = 6),
      legend.background = element_rect(fill = "transparent", colour = NA)
    ) +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank(),
      plot.background = element_rect(fill = "transparent", colour = NA)
    ) +
    ggtitle(paste0("Project ", project)) +
    ylab("Percentual") +
    scale_x_discrete(
      name = "Number of Commits",
      limits = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")
    ) +
    scale_fill_manual(values = cbPalette)

  # ggplot objects are not auto-printed inside a for loop.
  print(projectPlot)
}

# Summarise the types of methods by percentage over all projects: total Count
# per (NtimeofCommits, typeMethod), then each type's share within its
# NtimeofCommits group (summarise() drops the typeMethod grouping level).
typeMethodsDataAll <- methods %>%
  group_by(NtimeofCommits, typeMethod) %>%
  summarise(Changes = sum(Count)) %>%
  mutate(Percentual = Changes / sum(Changes) * 100) %>%
  ungroup()

ggplot(
  filter(typeMethodsDataAll, NtimeofCommits <= 10),
  aes(x = NtimeofCommits, y = Percentual, group = typeMethod)
) +
  geom_bar(aes(fill = typeMethod), position = "dodge", stat = "identity") +
  # Dodge width 0.9 matches the default bar width used by position = "dodge",
  # so every label sits centred above its own bar.
  geom_text(
    aes(label = round(Percentual, 2)),
    check_overlap = TRUE,
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  ) +
  theme(
    legend.direction = "vertical",
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.background = element_rect(fill = "transparent", colour = NA)
  ) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  ) +
  ggtitle("Plots by Type of Methods") +
  ylab("Percentual") +
  scale_x_discrete(
    name = "Number of Commits",
    limits = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")
  ) +
  scale_fill_manual(values = cbPalette)

# Boxplots by project: distribution of NtimeofCommits for each project.
projects <- unique(methods$Project)
ggplot(methods, aes(x = Project, y = NtimeofCommits)) +
    geom_boxplot() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    # The axis title is a fixed label. The original passed the leftover loop
    # variable `project`, which titled the axis with the last project's name.
    scale_x_discrete(name = "Project")

Explicar as constraints e como esse comportamento ocorre ao longo delas de maneira profunda, ou seja, explicar por que algumas constraints diminuem pouco a % e outras muito.

# Per-project summary computed with sqldf over the `methods` data frame.
# TotalTimeSeries is the sum of Count. Each C* column counts the rows whose
# listed P flags are all equal to 1:
#   C1          -> P1, P2
#   C1-C2       -> P1, P2, P4
#   C1-C2-C3    -> P1, P2, P4, P5
#   C1-C2-C4    -> P1, P2, P3, P4
#   C1-C2-C3-C4 -> P1, P2, P3, P4, P5
# NOTE(review): TotalTimeSeries sums Count while the C* columns count rows
# (SUM of 1/0); confirm the two are meant to be on the same scale.
resultsC1C4<-
  sqldf(
    "SELECT Project, SUM(Count) as TotalTimeSeries, 
     SUM(CASE WHEN (P2 = 1) AND (P1 = 1) THEN 1 ELSE 0 end) as 'C1',
     SUM(CASE WHEN (P2 = 1) AND (P1 = 1) AND (P4 = 1) THEN 1 ELSE 0 end) as 'C1-C2',
     SUM(CASE WHEN (P2 = 1) AND (P1 = 1) AND (P4 = 1) AND (P5 = 1) THEN 1 ELSE 0 end) as 'C1-C2-C3',
     SUM(CASE WHEN (P2 = 1) AND (P1 = 1) AND (P4 = 1) AND (P3 = 1) THEN 1 ELSE 0 end) as 'C1-C2-C4',
     SUM(CASE WHEN (P2 = 1) AND (P1 = 1) AND (P3 = 1) AND (P4 = 1) AND (P5 = 1) THEN 1 ELSE 0 end) as 'C1-C2-C3-C4'
     FROM methods
     GROUP BY Project"
  )
# Print the summary table.
resultsC1C4
FALSE List of 3
FALSE  $ panel.grid.major: list()
FALSE   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
FALSE  $ panel.grid.minor: list()
FALSE   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
FALSE  $ plot.background :List of 5
FALSE   ..$ fill         : chr "transparent"
FALSE   ..$ colour       : logi NA
FALSE   ..$ size         : NULL
FALSE   ..$ linetype     : NULL
FALSE   ..$ inherit.blank: logi FALSE
FALSE   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
FALSE  - attr(*, "class")= chr [1:2] "theme" "gg"
FALSE  - attr(*, "complete")= logi FALSE
FALSE  - attr(*, "validate")= logi TRUE
FALSE List of 3
FALSE  $ panel.grid.major: list()
FALSE   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
FALSE  $ panel.grid.minor: list()
FALSE   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
FALSE  $ plot.background :List of 5
FALSE   ..$ fill         : chr "transparent"
FALSE   ..$ colour       : logi NA
FALSE   ..$ size         : NULL
FALSE   ..$ linetype     : NULL
FALSE   ..$ inherit.blank: logi FALSE
FALSE   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
FALSE  - attr(*, "class")= chr [1:2] "theme" "gg"
FALSE  - attr(*, "complete")= logi FALSE
FALSE  - attr(*, "validate")= logi TRUE

RQ2: How close is the relationship between the history of bugs and changes?

RQ3: Which kinds of changes are mostly related to the insertion of bugs?

# Individual metrics: keep only the columns needed for the analysis.
colMethods <- c(
  "Project", "MethodName", "Metric", "groupMetric", "changeType",
  "P1", "P2", "P3", "P4", "P5",
  "elementsValue", "NtimeofCommits", "Count", "GrangerPos"
)
#resultsClasses <- subset( resultsClasses, select = c(colClasses) )
resultsMethods <- subset(methods, select = c(colMethods))

# Keep rows with P2 == 1 and drop the "RM" metric group.
resultsMethods <- filter(resultsMethods, P2 == 1, groupMetric != "RM")

# Total Count per (Project, Metric), excluding the 'All' aggregate rows.
indMetrics<-
  sqldf(
    "SELECT Project, Metric, SUM(Count) as Changes
    FROM resultsMethods
    WHERE changeType <> 'All' 
    AND groupMetric <>'All'
    AND Metric <> 'All'
    GROUP BY Project, Metric"
  )

# One pareto chart per project. ggpareto() receives a vector in which every
# metric name is repeated once per change.
for (project in projects) {
  df <- filter(indMetrics, Project == project)[, c(2, 3)]

  # A project with no remaining rows has nothing to plot.
  if (nrow(df) == 0) {
    next
  }

  # Build the vector fresh for every project. The original filled a single
  # shared vector by index, so entries left over from a longer earlier project
  # leaked into later charts, and `1:Changes` added entries even when Changes
  # was 0.
  metrics <- rep(df$Metric, times = df$Changes)

  # NOTE(review): if ggpareto() returns a plot object instead of drawing it,
  # wrap this call in print(), since values are not auto-printed inside a loop.
  ggpareto(metrics, project)
}

# Grouped metrics, read from a pre-computed CSV.
grpMetrics <- read.csv(
  file = "~/R/analysis/methods/dataMetricsGranger.csv",
  stringsAsFactors = FALSE
)

# One pareto chart per project. ggpareto() receives a vector in which every
# metric name is repeated once per change.
for (project in projects) {
  # NOTE(review): assumes columns 3 and 4 of the CSV are the `Metric` and
  # `Changes` columns used below; confirm against the file.
  df <- filter(grpMetrics, Project == project)[, c(3, 4)]

  # A project with no rows has nothing to plot.
  if (nrow(df) == 0) {
    next
  }

  # Build the vector fresh for every project. The original appended to
  # `mylist`, which this chunk never initialised (it reset `metrics` instead),
  # and `1:Changes` added entries even when Changes was 0.
  metrics <- rep(df$Metric, times = df$Changes)

  # NOTE(review): if ggpareto() returns a plot object instead of drawing it,
  # wrap this call in print(), since values are not auto-printed inside a loop.
  ggpareto(metrics, project)
}

RQ4:

# Sum the five quartile* columns per (Project, typeMethod).
dfQuartils <- as.data.frame(
  methods %>%
    group_by(Project, typeMethod) %>%
    summarise(
      quartile1 = sum(quartile1), quartile2 = sum(quartile2),
      quartile3 = sum(quartile3), quartile4 = sum(quartile4),
      quartile5 = sum(quartile5)
    )
)

# Long format: one row per (Project, typeMethod, quartile column).
tdfQuartils <- melt(dfQuartils, id = c("Project", "typeMethod"))
colnames(tdfQuartils) <- c("Project", "typeMethod", "Quartiles", "Values")

# Totals per (Project, Quartiles); summarise() drops the Quartiles grouping
# level, so Percentual is each quartile's share within its project.
dfQuartilsProjects <- tdfQuartils %>%
  group_by(Project, Quartiles) %>%
  summarise(Values = sum(Values)) %>%
  mutate(Percentual = Values / sum(Values) * 100) %>%
  ungroup()

# Changes over time by projects
ggplot(
  dfQuartilsProjects,
  aes(x = Quartiles, y = Percentual, group = Project)
) +
  geom_bar(aes(fill = Project), position = "dodge", stat = "identity") +
  # Dodge width 0.9 matches the default bar width used by position = "dodge",
  # so every label sits centred above its own bar.
  geom_text(
    aes(label = round(Percentual, 2)),
    check_overlap = TRUE,
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  ) +
  theme(
    legend.direction = "vertical",
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.background = element_rect(fill = "transparent", colour = NA)
  ) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  ) +
  # This plot covers every project, so it gets a fixed title. The original
  # reused the leftover loop variable `project` and showed one project's name.
  ggtitle("Changes over time by projects") +
  ylab("Percentual")

# The manual palette is left disabled for this plot.
# NOTE(review): presumably cbPalette has fewer colours than there are projects.
#scale_fill_manual(values=cbPalette)
# Totals per (typeMethod, Quartiles); summarise() drops the Quartiles grouping
# level, so Percentual is each quartile's share within its method type.
dfQuartilsTypes <- tdfQuartils %>%
  group_by(typeMethod, Quartiles) %>%
  summarise(Values = sum(Values)) %>%
  mutate(Percentual = Values / sum(Values) * 100) %>%
  ungroup()

# Changes over time by types of methods
ggplot(
  dfQuartilsTypes,
  aes(x = Quartiles, y = Percentual, group = typeMethod)
) +
  geom_bar(aes(fill = typeMethod), position = "dodge", stat = "identity") +
  # Dodge width 0.9 matches the default bar width used by position = "dodge",
  # so every label sits centred above its own bar.
  geom_text(
    aes(label = round(Percentual, 2)),
    check_overlap = TRUE,
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  ) +
  theme(
    legend.direction = "vertical",
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.background = element_rect(fill = "transparent", colour = NA)
  ) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  ) +
  # This plot aggregates all projects, so it gets a fixed title. The original
  # reused the leftover loop variable `project` and showed one project's name.
  ggtitle("Changes over time by types of methods") +
  ylab("Percentual") +
  scale_fill_manual(values = cbPalette)

# Sum the four peak-count columns per (Project, typeMethod).
dfPeaks <- as.data.frame(
  methods %>%
    group_by(Project, typeMethod) %>%
    summarise(
      peaksMedian = sum(peaksMedian), peaks1Sd = sum(peaks1Sd),
      peaks2Sd = sum(peaks2Sd), peaks3Sd = sum(peaks3Sd)
    )
)

# Long format: one row per (Project, typeMethod, peak column).
tdfPeaks <- melt(dfPeaks, id = c("Project", "typeMethod"))
colnames(tdfPeaks) <- c("Project", "typeMethod", "Peaks", "Values")

# Totals per (Project, Peaks); summarise() drops the Peaks grouping level, so
# Percentual is each peak category's share within its project.
dfPeaksProjects <- tdfPeaks %>%
  group_by(Project, Peaks) %>%
  summarise(Values = sum(Values)) %>%
  mutate(Percentual = Values / sum(Values) * 100) %>%
  ungroup()

# Peak categories by project
ggplot(dfPeaksProjects, aes(x = Project, y = Percentual, group = Peaks)) +
  geom_bar(aes(fill = Peaks), position = "dodge", stat = "identity") +
  # Dodge width 0.9 matches the default bar width used by position = "dodge",
  # so every label sits centred above its own bar.
  geom_text(
    aes(label = round(Percentual, 2)),
    check_overlap = TRUE,
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  ) +
  theme(
    legend.direction = "vertical",
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.background = element_rect(fill = "transparent", colour = NA)
  ) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  ) +
  # This plot covers every project, so it gets a fixed title. The original
  # reused the leftover loop variable `project` and showed one project's name.
  ggtitle("Peaks by projects") +
  ylab("Percentual") +
  scale_fill_manual(values = cbPalette)

# Totals per (typeMethod, Peaks); summarise() drops the Peaks grouping level,
# so Percentual is each peak category's share within its method type.
dfPeaksTypes <- tdfPeaks %>%
  group_by(typeMethod, Peaks) %>%
  summarise(Values = sum(Values)) %>%
  mutate(Percentual = Values / sum(Values) * 100) %>%
  ungroup()

# Peak categories by type of method
ggplot(dfPeaksTypes, aes(x = typeMethod, y = Percentual, group = Peaks)) +
  geom_bar(aes(fill = Peaks), position = "dodge", stat = "identity") +
  # Dodge width 0.9 matches the default bar width used by position = "dodge",
  # so every label sits centred above its own bar.
  geom_text(
    aes(label = round(Percentual, 2)),
    check_overlap = TRUE,
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  ) +
  theme(
    legend.direction = "vertical",
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.background = element_rect(fill = "transparent", colour = NA)
  ) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  ) +
  # This plot aggregates all projects, so it gets a fixed title. The original
  # reused the leftover loop variable `project` and showed one project's name.
  ggtitle("Peaks by types of methods") +
  ylab("Percentual") +
  scale_fill_manual(values = cbPalette)

