Commit 1a7b7fb9 authored by Stanley Clark
Browse files

More analysis

parent 1682697f
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
This diff is collapsed.
if (!require("pacman")) install.packages("pacman")
pacman::p_load(stringr, ggplot2, reshape2, tikzDevice, R.devices, gridExtra, plyr, ggallin, formattable)
options(tz="Europe/Amsterdam")
if (!require("pacman"))
install.packages("pacman")
pacman::p_load(
stringr,
ggplot2,
reshape2,
tikzDevice,
R.devices,
gridExtra,
plyr,
ggallin,
formattable,
grid,
gtable
)
options(tz = "Europe/Amsterdam")
source("stats.R")
source("plots.R")
# ------ Load clean results ------
results1 <- rbind(get_results(1, "results1.csv"), get_results(10, "results10.csv"))
results1 <- rbind(get_results(1, "results1.csv"))
stats1 <- get_stats(results1)
# ------ Generate graphs -----
# Print plots to the screen
# Execution time per query
grid.arrange(
box_db_query_relative(stats1),
box_db_query_absolute_log(stats1),
ncol = 2, nrow = 1)
grid.arrange(
box_db_query_absolute_log(stats1),
ncol = 2, nrow = 1
# Load clean results
results <- rbind(
get_results(1, "results1.csv"),
get_results(2, "results2.csv")
)
stats <- get_stats(results)
# Without scale
grid.arrange(
diff_plan_time(stats1),
box_plan(stats1),
box_db(stats1),
box_plan_and_db(stats1),
stacked_means(stats1),
ncol=3, nrow=2)
# With scale
grid.arrange(
diff_plan_time(stats1),
box_db(stats1),
box_db_scale(stats1),
box_plan_scale(stats1),
box_plan(stats1),
stacked_means(stats1),
ncol=3, nrow=2)
diff_plan_time(stats1)
bar_diff_plan_time(stats1)
box_db_query_absolute_log(stats1)
box_db_query_relative(stats1)
stacked_means(stats1)
# Print plots to the screen
diff_plan_time()
box_db_query_absolute_log()
box_db_query_relative()
stacked_means()
# Save all graphs to individual tex files
save_half("planning_time_joins-2.tex", diff_plan_time, stats1)
save_half("planning_time_bar-2.tex", bar_diff_plan_time, stats1)
save_half("diff_execution_time_log-2.tex", box_db_query_absolute_log, stats1)
save_half("rel_diff_execution_time-2.tex", box_db_query_relative, stats1)
save_half("stacked_means-2.tex", stacked_means, stats1)
save_third("planning_time_joins-3.tex", diff_plan_time, stats1)
save_third("planning_time_bar-3.tex", bar_diff_plan_time, stats1)
save_third("diff_execution_time_log-3.tex", box_db_query_absolute_log, stats1)
save_third("rel_diff_execution_time-3.tex", box_db_query_relative, stats1)
save_third("stacked_means-3.tex", stacked_means, stats1)
# Render each plot to its own .tex file. The plot functions are passed
# uncalled; the save helper invokes them once the device is open.
# "-2" files go through save_half (3.3 x 3.3).
save_half("planning_time_joins-2.tex", diff_plan_time)
save_half("diff_execution_time_log-2.tex", box_db_query_absolute_log)
save_half("rel_diff_execution_time-2.tex", box_db_query_relative)
save_half("stacked_means-2.tex", stacked_means)
# "-3" files go through save_third (2.2 x 2.2).
save_third("planning_time_joins-3.tex", diff_plan_time)
save_third("diff_execution_time_log-3.tex", box_db_query_absolute_log)
save_third("rel_diff_execution_time-3.tex", box_db_query_relative)
# The stacked plot is the one exception: it is saved non-square
# (width 3.3, height 2.2) by calling save_to_tex directly.
save_to_tex("stacked_means-3.tex", stacked_means, 3.3, 2.2)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
% Created by tikzDevice version 0.12.3.1 on 2020-09-02 18:43:01
% Created by tikzDevice version 0.12.3.1 on 2020-09-03 08:18:38
% !TEX encoding = UTF-8 Unicode
\begin{tikzpicture}[x=1pt,y=1pt]
\definecolor{fillColor}{RGB}{255,255,255}
......@@ -73,13 +73,13 @@
\definecolor{drawColor}{gray}{0.20}
\definecolor{fillColor}{gray}{0.20}
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 72.82, 34.24) circle ( 1.96);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 72.82, 34.24) circle ( 0.46);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 32.43, 34.24) circle ( 1.96);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 32.43, 34.24) circle ( 0.46);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 42.44, 34.24) circle ( 1.96);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 42.44, 34.24) circle ( 0.46);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 72.73, 34.24) circle ( 1.96);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 72.73, 34.24) circle ( 0.46);
\path[draw=drawColor,line width= 0.6pt,line join=round] ( 65.30, 34.24) -- ( 65.30, 34.24);
......@@ -122,9 +122,9 @@
\path[draw=drawColor,line width= 1.1pt,line join=round] ( 56.57, 56.77) -- ( 56.57, 67.17);
\definecolor{fillColor}{gray}{0.20}
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 39.68, 75.84) circle ( 1.96);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 39.68, 75.84) circle ( 0.46);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 39.27, 75.84) circle ( 1.96);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 39.27, 75.84) circle ( 0.46);
\path[draw=drawColor,line width= 0.6pt,line join=round] ( 58.84, 75.84) -- ( 68.01, 75.84);
......@@ -180,9 +180,9 @@
\path[draw=drawColor,line width= 1.1pt,line join=round] ( 53.56,112.24) -- ( 53.56,122.64);
\definecolor{fillColor}{gray}{0.20}
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 85.33,131.31) circle ( 1.96);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 85.33,131.31) circle ( 0.46);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 78.27,131.31) circle ( 1.96);
\path[draw=drawColor,line width= 0.4pt,line join=round,line cap=round,fill=fillColor] ( 78.27,131.31) circle ( 0.46);
\path[draw=drawColor,line width= 0.6pt,line join=round] ( 82.66,131.31) -- ( 83.52,131.31);
......
This diff is collapsed.
......@@ -11,29 +11,22 @@ latex_percent <- function (x) {
stringr::str_c(round(x * 100, 0), "\\%")
}
# 1. Difference between planning times per number of joins
diff_plan_time <- function(stats) {
ggplot(stats$means, aes(x = num_joins, y = plan, group = case)) +
diff_plan_time <- function() {
df <- stats$meansuserid[stats$meansuserid$scale == 2 & stats$meansuserid$query %in% c(3, 96, 7, 26, 19, 25),]
df <- df[,c(3,6,10)]
df <- aggregate(plan ~ case + num_joins, data = df, mean)
ggplot(df, aes(x = num_joins, y = plan, group = case)) +
geom_line(aes(linetype = case)) +
ggplot_theme +
labs(y = "Absolute planning time (ms)", x = "Number of joins", linetype = "Variant") +
scale_linetype_discrete(labels = ggplot_case_labels)
}
# 2. Differences between planning times per query
bar_diff_plan_time <- function(stats) {
  # Dodged bar chart of planning time per query, one bar per case.
  # Reads the `means` table from `stats`; theme and case labels come from
  # the file-level `ggplot_theme` and `ggplot_case_labels`.
  plan_means <- stats$means
  dodged_bars <- geom_bar(stat = "identity", position = "dodge")
  axis_labels <- labs(y = "Planning time (ms)", x = "Query", fill = "Variant")

  ggplot(plan_means, aes(x = query, y = plan, fill = case)) +
    dodged_bars +
    ggplot_theme +
    axis_labels +
    scale_fill_discrete(labels = ggplot_case_labels)
}
# 3. Box plot of distribution of mean DB execution times of different user IDs
# 2. Box plot of distribution of mean DB execution times of different user IDs
# between two cases across queries
box_db_query_absolute_log <- function(stats) {
ggplot(stats$means, aes(x = db, y = query, fill = case)) +
box_db_query_absolute_log <- function() {
ggplot(stats$means[stats$means$scale == 2,], aes(x = db, y = query, fill = case)) +
geom_boxplot(outlier.size = 0.1) +
ggplot_theme +
scale_x_continuous(trans = pseudolog10_trans) +
......@@ -43,10 +36,10 @@ box_db_query_absolute_log <- function(stats) {
theme(axis.text.x = element_text(angle = 90))
}
# 4. Box plot of relative increases in DB execution time
box_db_query_relative <- function(stats) {
ggplot(stats$rel_diff, aes(x = db, y = query)) +
geom_boxplot() +
# 3. Box plot of relative increases in DB execution time
box_db_query_relative <- function() {
ggplot(stats$rel_diff[stats$rel_diff$scale == 2, ], aes(x = db, y = query)) +
geom_boxplot(outlier.size = 0.1) +
ggplot_theme +
geom_vline(xintercept = 0, linetype = "dotted") +
scale_x_continuous(labels = latex_percent) +
......@@ -54,23 +47,31 @@ box_db_query_relative <- function(stats) {
scale_fill_discrete(labels = ggplot_case_labels)
}
# 5. Stacked plot of mean value of all components
# This should show how planning time becomes less relevant
# 4. Stacked plot of mean value of all components
# This should show how planning time becomes less relevant
# as the scale factor grows
stacked_means <- function(stats) {
ggplot(stats$meansuseridquery, aes(x = case, y = value, fill = variable)) +
stacked_means <- function() {
p <- ggplot(stats$meansuseridquery, aes(x = case, y = value, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
facet_grid(~ scale) +
ggplot_theme +
labs(x = "Variant", y = "Time (ms)", fill = "Component") +
scale_x_discrete(labels = ggplot_case_labels) +
scale_fill_discrete(labels = c("RA", "A-priori", "Planning", "DB"))
scale_fill_discrete(labels = c("RA", "A-priori", "Planning", "DB")) +
theme(legend.position="right", axis.text.x = element_text(angle = -45))
# Add general facet scale label
z <- ggplotGrob(p)
z <- gtable_add_rows(z, unit(2, "line"), 2)
z <- gtable_add_grob(z, textGrob("Scale Factor", gp=gpar(fontsize=8)), 2, 13, 6, 5)
grid.newpage()
grid.draw(z)
}
# ------- Extra plots not needed but kept for reference -------
# ------- Extra plots not needed but kept for reference -------
box_db_scale <- function(stats) {
box_db_scale <- function() {
ggplot(stats$means, aes(x = db, y = scale, fill = case)) +
geom_boxplot() +
ggplot_theme +
......@@ -78,7 +79,7 @@ box_db_scale <- function(stats) {
scale_fill_discrete(labels = ggplot_case_labels)
}
box_db_query <- function(stats) {
box_db_query <- function() {
ggplot(stats$means, aes(x = db, y = query, fill = case)) +
geom_boxplot() +
ggplot_theme +
......@@ -87,7 +88,7 @@ box_db_query <- function(stats) {
scale_fill_discrete(labels = ggplot_case_labels)
}
box_db <- function(stats) {
box_db <- function() {
ggplot(stats$means, aes(x = db, fill = case)) +
geom_boxplot() +
ggplot_theme +
......@@ -97,7 +98,7 @@ box_db <- function(stats) {
scale_y_discrete(breaks=NULL)
}
box_plan_scale <- function(stats) {
box_plan_scale <- function() {
ggplot(stats$means, aes(x = plan, y = scale, fill = case)) +
geom_boxplot() +
ggplot_theme +
......@@ -105,7 +106,7 @@ box_plan_scale <- function(stats) {
scale_fill_discrete(labels = ggplot_case_labels)
}
box_plan <- function(stats) {
box_plan <- function() {
ggplot(stats$means, aes(x = plan, fill = case)) +
geom_boxplot() +
ggplot_theme +
......@@ -115,7 +116,7 @@ box_plan <- function(stats) {
scale_y_discrete(breaks=NULL)
}
box_diff_plan_time <- function(stats) {
box_diff_plan_time <- function() {
ggplot(stats$means, aes(x = plan, y = query, fill = case)) +
geom_boxplot(outlier.size = 0.1) +
ggplot_theme +
......@@ -124,16 +125,39 @@ box_diff_plan_time <- function(stats) {
scale_fill_discrete(labels = ggplot_case_labels)
}
save_half <- function(filename, plot, stats) {
save_to_tex(filename, plot, stats,width = 3.3, height = 3.3)
plan_time_exponential <- function() {
  # Line plot of planning time per query for case 1 at scale 1, with the
  # queries ordered by their minimum planning time.
  # Works on a local copy of the global `stats`; the global is not modified.

  # The ordering is computed on the full table (all cases and scales),
  # before any filtering.
  stats$meansuserid$query2 <- with(
    stats$meansuserid,
    reorder(query, plan, min)
  )

  # Filter in two steps: first case 1, then scale 1.
  case_one <- stats$meansuserid[stats$meansuserid$case == 1, ]
  stats$meansuserid1 <- case_one[case_one$scale == 1, ]

  # Hard-coded x-axis tick labels, applied in the order of the reordered
  # query factor.
  join_labels <- c(
    "96 (2 = 12)",
    "3 (3 = 120)",
    "90 (1 {3 + 3} = 480)",
    "7 (4 = 1680)",
    "26 (4 = 1680)",
    "46 (2 { 4 } = 1692)",
    "11 (3 { 2 + 2 + 2 + 2}) = 5760",
    "19 (5 = 30240)",
    "25 (7 = 17297280)"
  )

  ggplot(stats$meansuserid1, aes(x = query2, y = plan, group = 1)) +
    geom_line() +
    ggplot_theme +
    labs(
      y = "Absolute planning time (ms)",
      x = "Number of joins in join sequences",
      linetype = "Variant"
    ) +
    scale_linetype_discrete(labels = ggplot_case_labels) +
    scale_x_discrete(labels = join_labels)
}
bar_diff_plan_time <- function() {
  # Dodged bar chart of planning time per query and case, restricted to
  # the rows of the global `stats$meansuserid` where scale == 2.
  scale_two <- stats$meansuserid[stats$meansuserid$scale == 2, ]
  dodged_bars <- geom_bar(stat = "identity", position = "dodge")
  axis_labels <- labs(y = "Planning time (ms)", x = "Query", fill = "Variant")

  ggplot(scale_two, aes(x = query, y = plan, fill = case)) +
    dodged_bars +
    ggplot_theme +
    axis_labels +
    scale_fill_discrete(labels = ggplot_case_labels)
}
# -------- To Tex functions --------
save_half <- function(filename, plot) {
  # Save `plot` (a zero-argument plotting function) as a square
  # 3.3 x 3.3 figure via save_to_tex.
  side <- 3.3
  save_to_tex(filename, plot, width = side, height = side)
}
save_third <- function(filename, plot, stats) {
save_to_tex(filename, plot, stats,width = 2.2, height = 2.2)
save_third <- function(filename, plot) {
  # Save `plot` (a zero-argument plotting function) as a square
  # 2.2 x 2.2 figure via save_to_tex.
  side <- 2.2
  save_to_tex(filename, plot, width = side, height = side)
}
save_to_tex <- function(filename, plot, stats, width, height) {
tikz(file = file.path("output", filename), width = width, height = width)
print(plot(stats))
save_to_tex <- function(filename, plot, width, height) {
  # Render a plot into output/<filename> through the tikz graphics device.
  #
  # filename: file name, placed under the "output" directory.
  # plot:     zero-argument function; its result is print()ed while the
  #           device is open.
  # width, height: device dimensions, passed straight to tikz().
  #
  # Returns the value of dev.off(), as before.
  tikz(file = file.path("output", filename), width = width, height = height)

  # If plot() or print() fails, the device must still be closed. Otherwise
  # it stays open and later plotting in the session goes to the wrong
  # device. The flag prevents a second dev.off() on the normal path.
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)

  print(plot())

  device_open <- FALSE
  dev.off()
}
......@@ -13,7 +13,6 @@ get_results <- function(scale, filename) {
}
get_stats <- function(results) {
merge_stats <- function(df, ordering) {
stats <- data.frame(
query = c(3, 7, 11, 19, 25, 26, 46, 90, 96),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment