diff --git a/steps/aggregate.sh b/steps/aggregate.sh index 472b34b7..caa86aa8 100755 --- a/steps/aggregate.sh +++ b/steps/aggregate.sh @@ -59,28 +59,6 @@ printf "\n" >> "${all}" echo "All $(wc -l "${all}" | xargs) projects aggregated$("${LOCAL}/help/tdiff.sh" "${start}")" printf "\n" -mkdir -p "${TARGET}/data/aggregation" - -jobs=${TARGET}/temp/jobs/aggregate-function-jobs.txt -rm -rf "${jobs}" -mkdir -p "$(dirname "${jobs}")" -touch "${jobs}" - -for metric in ${metrics}; do - metric_file="${TARGET}/data/${metric}.csv" - if [[ -f "${metric_file}" ]]; then - output_folder="${TARGET}/data/aggregation" - for sh_script in "${LOCAL}/steps/aggregation-functions/"*.sh; do - if [[ -f "${sh_script}" ]]; then - printf "%s %s %s %s\n" "${sh_script@Q}" "${metric_file}" "${output_folder@Q}" "${metric@Q}" >> "${jobs}" - fi - done - fi -done - -"${LOCAL}/help/parallel.sh" "${jobs}" -wait - jobs=${TARGET}/temp/jobs/aggregate-join-jobs.txt rm -rf "${jobs}" mkdir -p "$(dirname "${jobs}")" @@ -98,4 +76,24 @@ done < "${repos}" "${LOCAL}/help/parallel.sh" "${jobs}" wait +mkdir -p "${TARGET}/data/aggregation" +f_jobs=${TARGET}/temp/jobs/aggregate-function-jobs.txt +rm -rf "${f_jobs}" +mkdir -p "$(dirname "${f_jobs}")" +touch "${f_jobs}" + +for metric in ${metrics}; do + metric_file="${TARGET}/data/${metric}.csv" + if [[ -f "${metric_file}" ]]; then + output_folder="${TARGET}/data/aggregation" + for sh_script in "${LOCAL}/steps/aggregation-functions/"*.sh; do + if [[ -f "${sh_script}" ]]; then + printf "%s %s %s %s\n" "${sh_script@Q}" "${metric_file}" "${output_folder@Q}" "${metric@Q}" >> "${f_jobs}" + fi + done + fi +done +"${LOCAL}/help/parallel.sh" "${f_jobs}" +wait + echo "All metrics aggregated and joined in ${total} repositories$("${LOCAL}/help/tdiff.sh" "${start}")" diff --git a/steps/report.sh b/steps/report.sh index ec85f8b6..296dd510 100755 --- a/steps/report.sh +++ b/steps/report.sh @@ -20,6 +20,7 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. + set -e set -o pipefail @@ -85,6 +86,56 @@ done cp "${list}" "${list}.unstructured" mv "${st_list}" "${list}" +# Create the aggregation table LaTeX file +aggregation_table=${TARGET}/temp/aggregation_table.tex +echo > "${aggregation_table}" + +# LaTeX escape function to handle special characters +latex_escape() { + echo "$1" | sed 's/&/\\&/g; s/%/\\%/g; s/_/\\_/g; s/\$/\\\$/g; s/#{}/\\{\\}/g; s/\^/\\^/g; s/~/{\~}/g; s/\\/\\\\/g' +} + +{ + printf "\onecolumn\n" + printf "\\centering\n" + printf "\\\\begin{longtable}{|l|c|c|c|}\n" + printf "\\hline\n" + printf "Metric & 90th Percentile & Mean & Median \\\\\\\\\\\\\ \n" + printf "\\hline\n" +} >> "${aggregation_table}" + +shopt -s nullglob +files=("${TARGET}/data/aggregation/"*.csv) +shopt -u nullglob + +if [ "${#files[@]}" -gt 0 ]; then + for file in "${files[@]}"; do + metric=$(basename "${file}" | cut -d '.' -f 1) + value=$(<"${file}") + if [[ "${file}" =~ \.90th_percentile\.csv$ ]]; then + percentile="${value}" + mean="" + median="" + elif [[ "${file}" =~ \.mean\.csv$ ]]; then + mean="${value}" + elif [[ "${file}" =~ \.median\.csv$ ]]; then + median="${value}" + fi + percentile=$(latex_escape "${percentile}") + mean=$(latex_escape "${mean}") + median=$(latex_escape "${median}") + if [[ -n "${percentile}" && -n "${mean}" && -n "${median}" ]]; then + printf "%s & %s & %s & %s \\\\\\\\\\\\\ \n" "${metric}" "${percentile}" "${mean}" "${median}" >> "${aggregation_table}" + fi + done +fi + +# Close the LaTeX table +printf "\\hline\n" >> "${aggregation_table}" +printf "\\\\end{longtable}\n" >> "${aggregation_table}" + +printf "Aggregation table generated in %s\n" "${aggregation_table}" + # It's important to make sure the path is absolute, for LaTeX t=$(realpath "${TARGET}") diff --git a/tex/report.tex b/tex/report.tex index 9c558d54..0989faa3 100644 --- a/tex/report.tex +++ b/tex/report.tex @@ -32,6 +32,10 @@ 
\usepackage{paralist} \usepackage{ffcode} \usepackage[capitalize]{cleveref} +\usepackage{amsmath} % for mathematical symbols +\usepackage{graphicx} % for graphics inclusion and box scaling +\usepackage{array} % for better table control +\usepackage{longtable} % for tables that span multiple pages \usepackage{silence} \WarningFilter{microtype}{Unable to apply patch `footnote'} @@ -207,11 +211,15 @@ \section{Results}\label{sec:results} \begin{itemize} \input{$TARGET/temp/list-of-metrics.tex} \end{itemize} +Here is a table of metrics and their aggregated values: +\iexec{cat "${TARGET}/temp/aggregation_table.tex" }\unskip{} + The dataset was built by \iexec{nproc}\unskip{} CPUs\iexec{"${LOCAL}/help/tdiff.sh" "$(cat "${TARGET}/start.txt")"}\unskip{}. + \section{Limitations}\label{sec:limitations} As of January 2023, \citet{dohmke2023} reported that GitHub hosts more than