Data Set Up

# Make sure the output folders exist (no warning if they are already there),
# then fix the RNG seed so later random operations are reproducible.
invisible(lapply(c("data", "plots"), dir.create, showWarnings = FALSE))

set.seed(1234)

devtools::load_all("../metricminer/")
## ℹ Loading metricminer
library("ggplot2")
library("dlstats")
library("magrittr")

# Run the local authentication script that sits next to this report
source("local_auth_2.R")
## You chose to cache your credentials, if you change your mind, run metricminer::delete_creds().
##             
##  Be careful not to push .httr-oauth or RDS files to GitHub or share it anywhere.
# Authenticate with Calendly using a token read from the environment.
# Sys.getenv() returns "" when the variable is unset.
auth_from_secret("calendly", token = Sys.getenv("METRICMINER_CALENDLY"))

# Authenticate with Google using refresh/access tokens read from the
# environment; cache = TRUE requests that the credentials be cached.
auth_from_secret("google",
                 refresh_token = Sys.getenv("METRICMINER_GOOGLE_REFRESH"),
                 access_token = Sys.getenv("METRICMINER_GOOGLE_ACCESS"),
                 cache = TRUE)
## You chose to cache your credentials, if you change your mind, run metricminer::delete_creds().
##             
##  Be careful not to push .httr-oauth or RDS files to GitHub or share it anywhere.
# Authenticate with GitHub using a personal access token from the environment
auth_from_secret("github", token = Sys.getenv("METRICMINER_GITHUB_PAT"))

# Google Analytics user info; ga_accounts$id is used below to pick accounts
ga_accounts <- get_ga_user()
## Using user-supplied cached token using authorize("google")
## Auto-refreshing stale OAuth token.
# Calendly user info for the authenticated token
calendly_account <- get_calendly_user()
## Using user-supplied cached token using authorize("calendly")
# Three-colour palette passed to the manual fill/colour scales in the plots
cbPalette <- c("#E69F00", "#56B4E9", "#CC79A7")

Collect online course data

# Google Analytics results are cached as three RDS files under data/.
# The API is queried again whenever ANY of the three files is missing, so the
# read branch can never fail on a partially written cache (previously only the
# metrics file was checked).
ga_cache_files <- c(
  metrics = file.path("data", "itcr_ga_metric_data.RDS"),
  dims = file.path("data", "itcr_ga_dims_data.RDS"),
  link_clicks = file.path("data", "itcr_ga_link_click_data.RDS")
)

if (!all(file.exists(ga_cache_files))) {
  fhdsl_stats_list <- get_all_ga_metrics(account_id = ga_accounts$id[1])
  itcr_stats_list <- get_all_ga_metrics(account_id = ga_accounts$id[2])

  # Some Google Analytics properties are not ITCR courses; drop them
  not_itcr <- c("hutchdatasci", "whoiswho", "MMDS", "FH Cluster 101", "AnVIL_Researcher_Journey")

  # Combine both accounts for each table, filter, and cache the result
  ga_metrics <- dplyr::bind_rows(fhdsl_stats_list$metrics, itcr_stats_list$metrics) %>%
    dplyr::filter(!(website %in% not_itcr))
  saveRDS(ga_metrics, ga_cache_files[["metrics"]])

  ga_dims <- dplyr::bind_rows(fhdsl_stats_list$dimensions, itcr_stats_list$dimensions) %>%
    dplyr::filter(!(website %in% not_itcr))
  saveRDS(ga_dims, ga_cache_files[["dims"]])

  ga_link_clicks <- dplyr::bind_rows(fhdsl_stats_list$link_clicks, itcr_stats_list$link_clicks) %>%
    dplyr::filter(!(website %in% not_itcr))
  saveRDS(ga_link_clicks, ga_cache_files[["link_clicks"]])
} else {
  ga_metrics <- readRDS(ga_cache_files[["metrics"]])
  ga_dims <- readRDS(ga_cache_files[["dims"]])
  ga_link_clicks <- readRDS(ga_cache_files[["link_clicks"]])
}

# Manually curated course information from the "Course_data" sheet.
# col_types declares, in column order: 2 character, 3 Date, 1 character and
# 3 integer columns, so the date columns already arrive as Date objects.
# The former `mutate_if(is.numeric.Date, lubridate::ymd)` step was removed:
# base R's `is.numeric.Date()` always returns FALSE, so it never selected a
# column and did nothing.
manual_course_info <- googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1-8vox2LzkVKzhmSFXCWjwt3jFtK-wHibRAq2fqbxEyo/edit#gid=1550012125",
  sheet = "Course_data",
  col_types = "ccDDDciii"
)
## ✔ Reading from "ITN_collaborations_and_course_metrics".
## ✔ Range ''Course_data''.
## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion

## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion

## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion

## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion

## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion

## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion
# Join this all together
# Attach the manually curated course info to the Google Analytics metrics.
# The join key is stated explicitly so that a column later added to either
# table cannot silently become part of the key.
itcr_course_data <- ga_metrics %>%
  dplyr::left_join(manual_course_info, by = "website") %>%
  # Shorten one long course title for display (done after the join so the
  # original name is still available for matching)
  dplyr::mutate(website = dplyr::case_when(
    website == "Advanced Reproducibility in Cancer Informatics" ~ "Advanced Reproducibility",
    TRUE ~ website
  ))
## Joining with `by = join_by(website)`
# Write the joined course metrics to data/itcr_course_metrics.tsv
readr::write_tsv(
  x = itcr_course_data,
  file = file.path("data", "itcr_course_metrics.tsv")
)

Get OTTR courses

# URLs of the sync.yml files of the template repositories; each one is read
# by extract_repos() below to list the repositories it syncs to.
# NOTE(review): the C-MOOR URL contains a double slash before "main"; the
# recorded output lists C-MOOR repos so it resolved, but consider normalizing.
sync_yamls <- c(
  "https://raw.githubusercontent.com/jhudsl/OTTR_Template/main/.github/sync.yml", 
  "https://raw.githubusercontent.com/C-MOOR/C-MOOR_Template//main/.github/sync.yml", 
  "https://raw.githubusercontent.com/jhudsl/AnVIL_Template/main/.github/sync.yml",
  "https://raw.githubusercontent.com/datatrail-jhu/DataTrail_Template/main/.github/sync.yml"
)

#' List the repositories named in a sync YAML file
#'
#' @param yaml Path or URL handed to `yaml::read_yaml()`.
#' @return Character vector with one element per line found in the `repos`
#'   entries of the file's `group` list.
extract_repos <- function(yaml) {
  sync_config <- yaml::read_yaml(yaml)
  # Each group carries a newline-separated block of repository names
  repo_blocks <- unlist(purrr::map(sync_config$group, "repos"))
  unlist(strsplit(repo_blocks, "\n"))
}

# Gather the repositories listed in every sync file into one vector
all_ottr_repos <- sync_yamls %>%
  lapply(extract_repos) %>%
  unlist()

# One row per distinct repository, split into organization/repo and flagged
# as "template" when the repo name mentions a template, "course" otherwise
ottr_df <- data.frame(repo_name = all_ottr_repos) %>%
  tidyr::separate(
    repo_name,
    into = c("organization", "repo"),
    sep = "\\/",
    remove = FALSE
  ) %>%
  dplyr::mutate(
    template = dplyr::if_else(
      stringr::str_detect(repo, "template|Template"),
      "template",
      "course",
      missing = "course"
    )
  ) %>%
  dplyr::distinct()

# Number of OTTR repositories per GitHub organization
dplyr::count(ottr_df, organization)
##         organization  n
## 1             C-MOOR  3
## 2  PracticalGenomics  1
## 3          abyzovlab  1
## 4      datatrail-jhu 15
## 5              fhdsl 23
## 6        griffithlab  3
## 7             jhudsl 26
## 8          mccoy-lab  1
## 9    opencasestudies  1
## 10            tmm211  1
# Number of repositories that are courses rather than templates
dplyr::count(dplyr::filter(ottr_df, template == "course"))
##    n
## 1 69

Collaboration Info

# Collaboration records; no sheet is named, so read_sheet() uses its default
collabs <- googlesheets4::read_sheet(
  ss = "https://docs.google.com/spreadsheets/d/1-8vox2LzkVKzhmSFXCWjwt3jFtK-wHibRAq2fqbxEyo/edit#gid=0"
)
## ✔ Reading from "ITN_collaborations_and_course_metrics".
## ✔ Range 'SuccesfulCollabs'.
# Number of collaboration records (rows) before categories are split
nrow(collabs)
## [1] 106
# Count and share of collaborations per affiliation type
dplyr::count(collabs, ITN_ITCR_or_external) %>%
  dplyr::mutate(perc = n / sum(n))
## # A tibble: 8 × 3
##   ITN_ITCR_or_external             n    perc
##   <chr>                        <int>   <dbl>
## 1 ITCR                            67 0.632  
## 2 ITN                              5 0.0472 
## 3 NCI                              2 0.0189 
## 4 NIH                              2 0.0189 
## 5 external                         2 0.0189 
## 6 external (was after leaving)     1 0.00943
## 7 external NIH/NCI intermural      3 0.0283 
## 8 neither                         24 0.226
# A collaboration can list several comma-separated categories: give each one
# its own row, trim stray whitespace and drop the "?" placeholder.
# (The stray trailing comma that passed an empty argument to separate_rows()
# has been removed.)
collabs <- collabs %>%
  tidyr::separate_rows("Category", sep = ", ") %>%
  dplyr::mutate(Category = trimws(Category)) %>%
  dplyr::filter(Category != "?")

# Row count after splitting (one row per collaboration/category pair)
nrow(collabs)
## [1] 154
# Bar chart of collaboration counts per category, tallest bar first
collabs %>%
  dplyr::count(Category) %>%
  ggplot2::ggplot(
    ggplot2::aes(x = reorder(Category, -n), y = n, fill = Category)
  ) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::labs(x = "") +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 6),
    legend.position = "none",
    plot.margin = ggplot2::unit(c(.75, .5, .5, .5), "cm")
  )

# No plot argument: ggsave() writes the most recent plot
ggplot2::ggsave(
  filename = file.path("plots", "itn_collaboration_types.png"),
  width = 7,
  height = 5
)
# Attendance at OPEN meetings is derived from the plain-text agenda: every
# line containing "20" is treated as a meeting header, and the number of
# lines up to the next header (or to the end of the file for the last
# meeting) is used as that meeting's attendance.
# NOTE(review): matching on "20" also catches any non-header line that
# happens to contain "20" - confirm against the agenda file.
open_agenda <- readLines("ITCR OPEN Group Agenda.txt")

# Locate the headers once and reuse the positions for both labels and counts
header_idx <- grep("20", open_agenda)

dates <- gsub(" Meeting| meeting", "", open_agenda[header_idx])
month <- stringr::word(dates, sep = " ")
year <- stringr::word(dates, sep = " ", start = 2)

# Distance from each header to the next one; diff() also behaves for zero or
# one header, where the former 1:length()/2:length() indexing misaligned.
attendance <- diff(c(header_idx, length(open_agenda)))

open_attendance <- data.frame(
  date = lubridate::ym(paste(year,"-", month)),
  attendance)

# Bar chart of attendance per meeting date, each bar labelled with its count
open_attendance %>%
  ggplot2::ggplot(ggplot2::aes(x = date, y = attendance)) +
  ggplot2::geom_col(fill = "lightgreen") +
  ggplot2::geom_text(ggplot2::aes(label = attendance), size = 3, vjust = -1) +
  ggplot2::labs(x = "") +
  ggplot2::theme_classic() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)
  )

ggplot2::ggsave(
  filename = file.path("plots", "open_attendance.png"),
  width = 4,
  height = 3
)

Collect Loqui Video Creation User data

Unique users of Loqui

# Loqui usage log; rows are counted per `email` below
loqui_usage <- googlesheets4::read_sheet(
  ss = "https://docs.google.com/spreadsheets/d/1G_HTU-bv2k5txExP8EH3ScUfGqtW1P3syThD84Z-g9k/edit#gid=0"
)
## ✔ Reading from "Loqui User Data".
## ✔ Range 'Sheet1'.
# Number of distinct email addresses in the usage log.
# count() is namespace-qualified because dplyr is never attached here.
loqui_usage %>% dplyr::count(email) %>% dplyr::pull(email) %>% length()
## [1] 18

Number of videos made with Loqui

# Total number of usage-log rows, summed over the per-email counts.
# count() is namespace-qualified because dplyr is never attached here.
loqui_usage %>% dplyr::count(email) %>% dplyr::pull(n) %>% sum()
## [1] 477

Collect Workshop Feedback Info

# Slido exports are cached in data/itcr_slido_data.RDS: reuse the cache when
# it exists, otherwise fetch the files from the Drive folder and store them.
if (file.exists(file.path("data", "itcr_slido_data.RDS"))) {
  itcr_slido_data <- readRDS(file.path("data", "itcr_slido_data.RDS"))
} else {
  itcr_drive_id <- "https://drive.google.com/drive/folders/0AJb5Zemj0AAkUk9PVA"
  itcr_slido_data <- get_slido_files(itcr_drive_id)

  saveRDS(itcr_slido_data, file.path("data", "itcr_slido_data.RDS"))
}
# Per-user poll answers with snake_case column names
poll_data <- itcr_slido_data$`Polls-per-user` %>%
  janitor::clean_names()

# Classify each numeric "recommend" answer into Net Promoter Score groups:
# below 7 = detractors, 7 or 8 = passives, above 8 = promoters.
# Answers that are missing or not numeric become NA and are dropped before
# counting. Previously the NA group was counted as well, and because the
# downstream sum(..., na.rm = TRUE) only drops NA *values*, those
# non-respondents inflated the NPS denominator.
promoters_categories <- poll_data %>%
  dplyr::mutate(
    how_likely_would_you_be_to_recommend_this_workshop =
      as.numeric(how_likely_would_you_be_to_recommend_this_workshop),
    promoter = dplyr::case_when(
      how_likely_would_you_be_to_recommend_this_workshop < 7 ~ "detractors",
      how_likely_would_you_be_to_recommend_this_workshop == 7 ~ "passives",
      how_likely_would_you_be_to_recommend_this_workshop == 8 ~ "passives",
      how_likely_would_you_be_to_recommend_this_workshop > 8 ~ "promoters",
      TRUE ~ NA_character_
    )
  ) %>%
  dplyr::filter(!is.na(promoter)) %>%
  dplyr::count(promoter)
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `how_likely_would_you_be_to_recommend_this_workshop =
##   as.numeric(how_likely_would_you_be_to_recommend_this_workshop)`.
## Caused by warning:
## ! NAs introduced by coercion
# Named vector of counts, one entry per promoter category
promoter_sums <- promoters_categories$n
names(promoter_sums) <- promoters_categories$promoter
# Total number of counted answers.
# NOTE(review): na.rm only drops NA *values*; if promoters_categories has a
# row whose category is NA, its count is still included in this total.
sum(promoter_sums, na.rm = TRUE)
## [1] 119
# Net Promoter Score
# = share of promoters minus share of detractors (NA if either group is absent)
promoter_sums["promoters"]/sum(promoter_sums, na.rm = TRUE)- promoter_sums["detractors"]/sum(promoter_sums, na.rm = TRUE)
## promoters 
## 0.2521008

Data Visualizations

How likely would you be to recommend this workshop?

# Distribution of the "recommend" ratings, pooling both versions of the
# question. Built with ggplot() because qplot() is deprecated since ggplot2
# 3.4.0; this also removes the duplicated bar layer that
# qplot(geom = "bar") + geom_bar() used to draw.
# Non-numeric answers become NA in as.numeric() and are left out of the bars.
data.frame(
  rating = as.numeric(c(
    poll_data$how_likely_would_you_be_to_recommend_this_workshop,
    poll_data$how_likely_would_you_be_to_recommend_this_workshop_2
  ))
) %>%
  ggplot2::ggplot(ggplot2::aes(x = rating)) +
  ggplot2::geom_bar(fill = "#CBC3E3") +
  ggplot2::theme_classic() +
  ggplot2::labs(
    title = "How likely would you be to recommend this workshop?",
    x = "Rating"
  )
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning: Removed 165 rows containing non-finite values (`stat_count()`).
## Removed 165 rows containing non-finite values (`stat_count()`).

  # No plot argument is given, so ggsave() writes the most recent plot
  ggplot2::ggsave(file.path("plots", "itn_workshop_rec.png"), width = 4, height = 2)
## Warning: Removed 165 rows containing non-finite values (`stat_count()`).
## Removed 165 rows containing non-finite values (`stat_count()`).

Workshop Relevance Feedback

  # Keep only rows with one of the five recognized answers and turn the
  # answer into a factor ordered from least to most likely.
  poll_data <- poll_data %>%
    dplyr::filter(
      how_likely_are_you_to_use_what_you_learned_in_your_daily_work %in%
        c("Extremely likely", "Likely", "Not very likely", "Somewhat likely", "Very likely")
    ) %>%
    dplyr::mutate(
      how_likely_are_you_to_use_what_you_learned_in_your_daily_work = factor(
        how_likely_are_you_to_use_what_you_learned_in_your_daily_work,
        levels = c("Not very likely",  "Somewhat likely", "Likely", "Very likely", "Extremely likely")
      )
    )

  # Bar chart of answer counts in that order
  poll_data %>%
    ggplot2::ggplot(
      ggplot2::aes(x = how_likely_are_you_to_use_what_you_learned_in_your_daily_work)
    ) +
    ggplot2::geom_bar(fill = "#CBC3E3") +
    ggplot2::labs(
      title = "How likely are you to use what you learned in your daily work?",
      x = ""
    ) +
    ggplot2::theme_classic() +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))

  ggplot2::ggsave(
    filename = file.path("plots", "itn_relevance.png"),
    width = 4,
    height = 2
  )

Wordclouds for Qualitative Workshop Feedback

# Download the English udpipe model; the result's file_model entry (used
# below) holds the path of the downloaded file
ud_model <- udpipe::udpipe_download_model(language = "english")
## Downloading udpipe model from https://raw.githubusercontent.com/jwijffels/udpipe.models.ud.2.5/master/inst/udpipe-ud-2.5-191206/english-ewt-ud-2.5-191206.udpipe to /Users/candacesavonen/Desktop/GitRepos/Hutch/itn-metric/english-ewt-ud-2.5-191206.udpipe
##  - This model has been trained on version 2.5 of data from https://universaldependencies.org
##  - The model is distributed under the CC-BY-SA-NC license: https://creativecommons.org/licenses/by-nc-sa/4.0
##  - Visit https://github.com/jwijffels/udpipe.models.ud.2.5 for model license details.
##  - For a list of all models and their licenses (most models you can download with this package have either a CC-BY-SA or a CC-BY-SA-NC license) read the documentation at ?udpipe_download_model. For building your own models: visit the documentation by typing vignette('udpipe-train', package = 'udpipe')
## Downloading finished, model stored at '/Users/candacesavonen/Desktop/GitRepos/Hutch/itn-metric/english-ewt-ud-2.5-191206.udpipe'
# Replace the download record with the model loaded from that file
ud_model <- udpipe::udpipe_load_model(ud_model$file_model)

What did you like most about the workshop?

# Annotate the free-text answers, keep nouns, adjectives and adverbs, and
# count how often each lower-cased lemma occurs
results <- as.data.frame(
  udpipe::udpipe_annotate(ud_model, x = poll_data$what_did_you_like_most_about_the_workshop)
) %>%
  dplyr::filter(upos %in% c("NOUN", "ADJ", "ADV")) %>%
  dplyr::count(lemma = tolower(lemma))

# Word cloud of lemmas that occur at least 3 times
wordcloud::wordcloud(
  words = results$lemma,
  freq = results$n,
  scale = c(3, .4),
  min.freq = 3,
  colors = c("#98fb98", "#83D475", "#355E3B")
)

Please share any recommendations you have for improvements

# Same lemma counting as above, applied to the improvement suggestions
rec_results <- as.data.frame(
  udpipe::udpipe_annotate(ud_model, x = poll_data$please_share_any_recommendations_you_have_for_improvements)
) %>%
  dplyr::filter(upos %in% c("NOUN", "ADJ", "ADV")) %>%
  dplyr::count(lemma = tolower(lemma))

# Word cloud of lemmas that occur at least 3 times
wordcloud::wordcloud(
  words = rec_results$lemma,
  freq = rec_results$n,
  scale = c(4, .4),
  min.freq = 3,
  colors = c("#98fb98", "#83D475", "#355E3B")
)

Get number of unique workshop attendees

# The attendance email was asked under three different question wordings;
# pool the three columns and count distinct addresses.
# Missing answers (NA) are removed first - previously NA itself counted as
# one "unique email" whenever any of the columns had a missing value.
all_emails <- c(
  itcr_slido_data$`Polls-per-user`$`Please submit your email so we can log your attendance`,
  itcr_slido_data$`Polls-per-user`$`What's your email?`,
  itcr_slido_data$`Polls-per-user`$`What is your email?`
)
unique_emails <- unique(all_emails[!is.na(all_emails)])

length(unique_emails)
## [1] 47

Unique visitors to websites

# Total users per website, most visited first, coloured by target audience
itcr_course_data %>%
  ggplot2::ggplot(
    ggplot2::aes(
      x = reorder(website, -totalUsers),
      y = totalUsers,
      fill = target_audience
    )
  ) +
  ggplot2::geom_col() +
  ggplot2::geom_text(ggplot2::aes(label = totalUsers), size = 3, vjust = -1) +
  ggplot2::labs(x = "") +
  ggplot2::ylim(0, 6000) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))

ggplot2::ggsave(
  filename = file.path("plots", "itn_website_visits.png"),
  width = 4,
  height = 3
)

Table of user engagement

# Table of active users and average session duration per website
user_totals <- knitr::kable(
  dplyr::select(
    janitor::clean_names(ga_metrics),
    website, active_users, average_session_duration
  ),
  digits = 2,
  long_table = TRUE,
  padding = 2
)

user_totals
website active_users average_session_duration
AI for Efficient Programming 120 362.52
NIH Data Sharing 172 230.25
AI for Decision Makers 42 598.87
ITN Website 5542 181.93
Leadership in Cancer Informatics 396 229.81
Documentation and Usability 734 192.49
Computing for Cancer Informatics 610 279.10
Reproducibility in Cancer Informatics 1315 194.69
Advanced Reproducibility in Cancer Informatics 334 221.12
Ethical Data Handling 59 278.85
OTTR website 787 67.77
Choosing Genomics Tools 97 248.51
Overleaf and Latex for Scientific Articles 17 471.66
GitHub Automation for Scientists 23 202.12
metricminer.org 2 26.40
# Table of page-view and engagement metrics per website
user_engagement <- knitr::kable(
  dplyr::select(
    janitor::clean_names(ga_metrics),
    website, screen_page_views_per_user, sessions, screen_page_views, engagement_rate
  ),
  digits = 2,
  long_table = TRUE,
  padding = 2
)

user_engagement
website screen_page_views_per_user sessions screen_page_views engagement_rate
AI for Efficient Programming 6.85 335 822 0.55
NIH Data Sharing 5.06 364 871 0.43
AI for Decision Makers 32.60 253 1369 0.69
ITN Website 2.58 8889 14310 0.45
Leadership in Cancer Informatics 3.76 752 1489 0.52
Documentation and Usability 2.66 1080 1952 0.52
Computing for Cancer Informatics 6.01 1317 3666 0.52
Reproducibility in Cancer Informatics 2.97 2080 3908 0.45
Advanced Reproducibility in Cancer Informatics 4.08 712 1364 0.51
Ethical Data Handling 8.31 207 490 0.51
OTTR website 1.41 1044 1112 0.31
Choosing Genomics Tools 6.20 253 601 0.58
Overleaf and Latex for Scientific Articles 9.94 49 169 0.61
GitHub Automation for Scientists 3.91 44 90 0.50
metricminer.org 2.00 3 4 0.67
# One facet per engagement metric, with a bar per course (the three
# non-course sites are excluded), coloured by target audience
itcr_course_data %>%
  janitor::clean_names() %>%
  dplyr::select(
    website, screen_page_views_per_user, average_session_duration,
    event_count_per_user, engagement_rate, target_audience
  ) %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  tidyr::pivot_longer(
    cols = !c(website, target_audience),
    names_to = "metric_name",
    values_to = "value"
  ) %>%
  ggplot2::ggplot(ggplot2::aes(x = website, y = value, fill = target_audience)) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::facet_wrap(~metric_name, scales = "free_y") +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 8),
    plot.margin = ggplot2::unit(c(1.5, .5, .5, .5), "cm")
  )

ggplot2::ggsave(
  filename = file.path("plots", "itn_engagement_stats.png"),
  width = 8,
  height = 6
)

ITN Course and Website Traffic Over Time

# Add date columns to the per-dimension Google Analytics rows, flag whether
# a row belongs to the main ITN website or to a course site, and attach the
# manually curated course info.
# The join key is stated explicitly so that a column later added to either
# table cannot silently become part of the key.
web_traffic_overtime <- ga_dims %>%
  dplyr::mutate(
    date = lubridate::ymd(paste0(year, "-", month, "-", day)),
    month_year = lubridate::ym(paste0(year, "-", month)),
    web_yn = dplyr::case_when(
      website == "ITN Website" ~ "ITN Website",
      website != "ITN Website" ~ "ITN Online Course Website"
    )
  ) %>%
  dplyr::left_join(manual_course_info, by = "website") %>%
  # Shorten one long course title for display (after the join, which needs
  # the original name)
  dplyr::mutate(website = dplyr::case_when(
    website == "Advanced Reproducibility in Cancer Informatics" ~ "Advanced Reproducibility",
    TRUE ~ website
  ))
## Joining with `by = join_by(website)`
# Monthly row counts across the online course sites.
# The filter now EXCLUDES the three non-course sites, matching the axis
# label, the output filename and every other course plot in this report;
# previously it kept only those three sites.
traffic_plot <- web_traffic_overtime %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  dplyr::count(month_year) %>%
  ggplot2::ggplot(ggplot2::aes(y = n, x = month_year)) +
  ggplot2::geom_bar(stat = "identity", fill = "pink") +
  ggplot2::scale_x_date(date_labels = "%b %Y") +
  ggplot2::ylab("Unique ITN Online Course Visitors") +
  ggplot2::xlab("") +
  ggplot2::theme_minimal() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1))

# Pass the plot explicitly instead of relying on the implicit last plot
ggplot2::ggsave(
  file.path("plots", "itn_course_traffic_plot.png"),
  plot = traffic_plot,
  width = 4,
  height = 3.5
)

traffic_plot

# Long table of learner counts: one row per course and modality (website
# users, Coursera, Leanpub), with human-readable modality labels. The three
# non-course sites are excluded.
long_df <- itcr_course_data %>%
  dplyr::select(
    website, totalUsers, coursera_count, leanpub_count, target_audience
  ) %>%
  tidyr::pivot_longer(
    cols = !c(website, target_audience),
    names_to = "modality",
    values_to = "learner_count"
  ) %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  dplyr::mutate(
    modality = dplyr::case_when(
      modality == "leanpub_count" ~ "Total Leanpub Enrollments",
      modality == "coursera_count" ~ "Total Coursera Enrollments",
      modality == "totalUsers" ~ "Website Learners",
      TRUE ~ modality
    )
  )

Total learners from each modality

# Learners per modality, one facet per target audience
long_df %>%
  dplyr::group_by(modality, target_audience) %>%
  dplyr::summarise(total_learners = sum(learner_count, na.rm = TRUE)) %>%
  ggplot2::ggplot(
    ggplot2::aes(
      x = reorder(modality, -total_learners),
      y = total_learners,
      fill = target_audience
    )
  ) +
  ggplot2::geom_col(na.rm = TRUE) +
  ggplot2::geom_text(
    ggplot2::aes(label = total_learners),
    size = 3, vjust = -1, na.rm = TRUE
  ) +
  ggplot2::labs(x = "", y = "Visitors/Enrollees") +
  ggplot2::ylim(0, 4200) +
  ggplot2::facet_wrap(~target_audience) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
## `summarise()` has grouped output by 'modality'. You can override using the
## `.groups` argument.

ggplot2::ggsave(
  filename = file.path("plots", "itn_total_enrollments.png"),
  width = 8,
  height = 3
)

Total learners by course

# Learners per course summed over all modalities, largest course first
long_df %>%
  dplyr::group_by(website, target_audience) %>%
  dplyr::summarise(total_learners = sum(learner_count, na.rm = TRUE)) %>%
  ggplot2::ggplot(
    ggplot2::aes(
      x = reorder(website, -total_learners),
      y = total_learners,
      fill = target_audience
    )
  ) +
  ggplot2::geom_col() +
  ggplot2::geom_text(
    ggplot2::aes(label = total_learners),
    size = 3, vjust = -1, na.rm = TRUE
  ) +
  ggplot2::labs(x = "", y = "Total learners by course") +
  ggplot2::ylim(0, 1800) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 8)
  )
## `summarise()` has grouped output by 'website'. You can override using the
## `.groups` argument.

ggplot2::ggsave(
  filename = file.path("plots", "itn_total_learners_by_course.png"),
  width = 10,
  height = 5
)
# The same totals as a table, sorted in descending order
long_df %>%
  dplyr::group_by(website, target_audience) %>%
  dplyr::summarise(total_learners = sum(learner_count, na.rm = TRUE)) %>%
  dplyr::arrange(dplyr::desc(total_learners)) %>%
  knitr::kable()
## `summarise()` has grouped output by 'website'. You can override using the
## `.groups` argument.
website target_audience total_learners
Reproducibility in Cancer Informatics New to data 1690
Computing for Cancer Informatics New to data 1562
Leadership in Cancer Informatics Leadership 1051
Documentation and Usability Software developers 1044
Advanced Reproducibility Software developers 613
AI for Efficient Programming Software developers 264
NIH Data Sharing Leadership 175
AI for Decision Makers Leadership 136
Choosing Genomics Tools New to data 110
Ethical Data Handling Leadership 60
GitHub Automation for Scientists Software developers 25
Overleaf and Latex for Scientific Articles Leadership 17

Course traffic by course

# Monthly row counts per course (non-course sites excluded), one facet per
# course, coloured by target audience
web_traffic_overtime %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  dplyr::count(website, month_year, target_audience) %>%
  ggplot2::ggplot(ggplot2::aes(x = month_year, y = n, fill = target_audience)) +
  ggplot2::geom_col() +
  ggplot2::scale_x_date(date_labels = "%b %Y") +
  ggplot2::labs(y = "Unique ITN Website Learners Overtime") +
  ggplot2::facet_wrap(~website) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 8)
  )

ggplot2::ggsave(
  filename = file.path("plots", "itn_course_traffic_plot_by_course.png"),
  width = 10,
  height = 5
)

Number of learners by how long the course has been out

# Coursera enrollment count against Coursera launch date, courses only
course_only <- !(itcr_course_data$website %in% c("ITN Website", "OTTR website", "metricminer.org"))

ggplot2::ggplot(
  itcr_course_data[course_only, ],
  ggplot2::aes(x = coursera_launch, y = coursera_count, color = target_audience)
) +
  ggplot2::geom_point() +
  ggplot2::scale_color_manual(values = cbPalette) +
  ggplot2::theme_minimal()
## Warning: Removed 5 rows containing missing values (`geom_point()`).

# Website users against how long each course has been published.
# today() is namespace-qualified because lubridate is never attached in this
# report; the bare call only resolved through another package's environment.
itcr_course_data %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  dplyr::mutate(duration = lubridate::today() - website_launch) %>%
  ggplot2::ggplot(ggplot2::aes(x = duration, y = totalUsers, color = target_audience)) +
  ggplot2::geom_point() +
  ggplot2::theme_minimal() +
  ggplot2::xlab("How long the course has been out") +
  ggplot2::scale_color_manual(values = cbPalette) +
  # x and y are inherited from the plot-level aesthetics
  ggplot2::geom_text(ggplot2::aes(label = website), size = 3, vjust = -1, na.rm = TRUE)
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.

ggplot2::ggsave(file.path("plots", "itn_website_traffic_by_how_long_the_course_has_been_published.png"), width = 10, height = 5)
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.

Software engagement

# Monthly row counts for the non-course websites.
# The filter previously EXCLUDED these sites, so the plot labelled "software
# website visitors" actually showed course traffic; it now keeps them.
# NOTE(review): "ITN Website" is kept because it belongs to the non-course
# site list used throughout this report - confirm whether it should count as
# a software website or be dropped here.
web_traffic_overtime %>%
  dplyr::filter(website %in% c("ITN Website", "OTTR website", "metricminer.org")) %>%
  dplyr::group_by(website, month_year) %>%
  dplyr::count() %>%
  ggplot2::ggplot(ggplot2::aes(y = n, x = month_year)) +
  ggplot2::geom_bar(stat = "identity", fill = "pink") +
  ggplot2::scale_x_date(date_labels = "%b %Y") +
  ggplot2::ylab("Unique ITN software website visitors") +
  ggplot2::theme_minimal() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
                 strip.text.x = ggplot2::element_text(size = 8))

ggplot2::ggsave(file.path("plots", "itn_software_website_traffic.png"), width = 10, height = 5)

CRAN downloads

 #download_stats <- cran_stats(c("ottrpal", "conrad", "ari", "text2speech"))

 #download_stats %>% dplyr::summarize(download_total = sum(downloads))

 #download_stats %>% dplyr::group_by(package) %>%
 #  dplyr::summarize(download_total = sum(downloads))

#sum(download_stats$downloads) + 426

#if (!is.null(download_stats)) {
#  print(head(download_stats))
#  ggplot(download_stats, aes(end, downloads, group=package, color=package)) +
#    geom_line() +
#    geom_point() +
#    scale_y_log10() + 
#    theme_minimal()
  
#  ggplot2::ggsave(file.path("plots", "itn_software_cran_downloads.png"), width = 10, height = 5)
# }

Coursera

# Coursera enrollments per course (courses with a positive count only),
# largest first
itcr_course_data %>%
  dplyr::filter(coursera_count > 0) %>%
  ggplot2::ggplot(
    ggplot2::aes(
      x = reorder(website, -coursera_count),
      y = coursera_count,
      fill = target_audience
    )
  ) +
  ggplot2::geom_col(na.rm = TRUE) +
  ggplot2::geom_text(
    ggplot2::aes(label = coursera_count),
    size = 3, vjust = -1, na.rm = TRUE
  ) +
  ggplot2::labs(x = "", y = "Coursera enrollments") +
  ggplot2::ylim(0, 1200) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))

ggplot2::ggsave(
  filename = file.path("plots", "itn_coursera_enrollments.png"),
  width = 4,
  height = 2
)

Leanpub

# Leanpub enrollments per course (courses with a positive count only),
# largest first
itcr_course_data %>%
  dplyr::filter(leanpub_count > 0) %>%
  ggplot2::ggplot(
    ggplot2::aes(
      x = reorder(website, -leanpub_count),
      y = leanpub_count,
      fill = target_audience
    )
  ) +
  ggplot2::geom_col(na.rm = TRUE) +
  ggplot2::geom_text(
    ggplot2::aes(label = leanpub_count),
    size = 3, vjust = -1, na.rm = TRUE
  ) +
  ggplot2::labs(x = "", y = "Leanpub enrollments") +
  ggplot2::ylim(0, 40) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))

ggplot2::ggsave(
  filename = file.path("plots", "itn_leanpub_enrollments.png"),
  width = 4,
  height = 2
)

Session Info

# Record the R version, platform and package versions used for this run
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.5.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] dlstats_0.1.7     ggplot2_3.4.3     metricminer_0.1.0 testthat_3.2.0   
## [5] magrittr_2.0.3   
## 
## loaded via a namespace (and not attached):
##   [1] remotes_2.4.2.1     rlang_1.1.2         snakecase_0.11.1   
##   [4] compiler_4.3.1      systemfonts_1.0.4   callr_3.7.3        
##   [7] vctrs_0.6.5         stringr_1.5.1       profvis_0.3.8      
##  [10] pkgconfig_2.0.3     crayon_1.5.2        fastmap_1.1.1      
##  [13] ellipsis_0.3.2      labeling_0.4.3      utf8_1.2.4         
##  [16] promises_1.2.1      rmarkdown_2.25      sessioninfo_1.2.2  
##  [19] tzdb_0.4.0          ps_1.7.5            ragg_1.2.5         
##  [22] purrr_1.0.2         bit_4.0.5           xfun_0.41          
##  [25] cachem_1.0.8        jsonlite_1.8.8      highr_0.10         
##  [28] later_1.3.1         parallel_4.3.1      prettyunits_1.2.0  
##  [31] R6_2.5.1            bslib_0.6.1         stringi_1.8.3      
##  [34] RColorBrewer_1.1-3  pkgload_1.3.3       brio_1.1.3         
##  [37] lubridate_1.9.3     jquerylib_0.1.4     cellranger_1.1.0   
##  [40] udpipe_0.8.11       Rcpp_1.0.11         assertthat_0.2.1   
##  [43] knitr_1.45          usethis_2.2.2       readr_2.1.4        
##  [46] httpuv_1.6.11       Matrix_1.6-1.1      timechange_0.2.0   
##  [49] tidyselect_1.2.0    rstudioapi_0.15.0   yaml_2.3.8         
##  [52] miniUI_0.1.1.1      curl_5.2.0          processx_3.8.3     
##  [55] pkgbuild_1.4.2      lattice_0.21-8      tibble_3.2.1       
##  [58] shiny_1.7.5         withr_2.5.2         askpass_1.2.0      
##  [61] evaluate_0.23       desc_1.4.2          urlchecker_1.0.1   
##  [64] pillar_1.9.0        generics_0.1.3      vroom_1.6.3        
##  [67] rprojroot_2.0.4     hms_1.1.3           munsell_0.5.0      
##  [70] scales_1.2.1        xtable_1.8-4        glue_1.6.2         
##  [73] janitor_2.2.0       tools_4.3.1         data.table_1.14.8  
##  [76] fs_1.6.3            grid_4.3.1          tidyr_1.3.0        
##  [79] gh_1.4.0            devtools_2.4.5      colorspace_2.1-0   
##  [82] googlesheets4_1.1.1 googledrive_2.1.1   cli_3.6.2          
##  [85] textshaping_0.3.6   fansi_1.0.6         gargle_1.5.2       
##  [88] dplyr_1.1.4         gtable_0.3.4        sass_0.4.8         
##  [91] digest_0.6.33       wordcloud_2.6       htmlwidgets_1.6.2  
##  [94] farver_2.1.1        memoise_2.0.1       htmltools_0.5.7    
##  [97] lifecycle_1.0.4     httr_1.4.7          mime_0.12          
## [100] openssl_2.1.1       bit64_4.0.5