Data Setup

# Create the output directories (no warning if they already exist).
dir.create("data", showWarnings = FALSE)
dir.create("plots", showWarnings = FALSE)


# Authenticate each service with tokens read from environment variables.
auth_from_secret("calendly", token = Sys.getenv("METRICMINER_CALENDLY"))

# NOTE(review): the opening line of this call was missing, which left the three
# arguments below dangling. The "google" app name is inferred from the
# METRICMINER_GOOGLE_* variable names and the neighbouring calls -- confirm.
auth_from_secret("google",
                 refresh_token = Sys.getenv("METRICMINER_GOOGLE_REFRESH"),
                 access_token = Sys.getenv("METRICMINER_GOOGLE_ACCESS"),
                 cache = TRUE)
auth_from_secret("github", token = Sys.getenv("METRICMINER_GITHUB_PAT"))

ga_accounts <- get_ga_user()
calendly_account <- get_calendly_user()
# Three-colour palette for manual colour scales.
cbPalette <- c("#E69F00", "#56B4E9", "#CC79A7")

Collect online course data

# Cached Google Analytics extracts. The download is skipped only when all three
# files exist, because the else-branch reads all three.
ga_metrics_file <- file.path("data", "itcr_ga_metric_data.RDS")
ga_dims_file <- file.path("data", "itcr_ga_dims_data.RDS")
ga_link_clicks_file <- file.path("data", "itcr_ga_link_click_data.RDS")

if (!all(file.exists(c(ga_metrics_file, ga_dims_file, ga_link_clicks_file)))) {
  fhdsl_stats_list <- get_all_ga_metrics(account_id = ga_accounts$id[1])
  itcr_stats_list <- get_all_ga_metrics(account_id = ga_accounts$id[2])
  # There's some google analytics that aren't ITCR courses
  not_itcr <- c("hutchdatasci", "whoiswho", "MMDS", "FH Cluster 101", "AnVIL_Researcher_Journey")

  # Set up each data frame: stack both accounts, drop the non-ITCR sites, cache.
  # NOTE(review): the bare `%>% !(website %in% not_itcr)` steps had lost their
  # enclosing verb; dplyr::filter() is the only reading that fits -- confirm.
  ga_metrics <- dplyr::bind_rows(fhdsl_stats_list$metrics, itcr_stats_list$metrics) %>%
    dplyr::filter(!(website %in% not_itcr))
  saveRDS(ga_metrics, ga_metrics_file)

  ga_dims <- dplyr::bind_rows(fhdsl_stats_list$dimensions, itcr_stats_list$dimensions) %>%
    dplyr::filter(!(website %in% not_itcr))
  saveRDS(ga_dims, ga_dims_file)

  ga_link_clicks <- dplyr::bind_rows(fhdsl_stats_list$link_clicks, itcr_stats_list$link_clicks) %>%
    dplyr::filter(!(website %in% not_itcr))
  saveRDS(ga_link_clicks, ga_link_clicks_file)
} else {
  ga_metrics <- readRDS(ga_metrics_file)
  ga_dims <- readRDS(ga_dims_file)
  ga_link_clicks <- readRDS(ga_link_clicks_file)
}

# Manually curated course information. `col_types` already parses the three "D"
# columns as dates, so no further date conversion is needed. (The former
# `mutate_if(is.numeric.Date, lubridate::ymd)` step never matched a column:
# base::is.numeric.Date() returns FALSE for every input.)
manual_course_info <- googlesheets4::read_sheet(
  "",
  sheet = "Course_data",
  col_types = "ccDDDciii"
)

# Join this all together
# NOTE(review): the join key is left to dplyr's default (all shared column
# names); consider an explicit `by =` once the sheet's columns are confirmed.
itcr_course_data <- ga_metrics %>%
  dplyr::left_join(manual_course_info) %>%
  dplyr::mutate(website = dplyr::case_when(
    website == "Advanced Reproducibility in Cancer Informatics" ~ "Advanced Reproducibility",
    TRUE ~ website
  ))

# Save this to a TSV
readr::write_tsv(itcr_course_data, file.path("data", "itcr_course_metrics.tsv"))

Get OTTR courses

# NOTE(review): the entries of this vector are not present in this copy of the
# file (the call was left unclosed). List the sync YAML locations here before
# running; with an empty vector no repositories are collected.
sync_yamls <- c()

#' Extract the repository names listed in a sync YAML file
#'
#' @param yaml Passed unchanged to `yaml::read_yaml()`.
#' @return A character vector with one element per line of every `repos`
#'   entry found under the file's `group` key; `character(0)` when there are
#'   none.
extract_repos <- function(yaml) {
  content <- yaml::read_yaml(yaml)
  repos <- unlist(purrr::map(content$group, "repos"))
  # strsplit() errors on NULL, which is what unlist() yields when no group
  # carries a `repos` entry.
  if (is.null(repos)) {
    return(character(0))
  }
  # Each `repos` entry is a newline-separated string.
  unlist(strsplit(repos, "\n"))
}

all_ottr_repos <- unlist(lapply(sync_yamls, extract_repos))

# One row per repository: split "organization/repo" into its two parts and flag
# repositories whose name mentions "template".
# NOTE(review): the chain previously ended in a dangling `%>%`, which piped it
# into the next statement; a final step may have been lost here -- confirm.
ottr_df <- data.frame(repo_name = all_ottr_repos) %>%
  tidyr::separate(repo_name, into = c("organization", "repo"), sep = "\\/", remove = FALSE) %>%
  dplyr::mutate(template = dplyr::case_when(
    stringr::str_detect(repo, "template|Template") ~ "template",
    TRUE ~ "course"
  ))

# Number of repositories per organization.
ottr_df %>% dplyr::count(organization)
##         organization  n
## 1             C-MOOR  3
## 2  PracticalGenomics  1
## 3          abyzovlab  1
## 4      datatrail-jhu 15
## 5              fhdsl 23
## 6        griffithlab  3
## 7             jhudsl 26
## 8          mccoy-lab  1
## 9    opencasestudies  1
## 10            tmm211  1
# Number of non-template (course) repositories.
# NOTE(review): the final step was missing (dangling `%>%`); dplyr::count() is
# inferred from the single-column `n` output printed below -- confirm.
ottr_df %>%
  dplyr::filter(template == "course") %>%
  dplyr::count()
##    n
## 1 69

Collaboration Info

# Read the collaborations sheet (the sheet identifier is blank in this copy).
collabs <- googlesheets4::read_sheet("")
## [1] 106
# Count collaborations per ITN/ITCR/external group and add each group's share.
dplyr::mutate(
  dplyr::count(collabs, ITN_ITCR_or_external),
  perc = n / sum(n)
)
## # A tibble: 8 × 3
##   ITN_ITCR_or_external             n    perc
##   <chr>                        <int>   <dbl>
## 1 ITCR                            67 0.632  
## 2 ITN                              5 0.0472 
## 3 NCI                              2 0.0189 
## 4 NIH                              2 0.0189 
## 5 external                         2 0.0189 
## 6 external (was after leaving)     1 0.00943
## 7 external NIH/NCI intermural      3 0.0283 
## 8 neither                         24 0.226
# Expand comma-separated categories to one row per category, trim stray
# whitespace, and drop rows whose category is "?" (rows with a missing
# category are dropped by the same comparison).
collabs <- collabs %>%
  tidyr::separate_rows("Category", sep = ", ") %>%
  dplyr::mutate(Category = trimws(Category)) %>%
  dplyr::filter(Category != "?")

## [1] 154
# Bar chart of collaborations per category, most frequent first.
# NOTE(review): the chain previously ended in a dangling `+`, which made the
# ggsave() call an operand of the plot; a final layer may have been lost here.
collabs %>%
  dplyr::count(Category) %>%
  ggplot2::ggplot(ggplot2::aes(y = n, x = reorder(Category, -n), fill = Category)) +
  ggplot2::geom_bar(position = "dodge", stat = "identity") +
  ggplot2::theme_minimal() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
                 strip.text.x = ggplot2::element_text(size = 6),
                 # Namespaced like every other call so it works without
                 # ggplot2/grid being attached.
                 plot.margin = ggplot2::unit(c(.75, .5, .5, .5), "cm"))

# No `plot` argument: ggsave() writes the most recently built plot.
ggplot2::ggsave(file.path("plots", "itn_collaboration_types.png"), width = 7, height = 5)
open_agenda <- readLines("ITCR OPEN Group Agenda.txt")

# Meeting heading lines are those containing "20" (presumably the year --
# confirm against the agenda file). Strip the trailing "Meeting" word, then
# take the first word as the month and the second as the year.
dates <- grep("20", open_agenda, value = TRUE)
dates <- gsub(" Meeting| meeting", "", dates)
month <- stringr::word(dates, sep = " ")
year <- stringr::word(dates, sep = " ", start = 2)

# Line index of every heading.
names <- grep("20", open_agenda)
# Gap in lines between each heading and the next one (or the end of the file
# for the last heading). diff() also handles a single heading, where
# `names[2:length(names)]` would index backwards and recycle.
attendance <- abs(diff(c(names, length(open_agenda))))

# NOTE(review): the `attendance` column and the closing parenthesis were
# missing; they are inferred from the plot that maps `y = attendance`.
open_attendance <- data.frame(
  date = lubridate::ym(paste(year, "-", month)),
  attendance = attendance
)

# Bar chart of attendance per meeting date, with the value printed above
# each bar.
ggplot2::ggplot(
  data = open_attendance,
  mapping = ggplot2::aes(x = date, y = attendance)
) +
  ggplot2::geom_bar(stat = "identity", fill = "lightgreen") +
  ggplot2::theme_classic() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)
  ) +
  ggplot2::labs(x = "") +
  ggplot2::geom_text(
    mapping = ggplot2::aes(label = attendance),
    size = 3,
    vjust = -1
  )

# Write the plot built above to disk.
ggplot2::ggsave(
  filename = file.path("plots", "open_attendance.png"),
  width = 4,
  height = 3
)

Collect Loqui Video Creation User data

Unique users of Loqui

# Read the Loqui usage sheet (the sheet identifier is blank in this copy).
loqui_usage <- googlesheets4::read_sheet("")
## ✔ Reading from "Loqui User Data".
## ✔ Range 'Sheet1'.
# Number of distinct email values. `count` is namespaced like the other dplyr
# calls so it does not depend on dplyr being attached.
loqui_usage %>% dplyr::count(email) %>% dplyr::pull(email) %>% length()
## [1] 18

Number of videos made with Loqui

# Sum of the per-email row counts. `count` is namespaced like the other dplyr
# calls so it does not depend on dplyr being attached.
loqui_usage %>% dplyr::count(email) %>% dplyr::pull(n) %>% sum()
## [1] 477

Collect Workshop Feedback Info

# Fetch the Slido exports only when no cached copy exists.
if (!file.exists(file.path("data", "itcr_slido_data.RDS"))) {
  itcr_drive_id <- ""
  itcr_slido_data <- get_slido_files(itcr_drive_id)

  saveRDS(itcr_slido_data, file.path("data", "itcr_slido_data.RDS"))
} else {
  itcr_slido_data <- readRDS(file.path("data", "itcr_slido_data.RDS"))
}

# NOTE(review): this chain ended in a dangling `%>%`. janitor::clean_names() is
# inferred because every later reference to poll_data uses snake_case column
# names -- confirm.
poll_data <- itcr_slido_data$`Polls-per-user` %>%
  janitor::clean_names()

# Bucket each recommendation score into a promoter category and count the
# responses per category.
# NOTE(review): the right-hand side of the first assignment and the final step
# were missing. as.numeric() and dplyr::count(promoter) are inferred from the
# numeric comparisons below and from the later use of `$n` and `$promoter` --
# confirm against the original analysis.
promoters_categories <- poll_data %>%
  dplyr::mutate(
    how_likely_would_you_be_to_recommend_this_workshop =
      as.numeric(how_likely_would_you_be_to_recommend_this_workshop),
    promoter = dplyr::case_when(
      how_likely_would_you_be_to_recommend_this_workshop < 7 ~ "detractors",
      how_likely_would_you_be_to_recommend_this_workshop == 7 ~ "passives",
      how_likely_would_you_be_to_recommend_this_workshop == 8 ~ "passives",
      how_likely_would_you_be_to_recommend_this_workshop > 8 ~ "promoters",
      # Typed NA keeps case_when() type-stable across dplyr versions.
      TRUE ~ NA_character_
    )
  ) %>%
  dplyr::count(promoter)
# Named vector of response counts, keyed by promoter category.
promoter_sums <- stats::setNames(
  promoters_categories$n,
  promoters_categories$promoter
)
# Total number of categorised responses.
sum(promoter_sums, na.rm = TRUE)
## [1] 119
# Net Promoter Score: share of promoters minus share of detractors.
promoter_sums["promoters"] / sum(promoter_sums, na.rm = TRUE) -
  promoter_sums["detractors"] / sum(promoter_sums, na.rm = TRUE)
## promoters 
## 0.2521008

Data Visualizations

How likely would you be to recommend this workshop?

# Pool the scores from both recommendation columns into a single data frame.
# Built with ggplot() rather than the deprecated qplot(), which also drew its
# own bar layer underneath the coloured one.
recommend_scores <- data.frame(
  rating = as.numeric(c(
    poll_data$how_likely_would_you_be_to_recommend_this_workshop,
    poll_data$how_likely_would_you_be_to_recommend_this_workshop_2
  ))
)

# Bar chart of how many responses gave each score.
ggplot2::ggplot(recommend_scores, ggplot2::aes(x = rating)) +
  ggplot2::geom_bar(fill = "#CBC3E3") +
  ggplot2::theme_classic() +
  ggplot2::labs(title = "How likely would you be to recommend this workshop?")

ggplot2::ggsave(file.path("plots", "itn_workshop_rec.png"), width = 4, height = 2)
Workshop Relevance Feedback

  # Answer options in display order; the same vector drives both the row
  # filter and the factor levels so the two cannot drift apart.
  relevance_levels <- c("Not very likely", "Somewhat likely", "Likely", "Very likely", "Extremely likely")

  # Keep only rows with one of the recognised answers.
  poll_data <- poll_data %>%
    dplyr::filter(how_likely_are_you_to_use_what_you_learned_in_your_daily_work %in% relevance_levels)

  poll_data$how_likely_are_you_to_use_what_you_learned_in_your_daily_work <- factor(
    poll_data$how_likely_are_you_to_use_what_you_learned_in_your_daily_work,
    levels = relevance_levels
  )

  # NOTE(review): the chain previously ended in a dangling `+`, which made the
  # ggsave() call an operand of the plot; a final layer may have been lost.
  ggplot2::ggplot(poll_data, ggplot2::aes(x = how_likely_are_you_to_use_what_you_learned_in_your_daily_work)) +
    ggplot2::geom_bar(stat = "count", fill = "#CBC3E3") +
    ggplot2::theme_classic() +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
    ggplot2::labs(title = "How likely are you to use what you learned in your daily work?")

  ggplot2::ggsave(file.path("plots", "itn_relevance.png"), width = 4, height = 2)

Wordclouds for Qualitative Workshop Feedback

# Download the English udpipe model, then load it from the downloaded file.
ud_model <- udpipe::udpipe_load_model(
  udpipe::udpipe_download_model(language = "english")$file_model
)

What did you like most about the workshop?

# Annotate the free-text answers, keep nouns/adjectives/adverbs, and count how
# often each lower-cased lemma occurs.
# NOTE(review): the chain contained `%>% %>%` and a dangling `%>%`.
# as.data.frame() and dplyr::count(lemma) are inferred from the `upos`/`lemma`
# columns used in between and from the `$n` used below -- confirm.
results <- udpipe::udpipe_annotate(ud_model, x = poll_data$what_did_you_like_most_about_the_workshop) %>%
  as.data.frame() %>%
  dplyr::filter(upos %in% c("NOUN", "ADJ", "ADV")) %>%
  dplyr::mutate(lemma = tolower(lemma)) %>%
  dplyr::count(lemma)

wordcloud::wordcloud(words = results$lemma, freq = results$n,
                     colors = c("#98fb98", "#83D475", "#355E3B"),
                     min.freq = 3, scale = c(3, .4))

Please share any recommendations you have for improvements

# Annotate the free-text answers, keep nouns/adjectives/adverbs, and count how
# often each lower-cased lemma occurs.
# NOTE(review): the chain contained `%>% %>%` and a dangling `%>%`.
# as.data.frame() and dplyr::count(lemma) are inferred from the `upos`/`lemma`
# columns used in between and from the `$n` used below -- confirm.
rec_results <- udpipe::udpipe_annotate(ud_model, x = poll_data$please_share_any_recommendations_you_have_for_improvements) %>%
  as.data.frame() %>%
  dplyr::filter(upos %in% c("NOUN", "ADJ", "ADV")) %>%
  dplyr::mutate(lemma = tolower(lemma)) %>%
  dplyr::count(lemma)

wordcloud::wordcloud(words = rec_results$lemma, freq = rec_results$n,
                     colors = c("#98fb98", "#83D475", "#355E3B"),
                     min.freq = 3, scale = c(4, .4))

Get number of unique workshop attendees

# Distinct values across the three differently worded email questions in the
# per-user poll data.
unique_emails <- local({
  polls <- itcr_slido_data$`Polls-per-user`
  unique(c(
    polls$`Please submit your email so we can log your attendance`,
    polls$`What's your email?`,
    polls$`What is your email?`
  ))
})

## [1] 47

Unique visitors to websites

# Total users per website, largest first, coloured by target audience.
# NOTE(review): the chain previously ended in a dangling `+`, which made the
# ggsave() call an operand of the plot; a final layer may have been lost here.
ggplot2::ggplot(itcr_course_data, ggplot2::aes(x = reorder(website, -totalUsers), y = totalUsers, fill = target_audience)) +
  ggplot2::geom_bar(stat = "identity") +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
  ggplot2::xlab("") +
  ggplot2::geom_text(ggplot2::aes(label = totalUsers), size = 3, vjust = -1) +
  ggplot2::ylim(c(0, 6000))

ggplot2::ggsave(file.path("plots", "itn_website_visits.png"), width = 4, height = 3)

Table of user engagement

# Table of active users and average session duration per website.
# NOTE(review): `long_table` is not a documented kable() argument (the LaTeX
# option is spelled `longtable`) -- confirm the intent.
user_totals <- knitr::kable(
  dplyr::select(
    janitor::clean_names(ga_metrics),
    website, active_users, average_session_duration
  ),
  digits = 2,
  long_table = TRUE,
  padding = 2
)

website active_users average_session_duration
AI for Efficient Programming 120 362.52
NIH Data Sharing 172 230.25
AI for Decision Makers 42 598.87
ITN Website 5542 181.93
Leadership in Cancer Informatics 396 229.81
Documentation and Usability 734 192.49
Computing for Cancer Informatics 610 279.10
Reproducibility in Cancer Informatics 1315 194.69
Advanced Reproducibility in Cancer Informatics 334 221.12
Ethical Data Handling 59 278.85
OTTR website 787 67.77
Choosing Genomics Tools 97 248.51
Overleaf and Latex for Scientific Articles 17 471.66
GitHub Automation for Scientists 23 202.12 2 26.40
# Table of page views, sessions and engagement rate per website.
# NOTE(review): `long_table` is not a documented kable() argument (the LaTeX
# option is spelled `longtable`) -- confirm the intent.
user_engagement <- knitr::kable(
  dplyr::select(
    janitor::clean_names(ga_metrics),
    website, screen_page_views_per_user, sessions, screen_page_views, engagement_rate
  ),
  digits = 2,
  long_table = TRUE,
  padding = 2
)

website screen_page_views_per_user sessions screen_page_views engagement_rate
AI for Efficient Programming 6.85 335 822 0.55
NIH Data Sharing 5.06 364 871 0.43
AI for Decision Makers 32.60 253 1369 0.69
ITN Website 2.58 8889 14310 0.45
Leadership in Cancer Informatics 3.76 752 1489 0.52
Documentation and Usability 2.66 1080 1952 0.52
Computing for Cancer Informatics 6.01 1317 3666 0.52
Reproducibility in Cancer Informatics 2.97 2080 3908 0.45
Advanced Reproducibility in Cancer Informatics 4.08 712 1364 0.51
Ethical Data Handling 8.31 207 490 0.51
OTTR website 1.41 1044 1112 0.31
Choosing Genomics Tools 6.20 253 601 0.58
Overleaf and Latex for Scientific Articles 9.94 49 169 0.61
GitHub Automation for Scientists 3.91 44 90 0.50 2.00 3 4 0.67
# One facet per engagement metric, one bar per course (the two non-course
# sites and blank names are excluded).
# NOTE(review): the chain previously ended in a dangling `+`, which made the
# ggsave() call an operand of the plot; a final layer may have been lost here.
itcr_course_data %>%
  janitor::clean_names() %>%
  dplyr::select(website, screen_page_views_per_user, average_session_duration,
                event_count_per_user, engagement_rate, target_audience) %>%
  tidyr::pivot_longer(!c(website, target_audience),
                      names_to = "metric_name",
                      values_to = "value") %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", ""))) %>%
  ggplot2::ggplot(ggplot2::aes(y = value, x = website, fill = target_audience)) +
  ggplot2::geom_bar(position = "dodge", stat = "identity") +
  ggplot2::theme_minimal() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
                 strip.text.x = ggplot2::element_text(size = 8),
                 # Namespaced like every other call so it works without
                 # ggplot2/grid being attached.
                 plot.margin = ggplot2::unit(c(1.5, .5, .5, .5), "cm")) +
  ggplot2::facet_wrap(~metric_name, scales = "free_y")

ggplot2::ggsave(file.path("plots", "itn_engagement_stats.png"), width = 8, height = 6)

ITN Course and Website Traffic Over Time

# Add date columns and a website/course flag to the GA dimension rows, attach
# the manual course info, and shorten one long course title.
web_traffic_overtime <- ga_dims %>%
  dplyr::mutate(
    date = lubridate::ymd(paste0(year, "-", month, "-", day)),
    month_year = lubridate::ym(paste0(year, "-", month)),
    web_yn = dplyr::case_when(
      website == "ITN Website" ~ "ITN Website",
      website != "ITN Website" ~ "ITN Online Course Website"
    )
  ) %>%
  dplyr::left_join(manual_course_info) %>%
  dplyr::mutate(
    website = dplyr::case_when(
      website == "Advanced Reproducibility in Cancer Informatics" ~ "Advanced Reproducibility",
      TRUE ~ website
    )
  )
# Monthly row counts across the online course sites.
# The filter is negated so the non-course sites are excluded: the axis label
# and file name describe course traffic, and every other course plot in this
# file uses the same negated filter.
# NOTE(review): confirm the un-negated filter was not intentional.
traffic_plot <- web_traffic_overtime %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", ""))) %>%
  dplyr::group_by(month_year) %>%
  dplyr::count() %>%
  ggplot2::ggplot(ggplot2::aes(y = n, x = month_year)) +
  ggplot2::geom_bar(stat = "identity", fill = "pink") +
  ggplot2::scale_x_date(date_labels = "%b %Y") +
  ggplot2::ylab("Unique ITN Online Course Visitors") +
  ggplot2::xlab("") +
  ggplot2::theme_minimal() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1))

# Pass the plot explicitly rather than relying on the last-plot default.
ggplot2::ggsave(file.path("plots", "itn_course_traffic_plot.png"), plot = traffic_plot, width = 4, height = 3.5)

# Long format: one row per course and learner-count source ("modality").
# NOTE(review): the column list was truncated to the two id columns and the
# case_when()/mutate() calls were left unclosed. The three count columns are
# inferred from the modality names handled in case_when() below -- confirm.
long_df <- itcr_course_data %>%
  dplyr::select(c("website",
                  "totalUsers",
                  "coursera_count",
                  "leanpub_count",
                  "target_audience")) %>%
  tidyr::pivot_longer(!c(website, target_audience),
                      names_to = "modality",
                      values_to = "learner_count") %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", ""))) %>%
  dplyr::mutate(modality = dplyr::case_when(
    modality == "leanpub_count" ~ "Total Leanpub Enrollments",
    modality == "coursera_count" ~ "Total Coursera Enrollments",
    modality == "totalUsers" ~ "Website Learners",
    TRUE ~ modality
  ))

Total learners from each modality

# Total learners per modality, one facet per target audience.
# NOTE(review): the chain previously ended in a dangling `+`, which made the
# ggsave() call an operand of the plot; a final layer may have been lost here.
long_df %>%
  dplyr::group_by(modality, target_audience) %>%
  dplyr::summarize(total_learners = sum(learner_count, na.rm = TRUE)) %>%
  ggplot2::ggplot(ggplot2::aes(x = reorder(modality, -total_learners), y = total_learners, fill = target_audience)) +
  ggplot2::geom_bar(stat = "identity", na.rm = TRUE) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
  ggplot2::xlab("") +
  ggplot2::ylab("Visitors/Enrollees") +
  ggplot2::geom_text(ggplot2::aes(label = total_learners), size = 3, vjust = -1, na.rm = TRUE) +
  ggplot2::ylim(c(0, 4200)) +
  ggplot2::facet_wrap(~target_audience)

ggplot2::ggsave(file.path("plots", "itn_total_enrollments.png"), width = 8, height = 3)

Total learners by course

# Total learners per course (all modalities summed), largest first.
# NOTE(review): the chain previously ended in a dangling `+`, which made the
# ggsave() call an operand of the plot; a final layer may have been lost here.
long_df %>%
  dplyr::group_by(website, target_audience) %>%
  dplyr::summarize(total_learners = sum(learner_count, na.rm = TRUE)) %>%
  ggplot2::ggplot(ggplot2::aes(y = total_learners, x = reorder(website, -total_learners), fill = target_audience)) +
  ggplot2::geom_bar(stat = "identity") +
  ggplot2::ylab("Total learners by course") +
  ggplot2::theme_minimal() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
                 strip.text.x = ggplot2::element_text(size = 8)) +
  ggplot2::geom_text(ggplot2::aes(label = total_learners), size = 3, vjust = -1, na.rm = TRUE) +
  ggplot2::ylim(c(0, 1800)) +
  ggplot2::xlab("")

ggplot2::ggsave(file.path("plots", "itn_total_learners_by_course.png"), width = 10, height = 5)
# Total learners per course, largest first, rendered as a table.
# NOTE(review): the final step was missing (dangling `%>%`); knitr::kable() is
# inferred from the rendered table that follows -- confirm.
long_df %>%
  dplyr::group_by(website, target_audience) %>%
  dplyr::summarize(total_learners = sum(learner_count, na.rm = TRUE)) %>%
  dplyr::arrange(-total_learners) %>%
  knitr::kable()
website target_audience total_learners
Reproducibility in Cancer Informatics New to data 1690
Computing for Cancer Informatics New to data 1562
Leadership in Cancer Informatics Leadership 1051
Documentation and Usability Software developers 1044
Advanced Reproducibility Software developers 613
AI for Efficient Programming Software developers 264
NIH Data Sharing Leadership 175
AI for Decision Makers Leadership 136
Choosing Genomics Tools New to data 110
Ethical Data Handling Leadership 60
GitHub Automation for Scientists Software developers 25
Overleaf and Latex for Scientific Articles Leadership 17

Course traffic by course

# Monthly row counts per course, one facet per course.
# NOTE(review): the chain previously ended in a dangling `+`, which made the
# ggsave() call an operand of the plot; a final layer may have been lost here.
web_traffic_overtime %>%
  dplyr::group_by(website, month_year, target_audience) %>%
  dplyr::count() %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", ""))) %>%
  ggplot2::ggplot(ggplot2::aes(y = n, x = month_year, fill = target_audience)) +
  ggplot2::geom_bar(stat = "identity") +
  ggplot2::scale_x_date(date_labels = "%b %Y") +
  ggplot2::ylab("Unique ITN Website Learners Overtime") +
  ggplot2::theme_minimal() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
                 strip.text.x = ggplot2::element_text(size = 8)) +
  ggplot2::facet_wrap(~website)

ggplot2::ggsave(file.path("plots", "itn_course_traffic_plot_by_course.png"), width = 10, height = 5)

Number of learners by how long the course has been out

# Coursera enrollments against Coursera launch date.
# NOTE(review): this chain previously ended in a dangling `+`, which tried to
# add the second plot below to it; trailing layers may have been lost here.
itcr_course_data %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", ""))) %>%
  ggplot2::ggplot(ggplot2::aes(x = coursera_launch, y = coursera_count, color = target_audience)) +
  ggplot2::geom_point() +
  ggplot2::theme_minimal()

# Website users against time since the website launch, labelled by course.
itcr_course_data %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", ""))) %>%
  # Namespaced like the other lubridate calls so it works without lubridate
  # being attached.
  dplyr::mutate(duration = lubridate::today() - website_launch) %>%
  ggplot2::ggplot(ggplot2::aes(x = duration, y = totalUsers, color = target_audience)) +
  ggplot2::geom_point() +
  ggplot2::theme_minimal() +
  ggplot2::xlab("How long the course has been out") +
  ggplot2::scale_color_manual(values = cbPalette) +
  ggplot2::geom_text(ggplot2::aes(x = duration, y = totalUsers, label = website), size = 3, vjust = -1, na.rm = TRUE)

ggplot2::ggsave(file.path("plots", "itn_website_traffic_by_how_long_the_course_has_been_published.png"), width = 10, height = 5)
Software engagement

# Monthly row counts per website, drawn as stacked bars by month.
# NOTE(review): the filter EXCLUDES "ITN Website" and "OTTR website", yet the
# axis label and file name describe software website traffic -- confirm which
# sites this plot is meant to show.
web_traffic_overtime %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", ""))) %>%
  dplyr::group_by(website, month_year) %>%
  dplyr::count() %>%
  ggplot2::ggplot(mapping = ggplot2::aes(y = n, x = month_year)) +
  ggplot2::geom_bar(stat = "identity", fill = "pink") +
  ggplot2::scale_x_date(date_labels = "%b %Y") +
  ggplot2::labs(y = "Unique ITN software website visitors") +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 8)
  )

# Write the plot built above to disk.
ggplot2::ggsave(
  filename = file.path("plots", "itn_software_website_traffic.png"),
  width = 10,
  height = 5
)

# Coursera enrollments per course (only courses with a positive count).
# NOTE(review): the chain previously ended in a dangling `+`, which made the
# ggsave() call an operand of the plot; a final layer may have been lost here.
ggplot2::ggplot(itcr_course_data %>% dplyr::filter(coursera_count > 0),
                ggplot2::aes(x = reorder(website, -coursera_count), y = coursera_count, fill = target_audience)) +
  ggplot2::geom_bar(stat = "identity", na.rm = TRUE) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
  ggplot2::xlab("") +
  ggplot2::ylab("Coursera enrollments") +
  ggplot2::geom_text(ggplot2::aes(label = coursera_count), size = 3, vjust = -1, na.rm = TRUE) +
  ggplot2::ylim(c(0, 1200))

ggplot2::ggsave(file.path("plots", "itn_coursera_enrollments.png"), width = 4, height = 2)


# Leanpub enrollments per course (only courses with a positive count).
# NOTE(review): the chain previously ended in a dangling `+`, which made the
# ggsave() call an operand of the plot; a final layer may have been lost here.
ggplot2::ggplot(itcr_course_data %>% dplyr::filter(leanpub_count > 0),
                ggplot2::aes(x = reorder(website, -leanpub_count), y = leanpub_count, fill = target_audience)) +
  ggplot2::geom_bar(stat = "identity", na.rm = TRUE) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
  ggplot2::xlab("") +
  ggplot2::ylab("Leanpub enrollments") +
  ggplot2::geom_text(ggplot2::aes(label = leanpub_count), size = 3, vjust = -1, na.rm = TRUE) +
  ggplot2::ylim(c(0, 40))

ggplot2::ggsave(file.path("plots", "itn_leanpub_enrollments.png"), width = 4, height = 2)

