ITN Eval Metrics

Data Set Up

# Make sure the output folders exist. showWarnings = FALSE keeps this quiet
# when the folders are already there.
invisible(lapply(c("data", "plots"), dir.create, showWarnings = FALSE))

# Fix the random seed so any randomised step gives the same result each run.
set.seed(1234)

devtools::load_all("../metricminer/")

## ℹ Loading metricminer

library("ggplot2")
library("dlstats")
library("magrittr")

source(file.path("local_auth_2.R"))

## You chose to cache your credentials, if you change your mind, run metricminer::delete_creds().
##             
##  Be careful not to push .httr-oauth or RDS files to GitHub or share it anywhere.

# Credentials are read from environment variables so no secret is written
# into this file.
auth_from_secret(
  "calendly",
  token = Sys.getenv("METRICMINER_CALENDLY")
)

# Google takes a refresh token and an access token; cache = TRUE asks
# metricminer to cache the credentials.
auth_from_secret(
  "google",
  refresh_token = Sys.getenv("METRICMINER_GOOGLE_REFRESH"),
  access_token = Sys.getenv("METRICMINER_GOOGLE_ACCESS"),
  cache = TRUE
)

## You chose to cache your credentials, if you change your mind, run metricminer::delete_creds().
##             
##  Be careful not to push .httr-oauth or RDS files to GitHub or share it anywhere.

# GitHub personal access token, again taken from the environment.
auth_from_secret(
  "github",
  token = Sys.getenv("METRICMINER_GITHUB_PAT")
)

# Google Analytics accounts visible to the authorised user; the account ids
# are used later to pull metrics.
ga_accounts <- get_ga_user()

## Using user-supplied cached token using authorize("google")
## Auto-refreshing stale OAuth token.

# Calendly user record for the authorised account.
calendly_account <- get_calendly_user()

## Using user-supplied cached token using authorize("calendly")

# Three-colour palette passed to the manual fill/colour scales of the plots
# below.
cbPalette <- c(
  "#E69F00",
  "#56B4E9",
  "#CC79A7"
)

Collect online course data

# Google Analytics data is cached in three RDS files. All three must be
# present before the cache is trusted: checking only the metrics file would
# make readRDS() fail if one of the other two files had been removed.
ga_metrics_file <- file.path("data", "itcr_ga_metric_data.RDS")
ga_dims_file <- file.path("data", "itcr_ga_dims_data.RDS")
ga_link_clicks_file <- file.path("data", "itcr_ga_link_click_data.RDS")

if (!all(file.exists(c(ga_metrics_file, ga_dims_file, ga_link_clicks_file)))) {
  fhdsl_stats_list <- get_all_ga_metrics(account_id = ga_accounts$id[1])
  itcr_stats_list <- get_all_ga_metrics(account_id = ga_accounts$id[2])

  # There's some google analytics that aren't ITCR courses
  not_itcr <- c("hutchdatasci", "whoiswho", "MMDS", "FH Cluster 101", "AnVIL_Researcher_Journey")

  # Stack the results of both accounts and drop the non-ITCR properties.
  combine_itcr <- function(fhdsl_df, itcr_df) {
    dplyr::bind_rows(fhdsl_df, itcr_df) %>%
      dplyr::filter(!(website %in% not_itcr))
  }

  # Set up each data frame and cache it for the next run
  ga_metrics <- combine_itcr(fhdsl_stats_list$metrics, itcr_stats_list$metrics)
  saveRDS(ga_metrics, ga_metrics_file)

  ga_dims <- combine_itcr(fhdsl_stats_list$dimensions, itcr_stats_list$dimensions)
  saveRDS(ga_dims, ga_dims_file)

  ga_link_clicks <- combine_itcr(fhdsl_stats_list$link_clicks, itcr_stats_list$link_clicks)
  saveRDS(ga_link_clicks, ga_link_clicks_file)
} else {
  ga_metrics <- readRDS(ga_metrics_file)
  ga_dims <- readRDS(ga_dims_file)
  ga_link_clicks <- readRDS(ga_link_clicks_file)
}

# Manually curated course information (launch dates, enrolment counts,
# target audience) kept in the "Course_data" tab of the metrics sheet.
# col_types already parses the three date columns as Date ("D"), so no
# further date conversion is needed. The previous
# `mutate_if(is.numeric.Date, lubridate::ymd)` step used the base S3 method
# is.numeric.Date(), which returns FALSE for every column, so it never
# changed anything and has been removed.
manual_course_info <- googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1-8vox2LzkVKzhmSFXCWjwt3jFtK-wHibRAq2fqbxEyo/edit#gid=1550012125",
  sheet = "Course_data",
  col_types = "ccDDDciii"
)

## ✔ Reading from "ITN_collaborations_and_course_metrics".

## ✔ Range ''Course_data''.

## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion

## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion

## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion

## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion

## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion

## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion

# Join the Google Analytics metrics with the manual course information.
# The join key is spelled out so the join cannot silently change if another
# shared column name ever appears in either table.
# The long course title is shortened only AFTER the join, because the manual
# sheet is matched on the original website name.
itcr_course_data <- ga_metrics %>%
  dplyr::left_join(manual_course_info, by = "website") %>%
  dplyr::mutate(website = dplyr::case_when(
    website == "Advanced Reproducibility in Cancer Informatics" ~ "Advanced Reproducibility",
    TRUE ~ website
  ))

## Joining with `by = join_by(website)`

# Write the joined course metrics to a tab-separated file
readr::write_tsv(
  itcr_course_data,
  file = file.path("data", "itcr_course_metrics.tsv")
)

Get OTTR courses

# sync.yml files of the course templates; each one lists the repositories
# that the template syncs to.
sync_yamls <- c(
  "https://raw.githubusercontent.com/jhudsl/OTTR_Template/main/.github/sync.yml", 
  "https://raw.githubusercontent.com/C-MOOR/C-MOOR_Template//main/.github/sync.yml", 
  "https://raw.githubusercontent.com/jhudsl/AnVIL_Template/main/.github/sync.yml",
  "https://raw.githubusercontent.com/datatrail-jhu/DataTrail_Template/main/.github/sync.yml"
)

# Read one sync.yml and return the repositories it lists.
#
# yaml: path or URL of a sync.yml file.
# Returns a character vector with one repository per element: the `repos`
# entry of every item under `group` is collected and split on newlines.
extract_repos <- function(yaml) {
  parsed <- yaml::read_yaml(yaml)
  repo_blocks <- unlist(purrr::map(parsed$group, "repos"))
  unlist(strsplit(repo_blocks, "\n"))
}

# Repositories from all templates in a single character vector
all_ottr_repos <- unlist(lapply(sync_yamls, extract_repos))

# One row per distinct repository: split "organization/repo" into its two
# parts (the full name is kept) and flag repositories whose name contains
# "template" or "Template".
ottr_df <- data.frame(repo_name = all_ottr_repos) %>%
  tidyr::separate(
    repo_name,
    into = c("organization", "repo"),
    sep = "\\/",
    remove = FALSE
  ) %>%
  dplyr::mutate(
    template = dplyr::case_when(
      stringr::str_detect(repo, "template|Template") ~ "template",
      TRUE ~ "course"
    )
  ) %>%
  dplyr::distinct()

# Number of repositories per organization
dplyr::count(ottr_df, organization)

##         organization  n
## 1             C-MOOR  3
## 2  PracticalGenomics  1
## 3          abyzovlab  1
## 4      datatrail-jhu 15
## 5              fhdsl 23
## 6        griffithlab  3
## 7             jhudsl 26
## 8          mccoy-lab  1
## 9    opencasestudies  1
## 10            tmm211  1

# Number of repositories that are courses rather than templates
dplyr::count(
  dplyr::filter(ottr_df, template == "course")
)

##    n
## 1 69

Collaboration Info

# Collaboration records from the metrics Google Sheet (no sheet is named,
# so googlesheets4 picks the tab itself)
collabs <- googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1-8vox2LzkVKzhmSFXCWjwt3jFtK-wHibRAq2fqbxEyo/edit#gid=0"
)

## ✔ Reading from "ITN_collaborations_and_course_metrics".

## ✔ Range 'SuccesfulCollabs'.

# Total number of collaborations
nrow(collabs)

## [1] 106

# Count and share of collaborations per collaborator type
dplyr::count(collabs, ITN_ITCR_or_external) %>%
  dplyr::mutate(perc = n / sum(n))

## # A tibble: 8 × 3
##   ITN_ITCR_or_external             n    perc
##   <chr>                        <int>   <dbl>
## 1 ITCR                            67 0.632  
## 2 ITN                              5 0.0472 
## 3 NCI                              2 0.0189 
## 4 NIH                              2 0.0189 
## 5 external                         2 0.0189 
## 6 external (was after leaving)     1 0.00943
## 7 external NIH/NCI intermural      3 0.0283 
## 8 neither                         24 0.226

# A collaboration can carry several comma-separated categories: give each
# category its own row, tidy the whitespace and drop the "?" placeholder.
collabs <- collabs %>%
  tidyr::separate_rows("Category", sep = ", ") %>%
  dplyr::mutate(Category = trimws(Category)) %>%
  dplyr::filter(Category != "?")

# Rows after splitting (one per collaboration/category pair)
nrow(collabs)

## [1] 154

# Bar chart of collaborations per category, ordered from most to least
# frequent. The legend is hidden because the x axis already names each bar.
collabs %>%
  dplyr::count(Category) %>%
  ggplot2::ggplot(
    ggplot2::aes(x = reorder(Category, -n), y = n, fill = Category)
  ) +
  ggplot2::geom_bar(stat = "identity", position = "dodge") +
  ggplot2::xlab("") +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 6),
    legend.position = "none",
    plot.margin = ggplot2::unit(c(.75, .5, .5, .5), "cm")
  )

# No plot is passed, so ggsave() writes ggplot2's last plot
ggplot2::ggsave(
  filename = file.path("plots", "itn_collaboration_types.png"),
  width = 7,
  height = 5
)

open_agenda <- readLines("ITCR OPEN Group Agenda.txt")

# Any line containing "20" is treated as a meeting heading; its first word
# is taken as the month and its second word as the year.
# NOTE(review): a non-heading line that happens to contain "20" would also
# match — confirm against the agenda file.
date_lines <- grep("20", open_agenda)
dates <- gsub(" Meeting| meeting", "", open_agenda[date_lines])
month <- stringr::word(dates, sep = " ")
year <- stringr::word(dates, sep = " ", start = 2)

# Attendance per meeting is the distance (in lines) from one heading to the
# next, with the end of the file closing the last meeting.
# diff() gives the same numbers as the previous index arithmetic
# (`names[1:length(names)] - c(names[2:length(names)], ...)`) but also works
# when there is only a single heading, where `2:length(names)` would count
# backwards and produce a wrong vector.
attendance <- diff(c(date_lines, length(open_agenda)))

open_attendance <- data.frame(
  date = lubridate::ym(paste(year, "-", month)),
  attendance
)

# Attendance per meeting date, with the count printed above each bar
ggplot2::ggplot(
  open_attendance,
  ggplot2::aes(x = date, y = attendance)
) +
  ggplot2::geom_bar(stat = "identity", fill = "lightgreen") +
  ggplot2::geom_text(ggplot2::aes(label = attendance), size = 3, vjust = -1) +
  ggplot2::xlab("") +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))

# No plot is passed, so ggsave() writes ggplot2's last plot
ggplot2::ggsave(
  filename = file.path("plots", "open_attendance.png"),
  width = 4,
  height = 3
)

Collect Loqui Video Creation User data

Unique users of Loqui

# Loqui usage log, read from its Google Sheet
loqui_usage <- googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1G_HTU-bv2k5txExP8EH3ScUfGqtW1P3syThD84Z-g9k/edit#gid=0"
)

## ✔ Reading from "Loqui User Data".

## ✔ Range 'Sheet1'.

# Number of distinct e-mail addresses in the usage log.
# count() is namespaced like every other dplyr call in this file: dplyr is
# never attached with library(), so a bare count() only resolves when some
# other mechanism happens to expose it on the search path.
loqui_usage %>%
  dplyr::count(email) %>%
  nrow()

## [1] 18

Number of videos made with Loqui

# Total number of rows in the usage log. Summing the per-email counts, as
# was done before, always adds up to the number of rows, so nrow() gives the
# same figure without relying on an un-namespaced count().
nrow(loqui_usage)

## [1] 477

Collect Workshop Feedback Info

# Slido exports are cached locally: read the cache when it exists, otherwise
# pull the files from the Google Drive folder and store them for next time.
if (file.exists(file.path("data", "itcr_slido_data.RDS"))) {
  itcr_slido_data <- readRDS(file.path("data", "itcr_slido_data.RDS"))
} else {
  itcr_drive_id <- "https://drive.google.com/drive/folders/0AJb5Zemj0AAkUk9PVA"
  itcr_slido_data <- get_slido_files(itcr_drive_id)

  saveRDS(itcr_slido_data, file.path("data", "itcr_slido_data.RDS"))
}

# Per-user poll answers with column names converted by janitor::clean_names()
poll_data <- janitor::clean_names(itcr_slido_data$`Polls-per-user`)

# Bucket the 'would you recommend' rating into Net Promoter Score groups:
# below 7 = detractors, 7 or 8 = passives, above 8 = promoters. Anything
# else (including answers that are not numbers) ends up as NA.
promoters_categories <- poll_data %>%
  dplyr::mutate(
    how_likely_would_you_be_to_recommend_this_workshop =
      as.numeric(how_likely_would_you_be_to_recommend_this_workshop)
  ) %>%
  dplyr::mutate(
    promoter = dplyr::case_when(
      how_likely_would_you_be_to_recommend_this_workshop < 7 ~ "detractors",
      how_likely_would_you_be_to_recommend_this_workshop %in% c(7, 8) ~ "passives",
      how_likely_would_you_be_to_recommend_this_workshop > 8 ~ "promoters",
      TRUE ~ NA
    )
  ) %>%
  dplyr::count(promoter)

## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `how_likely_would_you_be_to_recommend_this_workshop =
##   as.numeric(how_likely_would_you_be_to_recommend_this_workshop)`.
## Caused by warning:
## ! NAs introduced by coercion

# Counts per promoter group as a vector named by group
promoter_sums <- setNames(promoters_categories$n, promoters_categories$promoter)

# Sum of all group counts
sum(promoter_sums, na.rm = TRUE)

## [1] 119

# Net Promoter Score: share of promoters minus share of detractors among the
# people who actually gave a rating.
# The group whose NAME is NA holds the rows without a usable rating. Its
# count is an ordinary number, so `sum(promoter_sums, na.rm = TRUE)` still
# included it and deflated the score; it is excluded from the denominator
# here. A group that does not occur at all is treated as a count of 0
# instead of turning the whole score into NA.
local({
  respondents <- sum(promoter_sums[!is.na(names(promoter_sums))])
  nps_counts <- promoter_sums[c("promoters", "detractors")]
  nps_counts[is.na(nps_counts)] <- 0
  (nps_counts[1] - nps_counts[2]) / respondents
})

## promoters 
## 0.2521008

Data Visualizations

How likely would you be to recommend this workshop?

# Distribution of the 'would you recommend' rating across both versions of
# the question. Built with ggplot() instead of the deprecated qplot(); the
# previous qplot(geom = "bar") + geom_bar() combination also drew the bars
# twice. Answers that are missing or not numeric are removed before
# plotting.
data.frame(
  rating = as.numeric(c(
    poll_data$how_likely_would_you_be_to_recommend_this_workshop,
    poll_data$how_likely_would_you_be_to_recommend_this_workshop_2
  ))
) %>%
  dplyr::filter(!is.na(rating)) %>%
  ggplot2::ggplot(ggplot2::aes(x = rating)) +
  ggplot2::geom_bar(fill = "#CBC3E3") +
  ggplot2::theme_classic() +
  ggplot2::labs(title = "How likely would you be to recommend this workshop?")

## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning: Removed 165 rows containing non-finite values (`stat_count()`).
## Removed 165 rows containing non-finite values (`stat_count()`).

  # No plot is passed, so ggsave() writes ggplot2's last plot
  ggplot2::ggsave(
    filename = file.path("plots", "itn_workshop_rec.png"),
    width = 4,
    height = 2
  )

## Warning: Removed 165 rows containing non-finite values (`stat_count()`).
## Removed 165 rows containing non-finite values (`stat_count()`).

Workshop Relevance Feedback

  # Keep only rows with one of the five expected answers, then order the
  # answers from least to most likely so the bars appear in that order.
  # Note that poll_data itself is overwritten with the filtered rows.
  poll_data <- poll_data %>%
    dplyr::filter(
      how_likely_are_you_to_use_what_you_learned_in_your_daily_work %in%
        c("Extremely likely", "Likely", "Not very likely", "Somewhat likely", "Very likely")
    ) %>%
    dplyr::mutate(
      how_likely_are_you_to_use_what_you_learned_in_your_daily_work = factor(
        how_likely_are_you_to_use_what_you_learned_in_your_daily_work,
        levels = c("Not very likely", "Somewhat likely", "Likely", "Very likely", "Extremely likely")
      )
    )

  ggplot2::ggplot(
    poll_data,
    ggplot2::aes(x = how_likely_are_you_to_use_what_you_learned_in_your_daily_work)
  ) +
    ggplot2::geom_bar(stat = "count", fill = "#CBC3E3") +
    ggplot2::labs(title = "How likely are you to use what you learned in your daily work?") +
    ggplot2::xlab("") +
    ggplot2::theme_classic() +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))

  # No plot is passed, so ggsave() writes ggplot2's last plot
  ggplot2::ggsave(
    filename = file.path("plots", "itn_relevance.png"),
    width = 4,
    height = 2
  )

Wordclouds for Qualitative Workshop Feedback

# Fetch the English udpipe model. overwrite = FALSE reuses a model file that
# an earlier run already stored in the working directory instead of
# downloading it again, and a failed download stops the script here rather
# than surfacing later as a confusing model-loading error.
ud_model <- udpipe::udpipe_download_model(language = "english", overwrite = FALSE)
if (isTRUE(ud_model$download_failed)) {
  stop(
    "Could not download the udpipe English model: ", ud_model$download_message,
    call. = FALSE
  )
}

## Downloading udpipe model from https://raw.githubusercontent.com/jwijffels/udpipe.models.ud.2.5/master/inst/udpipe-ud-2.5-191206/english-ewt-ud-2.5-191206.udpipe to /Users/candacesavonen/Desktop/GitRepos/Hutch/itn-metric/english-ewt-ud-2.5-191206.udpipe

##  - This model has been trained on version 2.5 of data from https://universaldependencies.org

##  - The model is distributed under the CC-BY-SA-NC license: https://creativecommons.org/licenses/by-nc-sa/4.0

##  - Visit https://github.com/jwijffels/udpipe.models.ud.2.5 for model license details.

##  - For a list of all models and their licenses (most models you can download with this package have either a CC-BY-SA or a CC-BY-SA-NC license) read the documentation at ?udpipe_download_model. For building your own models: visit the documentation by typing vignette('udpipe-train', package = 'udpipe')

## Downloading finished, model stored at '/Users/candacesavonen/Desktop/GitRepos/Hutch/itn-metric/english-ewt-ud-2.5-191206.udpipe'

# Replace the download record with the loaded model object
ud_model <- udpipe::udpipe_load_model(file = ud_model$file_model)

What did you like most about the workshop?

# Annotate the free-text answers, keep nouns, adjectives and adverbs, and
# count how often each lower-cased lemma occurs.
results <- udpipe::udpipe_annotate(
  ud_model,
  x = poll_data$what_did_you_like_most_about_the_workshop
) %>%
  as.data.frame() %>%
  dplyr::filter(upos %in% c("NOUN", "ADJ", "ADV")) %>%
  dplyr::mutate(lemma = tolower(lemma)) %>%
  dplyr::count(lemma)

# Word cloud of the lemmas that occur at least three times
wordcloud::wordcloud(
  words = results$lemma,
  freq = results$n,
  min.freq = 3,
  scale = c(3, .4),
  colors = c("#98fb98", "#83D475", "#355E3B")
)

Please share any recommendations you have for improvements

# Same lemma counting as for the 'liked most' question, applied to the
# improvement recommendations.
rec_results <- udpipe::udpipe_annotate(
  ud_model,
  x = poll_data$please_share_any_recommendations_you_have_for_improvements
) %>%
  as.data.frame() %>%
  dplyr::filter(upos %in% c("NOUN", "ADJ", "ADV")) %>%
  dplyr::mutate(lemma = tolower(lemma)) %>%
  dplyr::count(lemma)

# Word cloud of the lemmas that occur at least three times
wordcloud::wordcloud(
  words = rec_results$lemma,
  freq = rec_results$n,
  min.freq = 3,
  scale = c(4, .4),
  colors = c("#98fb98", "#83D475", "#355E3B")
)

Get number of unique workshop attendees

# E-mail addresses were collected under three differently worded poll
# questions; pool them before de-duplicating.
attendee_emails <- c(
  itcr_slido_data$`Polls-per-user`$`Please submit your email so we can log your attendance`,
  itcr_slido_data$`Polls-per-user`$`What's your email?`,
  itcr_slido_data$`Polls-per-user`$`What is your email?`
)

# Rows without an answer are NA; unique() would keep NA as one extra
# "attendee", so missing and empty answers are dropped first. Case and
# surrounding whitespace are normalised so the same address typed slightly
# differently is counted once.
attendee_emails <- attendee_emails[!is.na(attendee_emails)]
attendee_emails <- tolower(trimws(attendee_emails))
unique_emails <- unique(attendee_emails[attendee_emails != ""])

length(unique_emails)

## [1] 47

Unique visitors to websites

# Total users per website, largest first, coloured by target audience.
# The y axis is fixed to 0-6000.
ggplot2::ggplot(
  itcr_course_data,
  ggplot2::aes(
    x = reorder(website, -totalUsers),
    y = totalUsers,
    fill = target_audience
  )
) +
  ggplot2::geom_bar(stat = "identity") +
  ggplot2::geom_text(ggplot2::aes(label = totalUsers), size = 3, vjust = -1) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::ylim(c(0, 6000)) +
  ggplot2::xlab("") +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))

# No plot is passed, so ggsave() writes ggplot2's last plot
ggplot2::ggsave(
  filename = file.path("plots", "itn_website_visits.png"),
  width = 4,
  height = 3
)

Table of user engagement

# Active users and average session duration per website, as a knitr table
# rounded to two digits
user_totals <- ga_metrics %>%
  janitor::clean_names() %>%
  dplyr::select(website, active_users, average_session_duration) %>%
  knitr::kable(
    digits = 2,
    long_table = TRUE,
    padding = 2
  )

user_totals

website	active_users	average_session_duration
AI for Efficient Programming	120	362.52
NIH Data Sharing	172	230.25
AI for Decision Makers	42	598.87
ITN Website	5542	181.93
Leadership in Cancer Informatics	396	229.81
Documentation and Usability	734	192.49
Computing for Cancer Informatics	610	279.10
Reproducibility in Cancer Informatics	1315	194.69
Advanced Reproducibility in Cancer Informatics	334	221.12
Ethical Data Handling	59	278.85
OTTR website	787	67.77
Choosing Genomics Tools	97	248.51
Overleaf and Latex for Scientific Articles	17	471.66
GitHub Automation for Scientists	23	202.12
metricminer.org	2	26.40

# Engagement metrics per website, as a knitr table rounded to two digits
user_engagement <- ga_metrics %>%
  janitor::clean_names() %>%
  dplyr::select(
    website,
    screen_page_views_per_user,
    sessions,
    screen_page_views,
    engagement_rate
  ) %>%
  knitr::kable(
    digits = 2,
    long_table = TRUE,
    padding = 2
  )

user_engagement

website	screen_page_views_per_user	sessions	screen_page_views	engagement_rate
AI for Efficient Programming	6.85	335	822	0.55
NIH Data Sharing	5.06	364	871	0.43
AI for Decision Makers	32.60	253	1369	0.69
ITN Website	2.58	8889	14310	0.45
Leadership in Cancer Informatics	3.76	752	1489	0.52
Documentation and Usability	2.66	1080	1952	0.52
Computing for Cancer Informatics	6.01	1317	3666	0.52
Reproducibility in Cancer Informatics	2.97	2080	3908	0.45
Advanced Reproducibility in Cancer Informatics	4.08	712	1364	0.51
Ethical Data Handling	8.31	207	490	0.51
OTTR website	1.41	1044	1112	0.31
Choosing Genomics Tools	6.20	253	601	0.58
Overleaf and Latex for Scientific Articles	9.94	49	169	0.61
GitHub Automation for Scientists	3.91	44	90	0.50
metricminer.org	2.00	3	4	0.67

# One facet per engagement metric (each with its own y scale), one bar per
# course. The three non-course sites are left out.
itcr_course_data %>%
  janitor::clean_names() %>%
  dplyr::select(
    website,
    screen_page_views_per_user,
    average_session_duration,
    event_count_per_user,
    engagement_rate,
    target_audience
  ) %>%
  tidyr::pivot_longer(
    !c(website, target_audience),
    names_to = "metric_name",
    values_to = "value"
  ) %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  ggplot2::ggplot(ggplot2::aes(x = website, y = value, fill = target_audience)) +
  ggplot2::geom_bar(stat = "identity", position = "dodge") +
  ggplot2::facet_wrap(~metric_name, scales = "free_y") +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 8),
    plot.margin = ggplot2::unit(c(1.5, .5, .5, .5), "cm")
  )

# No plot is passed, so ggsave() writes ggplot2's last plot
ggplot2::ggsave(
  filename = file.path("plots", "itn_engagement_stats.png"),
  width = 8,
  height = 6
)

ITN Course and Website Traffic Over Time

# Google Analytics dimension rows with proper dates, a month bucket, a
# website-vs-course flag and the manual course information attached.
# The join key is given explicitly so the join cannot silently change if
# another shared column name appears. The long course title is shortened
# only after the join, because the manual sheet is matched on the original
# website name.
web_traffic_overtime <- ga_dims %>%
  dplyr::mutate(
    date = lubridate::ymd(paste0(year, "-", month, "-", day)),
    month_year = lubridate::ym(paste0(year, "-", month)),
    web_yn = dplyr::case_when(
      website == "ITN Website" ~ "ITN Website",
      website != "ITN Website" ~ "ITN Online Course Website"
    )
  ) %>%
  dplyr::left_join(manual_course_info, by = "website") %>%
  dplyr::mutate(website = dplyr::case_when(
    website == "Advanced Reproducibility in Cancer Informatics" ~ "Advanced Reproducibility",
    TRUE ~ website
  ))

## Joining with `by = join_by(website)`

# Monthly traffic across the online course websites.
# The axis label and the output file both describe COURSE traffic, so the
# three non-course sites are excluded here, the same way the other
# course-level plots in this file do it. The filter previously kept ONLY
# those three sites, which contradicted the label.
# NOTE(review): n is the number of rows per month in the GA dimension data;
# confirm that one row corresponds to one visitor.
traffic_plot <- web_traffic_overtime %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  dplyr::count(month_year) %>%
  ggplot2::ggplot(ggplot2::aes(y = n, x = month_year)) +
  ggplot2::geom_bar(stat = "identity", fill = "pink") +
  ggplot2::scale_x_date(date_labels = "%b %Y") +
  ggplot2::ylab("Unique ITN Online Course Visitors") +
  ggplot2::xlab("") +
  ggplot2::theme_minimal() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1))

# Pass the plot explicitly instead of relying on ggplot2's "last plot" state
ggplot2::ggsave(
  file.path("plots", "itn_course_traffic_plot.png"),
  plot = traffic_plot,
  width = 4,
  height = 3.5
)

traffic_plot

# Long table with one row per course and learner source (website users,
# Coursera, Leanpub). The three non-course sites are dropped and the source
# column is given readable labels.
long_df <- itcr_course_data %>%
  dplyr::select(c(
    "website",
    "totalUsers",
    "coursera_count",
    "leanpub_count",
    "target_audience"
  )) %>%
  tidyr::pivot_longer(
    !c(website, target_audience),
    names_to = "modality",
    values_to = "learner_count"
  ) %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  dplyr::mutate(
    modality = dplyr::case_when(
      modality == "totalUsers" ~ "Website Learners",
      modality == "coursera_count" ~ "Total Coursera Enrollments",
      modality == "leanpub_count" ~ "Total Leanpub Enrollments",
      TRUE ~ modality
    )
  )

Total learners from each modality

# Total learners per modality, one facet per target audience.
# .groups = "drop" returns an ungrouped summary, which removes the
# "grouped output" message and avoids carrying a leftover grouping into
# later steps.
long_df %>%
  dplyr::group_by(modality, target_audience) %>%
  dplyr::summarize(
    total_learners = sum(learner_count, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot2::ggplot(ggplot2::aes(x = reorder(modality, -total_learners), y = total_learners, fill = target_audience)) +
  ggplot2::geom_bar(stat = "identity", na.rm = TRUE) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
  ggplot2::xlab("") +
  ggplot2::ylab("Visitors/Enrollees") +
  ggplot2::geom_text(ggplot2::aes(label = total_learners), size = 3, vjust = -1, na.rm = TRUE) +
  ggplot2::ylim(c(0, 4200)) +
  ggplot2::facet_wrap(~target_audience) +
  ggplot2::scale_fill_manual(values = cbPalette)

## `summarise()` has grouped output by 'modality'. You can override using the
## `.groups` argument.

# No plot is passed, so ggsave() writes ggplot2's last plot
ggplot2::ggsave(
  filename = file.path("plots", "itn_total_enrollments.png"),
  width = 8,
  height = 3
)

Total learners by course

# Total learners per course (all modalities added up), largest first.
# .groups = "drop" returns an ungrouped summary, which removes the
# "grouped output" message and avoids carrying a leftover grouping into
# later steps.
long_df %>%
  dplyr::group_by(website, target_audience) %>%
  dplyr::summarize(
    total_learners = sum(learner_count, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot2::ggplot(ggplot2::aes(y = total_learners, x = reorder(website, -total_learners), fill = target_audience)) +
  ggplot2::geom_bar(stat = "identity") +
  ggplot2::ylab("Total learners by course") +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 8)
  ) +
  ggplot2::geom_text(ggplot2::aes(label = total_learners), size = 3, vjust = -1, na.rm = TRUE) +
  ggplot2::ylim(c(0, 1800)) +
  ggplot2::xlab("") +
  ggplot2::scale_fill_manual(values = cbPalette)

## `summarise()` has grouped output by 'website'. You can override using the
## `.groups` argument.

# No plot is passed, so ggsave() writes ggplot2's last plot
ggplot2::ggsave(
  filename = file.path("plots", "itn_total_learners_by_course.png"),
  width = 10,
  height = 5
)

# Table of total learners per course, largest first.
# .groups = "drop" returns an ungrouped summary and removes the
# "grouped output" message.
long_df %>%
  dplyr::group_by(website, target_audience) %>%
  dplyr::summarize(
    total_learners = sum(learner_count, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  dplyr::arrange(-total_learners) %>%
  knitr::kable()

## `summarise()` has grouped output by 'website'. You can override using the
## `.groups` argument.

website	target_audience	total_learners
Reproducibility in Cancer Informatics	New to data	1690
Computing for Cancer Informatics	New to data	1562
Leadership in Cancer Informatics	Leadership	1051
Documentation and Usability	Software developers	1044
Advanced Reproducibility	Software developers	613
AI for Efficient Programming	Software developers	264
NIH Data Sharing	Leadership	175
AI for Decision Makers	Leadership	136
Choosing Genomics Tools	New to data	110
Ethical Data Handling	Leadership	60
GitHub Automation for Scientists	Software developers	25
Overleaf and Latex for Scientific Articles	Leadership	17

Course traffic by course

# Monthly row counts per course, one facet per course, coloured by target
# audience. The three non-course sites are left out.
web_traffic_overtime %>%
  dplyr::group_by(website, month_year, target_audience) %>%
  dplyr::count() %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  ggplot2::ggplot(
    ggplot2::aes(x = month_year, y = n, fill = target_audience)
  ) +
  ggplot2::geom_bar(stat = "identity") +
  ggplot2::facet_wrap(~website) +
  ggplot2::scale_x_date(date_labels = "%b %Y") +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::ylab("Unique ITN Website Learners Overtime") +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 8)
  )

# No plot is passed, so ggsave() writes ggplot2's last plot
ggplot2::ggsave(
  filename = file.path("plots", "itn_course_traffic_plot_by_course.png"),
  width = 10,
  height = 5
)

Number of learners by how long the course has been out

# Coursera enrolments against the Coursera launch date, one point per
# course (non-course sites excluded)
itcr_course_data %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  ggplot2::ggplot(
    ggplot2::aes(
      x = coursera_launch,
      y = coursera_count,
      color = target_audience
    )
  ) +
  ggplot2::geom_point() +
  ggplot2::scale_color_manual(values = cbPalette) +
  ggplot2::theme_minimal()

## Warning: Removed 5 rows containing missing values (`geom_point()`).

# Website users against the number of days since the course website
# launched, one labelled point per course (non-course sites excluded).
# today() is namespaced because lubridate is never attached with library().
# The age is converted to a plain number of days so ggplot2 does not have
# to guess a scale for a <difftime> column.
itcr_course_data %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  dplyr::mutate(
    duration = as.numeric(lubridate::today() - website_launch, units = "days")
  ) %>%
  ggplot2::ggplot(ggplot2::aes(x = duration, y = totalUsers, color = target_audience)) +
  ggplot2::geom_point() +
  ggplot2::theme_minimal() +
  ggplot2::xlab("How long the course has been out") +
  ggplot2::scale_color_manual(values = cbPalette) +
  ggplot2::geom_text(ggplot2::aes(x = duration, y = totalUsers, label = website), size = 3, vjust = -1, na.rm = TRUE)

## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.

# No plot is passed, so ggsave() writes ggplot2's last plot
ggplot2::ggsave(
  filename = file.path("plots", "itn_website_traffic_by_how_long_the_course_has_been_published.png"),
  width = 10,
  height = 5
)

## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.

Software engagement

# Monthly traffic for the non-course websites.
# The axis label and the output file describe SOFTWARE/website traffic, but
# the filter previously EXCLUDED the three non-course sites and therefore
# plotted the course traffic again. It now keeps exactly those sites.
# NOTE(review): "ITN Website" is the project site rather than a software
# site — confirm whether it belongs in this plot.
web_traffic_overtime %>%
  dplyr::filter(website %in% c("ITN Website", "OTTR website", "metricminer.org")) %>%
  dplyr::group_by(website, month_year) %>%
  dplyr::count() %>%
  ggplot2::ggplot(ggplot2::aes(y = n, x = month_year)) +
  ggplot2::geom_bar(stat = "identity", fill = "pink") +
  ggplot2::scale_x_date(date_labels = "%b %Y") +
  ggplot2::ylab("Unique ITN software website visitors") +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 8)
  )

ggplot2::ggsave(file.path("plots", "itn_software_website_traffic.png"), width = 10, height = 5)

CRAN downloads

 #download_stats <- cran_stats(c("ottrpal", "conrad", "ari", "text2speech"))

 #download_stats %>% dplyr::summarize(download_total = sum(downloads))

 #download_stats %>% dplyr::group_by(package) %>%
 #  dplyr::summarize(download_total = sum(downloads))

#sum(download_stats$downloads) + 426

#if (!is.null(download_stats)) {
#  print(head(download_stats))
#  ggplot(download_stats, aes(end, downloads, group=package, color=package)) +
#    geom_line() +
#    geom_point() +
#    scale_y_log10() + 
#    theme_minimal()
  
#  ggplot2::ggsave(file.path("plots", "itn_software_cran_downloads.png"), width = 10, height = 5)
# }

Coursera

# Coursera enrolments per course, largest first; courses without a positive
# Coursera count are left out. The y axis is fixed to 0-1200.
itcr_course_data %>%
  dplyr::filter(coursera_count > 0) %>%
  ggplot2::ggplot(
    ggplot2::aes(
      x = reorder(website, -coursera_count),
      y = coursera_count,
      fill = target_audience
    )
  ) +
  ggplot2::geom_bar(stat = "identity", na.rm = TRUE) +
  ggplot2::geom_text(ggplot2::aes(label = coursera_count), size = 3, vjust = -1, na.rm = TRUE) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::ylim(c(0, 1200)) +
  ggplot2::xlab("") +
  ggplot2::ylab("Coursera enrollments") +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))

# No plot is passed, so ggsave() writes ggplot2's last plot
ggplot2::ggsave(
  filename = file.path("plots", "itn_coursera_enrollments.png"),
  width = 4,
  height = 2
)

Leanpub

# Leanpub enrolments per course, largest first; courses without a positive
# Leanpub count are left out. The y axis is fixed to 0-40.
itcr_course_data %>%
  dplyr::filter(leanpub_count > 0) %>%
  ggplot2::ggplot(
    ggplot2::aes(
      x = reorder(website, -leanpub_count),
      y = leanpub_count,
      fill = target_audience
    )
  ) +
  ggplot2::geom_bar(stat = "identity", na.rm = TRUE) +
  ggplot2::geom_text(ggplot2::aes(label = leanpub_count), size = 3, vjust = -1, na.rm = TRUE) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::ylim(c(0, 40)) +
  ggplot2::xlab("") +
  ggplot2::ylab("Leanpub enrollments") +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))

# No plot is passed, so ggsave() writes ggplot2's last plot
ggplot2::ggsave(
  filename = file.path("plots", "itn_leanpub_enrollments.png"),
  width = 4,
  height = 2
)

Session Info

# Print the R version, platform and package versions used for this run
sessionInfo()

## R version 4.3.1 (2023-06-16)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.5.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] dlstats_0.1.7     ggplot2_3.4.3     metricminer_0.1.0 testthat_3.2.0   
## [5] magrittr_2.0.3   
## 
## loaded via a namespace (and not attached):
##   [1] remotes_2.4.2.1     rlang_1.1.2         snakecase_0.11.1   
##   [4] compiler_4.3.1      systemfonts_1.0.4   callr_3.7.3        
##   [7] vctrs_0.6.5         stringr_1.5.1       profvis_0.3.8      
##  [10] pkgconfig_2.0.3     crayon_1.5.2        fastmap_1.1.1      
##  [13] ellipsis_0.3.2      labeling_0.4.3      utf8_1.2.4         
##  [16] promises_1.2.1      rmarkdown_2.25      sessioninfo_1.2.2  
##  [19] tzdb_0.4.0          ps_1.7.5            ragg_1.2.5         
##  [22] purrr_1.0.2         bit_4.0.5           xfun_0.41          
##  [25] cachem_1.0.8        jsonlite_1.8.8      highr_0.10         
##  [28] later_1.3.1         parallel_4.3.1      prettyunits_1.2.0  
##  [31] R6_2.5.1            bslib_0.6.1         stringi_1.8.3      
##  [34] RColorBrewer_1.1-3  pkgload_1.3.3       brio_1.1.3         
##  [37] lubridate_1.9.3     jquerylib_0.1.4     cellranger_1.1.0   
##  [40] udpipe_0.8.11       Rcpp_1.0.11         assertthat_0.2.1   
##  [43] knitr_1.45          usethis_2.2.2       readr_2.1.4        
##  [46] httpuv_1.6.11       Matrix_1.6-1.1      timechange_0.2.0   
##  [49] tidyselect_1.2.0    rstudioapi_0.15.0   yaml_2.3.8         
##  [52] miniUI_0.1.1.1      curl_5.2.0          processx_3.8.3     
##  [55] pkgbuild_1.4.2      lattice_0.21-8      tibble_3.2.1       
##  [58] shiny_1.7.5         withr_2.5.2         askpass_1.2.0      
##  [61] evaluate_0.23       desc_1.4.2          urlchecker_1.0.1   
##  [64] pillar_1.9.0        generics_0.1.3      vroom_1.6.3        
##  [67] rprojroot_2.0.4     hms_1.1.3           munsell_0.5.0      
##  [70] scales_1.2.1        xtable_1.8-4        glue_1.6.2         
##  [73] janitor_2.2.0       tools_4.3.1         data.table_1.14.8  
##  [76] fs_1.6.3            grid_4.3.1          tidyr_1.3.0        
##  [79] gh_1.4.0            devtools_2.4.5      colorspace_2.1-0   
##  [82] googlesheets4_1.1.1 googledrive_2.1.1   cli_3.6.2          
##  [85] textshaping_0.3.6   fansi_1.0.6         gargle_1.5.2       
##  [88] dplyr_1.1.4         gtable_0.3.4        sass_0.4.8         
##  [91] digest_0.6.33       wordcloud_2.6       htmlwidgets_1.6.2  
##  [94] farver_2.1.1        memoise_2.0.1       htmltools_0.5.7    
##  [97] lifecycle_1.0.4     httr_1.4.7          mime_0.12          
## [100] openssl_2.1.1       bit64_4.0.5

ITN Eval Metrics

Candace Savonen

2024-01-05

Data Set Up

Collect online course data

Get OTTR courses

Collaboration Info

Collect Loqui Video Creation User data

Unique users of Loqui

Number of videos made with Loqui

Collect Workshop Feedback Info

Data Visualizations

Workshop Relevance Feedback

Wordclouds for Qualitative Workshop Feedback

What did you like most about the workshop?

Get number of unique workshop attendees

Unique visitors to websites

Table of user engagement

ITN Course and Website Traffic Over Time

Total learners from each modality

Total learners by course

Course traffic by course

Number of learners by how long the course has been out

Software engagement

CRAN downloads

Coursera

Leanpub

Session Info