Data Set Up
# Create the output folders; showWarnings = FALSE keeps reruns quiet when they already exist
dir.create("data", showWarnings = FALSE)
dir.create("plots", showWarnings = FALSE)
# Fix the random seed so any randomised step gives the same result on every run
set.seed(1234)
# Load the development copy of metricminer from the sibling directory
devtools::load_all("../metricminer/")
## ℹ Loading metricminer
library("ggplot2")
library("dlstats")
library("magrittr")
source(file.path("local_auth_2.R"))
## You chose to cache your credentials, if you change your mind, run metricminer::delete_creds().
##
## Be careful not to push .httr-oauth or RDS files to GitHub or share it anywhere.
# Credentials are read from environment variables rather than written into this file
auth_from_secret("calendly", token = Sys.getenv("METRICMINER_CALENDLY"))
# Google takes a refresh token and an access token; cache = TRUE asks for the credentials to be cached
auth_from_secret("google",
refresh_token = Sys.getenv("METRICMINER_GOOGLE_REFRESH"),
access_token = Sys.getenv("METRICMINER_GOOGLE_ACCESS"),
cache = TRUE)
## You chose to cache your credentials, if you change your mind, run metricminer::delete_creds().
##
## Be careful not to push .httr-oauth or RDS files to GitHub or share it anywhere.
# GitHub token is likewise read from the environment
auth_from_secret("github", token = Sys.getenv("METRICMINER_GITHUB_PAT"))
# ga_accounts$id is used further down to pick the Google Analytics accounts to query
ga_accounts <- get_ga_user()
## Using user-supplied cached token using authorize("google")
## Auto-refreshing stale OAuth token.
# NOTE(review): calendly_account is not referenced again in this file - confirm it is still needed
calendly_account <- get_calendly_user()
## Using user-supplied cached token using authorize("calendly")
# Three colours passed to the manual fill/colour scales keyed on target_audience
cbPalette <- c("#E69F00", "#56B4E9", "#CC79A7")
Collect online course data
# Google Analytics results are cached as three RDS files. The API is queried
# again whenever ANY of them is missing, so a partially written cache can no
# longer make the readRDS() calls below fail.
ga_cache_files <- c(
  metrics = file.path("data", "itcr_ga_metric_data.RDS"),
  dims = file.path("data", "itcr_ga_dims_data.RDS"),
  link_clicks = file.path("data", "itcr_ga_link_click_data.RDS")
)

if (!all(file.exists(ga_cache_files))) {
  fhdsl_stats_list <- get_all_ga_metrics(account_id = ga_accounts$id[1])
  itcr_stats_list <- get_all_ga_metrics(account_id = ga_accounts$id[2])

  # There's some google analytics that aren't ITCR courses
  not_itcr <- c("hutchdatasci", "whoiswho", "MMDS", "FH Cluster 101", "AnVIL_Researcher_Journey")

  # Set up each data frame: stack both accounts, drop the non-ITCR sites, cache
  ga_metrics <- dplyr::bind_rows(fhdsl_stats_list$metrics, itcr_stats_list$metrics) %>%
    dplyr::filter(!(website %in% not_itcr))
  saveRDS(ga_metrics, ga_cache_files[["metrics"]])

  ga_dims <- dplyr::bind_rows(fhdsl_stats_list$dimensions, itcr_stats_list$dimensions) %>%
    dplyr::filter(!(website %in% not_itcr))
  saveRDS(ga_dims, ga_cache_files[["dims"]])

  ga_link_clicks <- dplyr::bind_rows(fhdsl_stats_list$link_clicks, itcr_stats_list$link_clicks) %>%
    dplyr::filter(!(website %in% not_itcr))
  saveRDS(ga_link_clicks, ga_cache_files[["link_clicks"]])
} else {
  ga_metrics <- readRDS(ga_cache_files[["metrics"]])
  ga_dims <- readRDS(ga_cache_files[["dims"]])
  ga_link_clicks <- readRDS(ga_cache_files[["link_clicks"]])
}
# Manually curated course information from the "Course_data" tab.
# col_types gives one type code per column (c = character, D = date,
# i = integer), so the date columns arrive already parsed. The former
# mutate_if(is.numeric.Date, ...) step was dropped: that predicate is FALSE for
# every column, so the step never changed anything.
manual_course_info <- googlesheets4::read_sheet(
  "https://docs.google.com/spreadsheets/d/1-8vox2LzkVKzhmSFXCWjwt3jFtK-wHibRAq2fqbxEyo/edit#gid=1550012125",
  sheet = "Course_data",
  col_types = "ccDDDciii"
)
## ✔ Reading from "ITN_collaborations_and_course_metrics".
## ✔ Range ''Course_data''.
## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion
## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion
## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion
## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion
## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion
## Warning in .Primitive("as.integer")(x, ...): NAs introduced by coercion
# Join this all together
# Attach the manual course information to the analytics metrics. The key is
# named explicitly so a column later added to both tables cannot silently
# change what the join matches on.
itcr_course_data <- ga_metrics %>%
  dplyr::left_join(manual_course_info, by = "website") %>%
  # Shorten the one long course title used as a plot label
  dplyr::mutate(website = dplyr::case_when(
    website == "Advanced Reproducibility in Cancer Informatics" ~ "Advanced Reproducibility",
    TRUE ~ website
  ))
## Joining with `by = join_by(website)`
# Write the joined course metrics to a tab-separated file in the data folder
readr::write_tsv(itcr_course_data, file.path("data", "itcr_course_metrics.tsv"))
Get OTTR courses
# Raw URLs of the sync.yml file of each template repository; every file is read by extract_repos()
# NOTE(review): the C-MOOR URL has a doubled slash before `main` - confirm that is intended
sync_yamls <- c(
"https://raw.githubusercontent.com/jhudsl/OTTR_Template/main/.github/sync.yml",
"https://raw.githubusercontent.com/C-MOOR/C-MOOR_Template//main/.github/sync.yml",
"https://raw.githubusercontent.com/jhudsl/AnVIL_Template/main/.github/sync.yml",
"https://raw.githubusercontent.com/datatrail-jhu/DataTrail_Template/main/.github/sync.yml"
)
#' Extract the repositories listed in a sync file
#'
#' @param yaml Path or URL handed to `yaml::read_yaml()`.
#' @return Character vector with one element per line of the `repos` fields
#'   found under `group`; `character(0)` when the file has no `repos` entry.
extract_repos <- function(yaml) {
  content <- yaml::read_yaml(yaml)
  repos <- unlist(purrr::map(content$group, "repos"))
  # With no `repos` entry unlist() returns NULL, which strsplit() rejects
  if (is.null(repos)) {
    return(character(0))
  }
  unlist(strsplit(repos, "\n"))
}
# Pool the repositories named in every sync file
all_ottr_repos <- unlist(lapply(sync_yamls, extract_repos))

# Build the table step by step: split "organization/repo", flag repositories
# whose name mentions "template", then drop exact duplicate rows
ottr_df <- data.frame(repo_name = all_ottr_repos)
ottr_df <- tidyr::separate(
  ottr_df,
  repo_name,
  into = c("organization", "repo"),
  sep = "\\/",
  remove = FALSE
)
ottr_df <- dplyr::mutate(
  ottr_df,
  template = dplyr::case_when(
    stringr::str_detect(repo, "template|Template") ~ "template",
    TRUE ~ "course"
  )
)
ottr_df <- dplyr::distinct(ottr_df)

# Repositories per organization
dplyr::count(ottr_df, organization)
## organization n
## 1 C-MOOR 3
## 2 PracticalGenomics 1
## 3 abyzovlab 1
## 4 datatrail-jhu 15
## 5 fhdsl 23
## 6 griffithlab 3
## 7 jhudsl 26
## 8 mccoy-lab 1
## 9 opencasestudies 1
## 10 tmm211 1
# Number of repositories that are courses rather than templates
dplyr::count(dplyr::filter(ottr_df, template == "course"))
## n
## 1 69
Collaboration Info
# Load the collaborations sheet and report how many rows it holds
collabs <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/1-8vox2LzkVKzhmSFXCWjwt3jFtK-wHibRAq2fqbxEyo/edit#gid=0")
## ✔ Reading from "ITN_collaborations_and_course_metrics".
## ✔ Range 'SuccesfulCollabs'.
nrow(collabs)
## [1] 106
# Count and proportion of collaborations for each affiliation label
dplyr::mutate(
  dplyr::count(collabs, ITN_ITCR_or_external),
  perc = n / sum(n)
)
## # A tibble: 8 × 3
## ITN_ITCR_or_external n perc
## <chr> <int> <dbl>
## 1 ITCR 67 0.632
## 2 ITN 5 0.0472
## 3 NCI 2 0.0189
## 4 NIH 2 0.0189
## 5 external 2 0.0189
## 6 external (was after leaving) 1 0.00943
## 7 external NIH/NCI intermural 3 0.0283
## 8 neither 24 0.226
# One row per category: split the comma-separated Category values, trim stray
# whitespace and drop the "?" placeholder (rows with a missing Category are
# dropped by the same filter). The stray empty argument that used to follow
# `sep` has been removed.
collabs <- collabs %>%
  tidyr::separate_rows("Category", sep = ", ") %>%
  dplyr::mutate(Category = trimws(Category)) %>%
  dplyr::filter(Category != "?")
nrow(collabs)
## [1] 154
# Bar chart of collaborations per category, tallest bar first; the legend is
# hidden because the x axis already names each category
ggplot2::ggplot(
  data = dplyr::count(collabs, Category),
  mapping = ggplot2::aes(x = reorder(Category, -n), y = n, fill = Category)
) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::labs(x = "") +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    legend.position = "none",
    plot.margin = ggplot2::unit(c(.75, .5, .5, .5), "cm"),
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 6)
  )
ggplot2::ggsave(
  file.path("plots", "itn_collaboration_types.png"),
  width = 7,
  height = 5
)
# Attendance is derived from the agenda text file: every line containing "20"
# is treated as a meeting header (presumably because it carries a 20xx year -
# verify against the file), and the distance from one header to the next is
# used as that meeting's attendance.
open_agenda <- readLines("ITCR OPEN Group Agenda.txt")
header_lines <- grep("20", open_agenda)
dates <- gsub(" Meeting| meeting", "", open_agenda[header_lines])
# First word of the header is used as the month, second as the year
month <- stringr::word(dates, sep = " ")
year <- stringr::word(dates, sep = " ", start = 2)
# diff() against the total line count gives the same gaps as the former index
# arithmetic but also works when the file holds a single meeting.
# NOTE(review): the final gap runs to the last line of the file while earlier
# gaps run to the next header line - confirm both count attendees alike.
attendance <- diff(c(header_lines, length(open_agenda)))
open_attendance <- data.frame(
  date = lubridate::ym(paste(year, "-", month)),
  attendance
)
# Attendance per meeting date, with the count printed above each bar
ggplot2::ggplot(
  data = open_attendance,
  mapping = ggplot2::aes(x = date, y = attendance)
) +
  ggplot2::geom_col(fill = "lightgreen") +
  ggplot2::geom_text(ggplot2::aes(label = attendance), size = 3, vjust = -1) +
  ggplot2::labs(x = "") +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
ggplot2::ggsave(
  file.path("plots", "open_attendance.png"),
  width = 4,
  height = 3
)
Collect Loqui Video Creation User data
Unique users of Loqui
# Loqui usage log read from the shared sheet
loqui_usage <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/1G_HTU-bv2k5txExP8EH3ScUfGqtW1P3syThD84Z-g9k/edit#gid=0")
## ✔ Reading from "Loqui User Data".
## ✔ Range 'Sheet1'.
# Number of distinct email values. count() is namespace-qualified because dplyr
# is never attached in this file.
loqui_usage %>% dplyr::count(email) %>% dplyr::pull(email) %>% length()
## [1] 18
Number of videos made with Loqui
# Sum of the per-email row counts, i.e. the total number of logged rows.
# count() is namespace-qualified because dplyr is never attached in this file.
loqui_usage %>% dplyr::count(email) %>% dplyr::pull(n) %>% sum()
## [1] 477
Collect Workshop Feedback Info
# Reuse the cached Slido export when present; otherwise fetch it from the
# Drive folder and cache it for the next run
if (file.exists(file.path("data", "itcr_slido_data.RDS"))) {
  itcr_slido_data <- readRDS(file.path("data", "itcr_slido_data.RDS"))
} else {
  itcr_drive_id <- "https://drive.google.com/drive/folders/0AJb5Zemj0AAkUk9PVA"
  itcr_slido_data <- get_slido_files(itcr_drive_id)
  saveRDS(itcr_slido_data, file.path("data", "itcr_slido_data.RDS"))
}

# Per-user poll answers with column names converted by janitor::clean_names()
poll_data <- janitor::clean_names(itcr_slido_data$`Polls-per-user`)
# Classify each numeric recommendation rating: below 7 = detractor,
# 7 or 8 = passive, above 8 = promoter. Answers that are not numbers become NA
# and are removed before counting, so a sum over the resulting counts covers
# actual respondents only; previously the non-responses formed their own
# category and inflated that total.
promoters_categories <- poll_data %>%
  dplyr::mutate(
    how_likely_would_you_be_to_recommend_this_workshop =
      as.numeric(how_likely_would_you_be_to_recommend_this_workshop),
    promoter = dplyr::case_when(
      how_likely_would_you_be_to_recommend_this_workshop < 7 ~ "detractors",
      how_likely_would_you_be_to_recommend_this_workshop %in% c(7, 8) ~ "passives",
      how_likely_would_you_be_to_recommend_this_workshop > 8 ~ "promoters",
      TRUE ~ NA_character_
    )
  ) %>%
  dplyr::filter(!is.na(promoter)) %>%
  dplyr::count(promoter)
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `how_likely_would_you_be_to_recommend_this_workshop =
## as.numeric(how_likely_would_you_be_to_recommend_this_workshop)`.
## Caused by warning:
## ! NAs introduced by coercion
# Named vector of counts, one element per row of promoters_categories
promoter_sums <- stats::setNames(promoters_categories$n, promoters_categories$promoter)
# Sum over every count held in promoter_sums
sum(promoter_sums, na.rm = TRUE)
## [1] 119
# Net Promoter Score: promoter share minus detractor share of that sum
promoter_sums["promoters"] / sum(promoter_sums, na.rm = TRUE) -
  promoter_sums["detractors"] / sum(promoter_sums, na.rm = TRUE)
## promoters
## 0.2521008
Data Visualizations
How likely would you be to recommend this workshop?
# Ratings are pooled from two differently named poll columns; answers that are
# not numbers become NA on conversion. ggplot() replaces the deprecated qplot(),
# which together with the extra geom_bar() used to draw the same bars twice.
recommend_scores <- data.frame(
  score = as.numeric(c(
    poll_data$how_likely_would_you_be_to_recommend_this_workshop,
    poll_data$how_likely_would_you_be_to_recommend_this_workshop_2
  ))
)
ggplot2::ggplot(recommend_scores, ggplot2::aes(x = score)) +
  ggplot2::geom_bar(fill = "#CBC3E3") +
  ggplot2::theme_classic() +
  ggplot2::labs(title = "How likely would you be to recommend this workshop?")
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning: Removed 165 rows containing non-finite values (`stat_count()`).
## Removed 165 rows containing non-finite values (`stat_count()`).
# No plot argument is given, so ggsave() writes the most recent plot
ggplot2::ggsave(file.path("plots", "itn_workshop_rec.png"), width = 4, height = 2)
## Warning: Removed 165 rows containing non-finite values (`stat_count()`).
## Removed 165 rows containing non-finite values (`stat_count()`).
Workshop Relevance Feedback
# Keep only rows whose answer is one of the five expected choices.
# NOTE(review): this overwrites poll_data, so everything further down that reads
# poll_data only sees these rows - confirm that is intended.
poll_data <- dplyr::filter(
  poll_data,
  how_likely_are_you_to_use_what_you_learned_in_your_daily_work %in%
    c("Extremely likely", "Likely", "Not very likely", "Somewhat likely", "Very likely")
)
# Order the answers from least to most likely so the bars follow that order
poll_data[["how_likely_are_you_to_use_what_you_learned_in_your_daily_work"]] <- factor(
  poll_data[["how_likely_are_you_to_use_what_you_learned_in_your_daily_work"]],
  levels = c("Not very likely", "Somewhat likely", "Likely", "Very likely", "Extremely likely")
)
ggplot2::ggplot(
  data = poll_data,
  mapping = ggplot2::aes(x = how_likely_are_you_to_use_what_you_learned_in_your_daily_work)
) +
  ggplot2::geom_bar(fill = "#CBC3E3") +
  ggplot2::labs(
    title = "How likely are you to use what you learned in your daily work?",
    x = ""
  ) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
ggplot2::ggsave(
  file.path("plots", "itn_relevance.png"),
  width = 4,
  height = 2
)
Wordclouds for Qualitative Workshop Feedback
# Fetch the English udpipe model; overwrite = FALSE reuses a copy that is
# already on disk instead of downloading it again on every run
ud_model <- udpipe::udpipe_download_model(language = "english", overwrite = FALSE)
## Downloading udpipe model from https://raw.githubusercontent.com/jwijffels/udpipe.models.ud.2.5/master/inst/udpipe-ud-2.5-191206/english-ewt-ud-2.5-191206.udpipe to /Users/candacesavonen/Desktop/GitRepos/Hutch/itn-metric/english-ewt-ud-2.5-191206.udpipe
## - This model has been trained on version 2.5 of data from https://universaldependencies.org
## - The model is distributed under the CC-BY-SA-NC license: https://creativecommons.org/licenses/by-nc-sa/4.0
## - Visit https://github.com/jwijffels/udpipe.models.ud.2.5 for model license details.
## - For a list of all models and their licenses (most models you can download with this package have either a CC-BY-SA or a CC-BY-SA-NC license) read the documentation at ?udpipe_download_model. For building your own models: visit the documentation by typing vignette('udpipe-train', package = 'udpipe')
## Downloading finished, model stored at '/Users/candacesavonen/Desktop/GitRepos/Hutch/itn-metric/english-ewt-ud-2.5-191206.udpipe'
# Replace the download record with the model loaded from its file_model path
ud_model <- udpipe::udpipe_load_model(ud_model$file_model)
What did you like most about the workshop?
# Annotate the free-text answers, keep nouns/adjectives/adverbs, and count the
# lower-cased lemmas
results <- udpipe::udpipe_annotate(
  ud_model,
  x = poll_data$what_did_you_like_most_about_the_workshop
)
results <- as.data.frame(results)
results <- dplyr::filter(results, upos %in% c("NOUN", "ADJ", "ADV"))
results <- dplyr::mutate(results, lemma = tolower(lemma))
results <- dplyr::count(results, lemma)

# Word cloud of lemmas that occur at least three times
wordcloud::wordcloud(
  words = results$lemma,
  freq = results$n,
  scale = c(3, .4),
  min.freq = 3,
  colors = c("#98fb98", "#83D475", "#355E3B")
)
Please share any recommendations you have for improvements
# Same lemma tally as for the previous question, applied to the improvement
# suggestions
rec_results <- udpipe::udpipe_annotate(
  ud_model,
  x = poll_data$please_share_any_recommendations_you_have_for_improvements
)
rec_results <- as.data.frame(rec_results)
rec_results <- dplyr::filter(rec_results, upos %in% c("NOUN", "ADJ", "ADV"))
rec_results <- dplyr::mutate(rec_results, lemma = tolower(lemma))
rec_results <- dplyr::count(rec_results, lemma)

# Word cloud of lemmas that occur at least three times
wordcloud::wordcloud(
  words = rec_results$lemma,
  freq = rec_results$n,
  scale = c(4, .4),
  min.freq = 3,
  colors = c("#98fb98", "#83D475", "#355E3B")
)
Get number of unique workshop attendees
# Pool the three email questions, then normalise before de-duplicating: missing
# and empty answers are removed (NA used to be counted as one attendee) and
# addresses are trimmed and lower-cased so the same address typed differently
# is counted once.
unique_emails <- c(
  itcr_slido_data$`Polls-per-user`$`Please submit your email so we can log your attendance`,
  itcr_slido_data$`Polls-per-user`$`What's your email?`,
  itcr_slido_data$`Polls-per-user`$`What is your email?`
)
unique_emails <- tolower(trimws(unique_emails))
unique_emails <- unique(unique_emails[!is.na(unique_emails) & nzchar(unique_emails)])
length(unique_emails)
## [1] 47
Unique visitors to websites
# Total users per website, largest first, coloured by target audience; the
# y axis is fixed at 0-6000
ggplot2::ggplot(
  data = itcr_course_data,
  mapping = ggplot2::aes(
    x = reorder(website, -totalUsers),
    y = totalUsers,
    fill = target_audience
  )
) +
  ggplot2::geom_col() +
  ggplot2::geom_text(ggplot2::aes(label = totalUsers), size = 3, vjust = -1) +
  ggplot2::scale_y_continuous(limits = c(0, 6000)) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::labs(x = "") +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
ggplot2::ggsave(
  file.path("plots", "itn_website_visits.png"),
  width = 4,
  height = 3
)
Table of user engagement
# Table of active users and average session duration per website.
# kable()'s argument is spelled `longtable` (it only matters for LaTeX output);
# the former `long_table` was not a recognised option.
user_totals <- ga_metrics %>%
  janitor::clean_names() %>%
  dplyr::select(website, active_users, average_session_duration) %>%
  knitr::kable(digits = 2, longtable = TRUE, padding = 2)
user_totals
AI for Efficient Programming |
120 |
362.52 |
NIH Data Sharing |
172 |
230.25 |
AI for Decision Makers |
42 |
598.87 |
ITN Website |
5542 |
181.93 |
Leadership in Cancer Informatics |
396 |
229.81 |
Documentation and Usability |
734 |
192.49 |
Computing for Cancer Informatics |
610 |
279.10 |
Reproducibility in Cancer Informatics |
1315 |
194.69 |
Advanced Reproducibility in Cancer Informatics |
334 |
221.12 |
Ethical Data Handling |
59 |
278.85 |
OTTR website |
787 |
67.77 |
Choosing Genomics Tools |
97 |
248.51 |
Overleaf and Latex for Scientific Articles |
17 |
471.66 |
GitHub Automation for Scientists |
23 |
202.12 |
metricminer.org |
2 |
26.40 |
# Table of page views, sessions and engagement rate per website.
# kable()'s argument is spelled `longtable` (it only matters for LaTeX output);
# the former `long_table` was not a recognised option.
user_engagement <- ga_metrics %>%
  janitor::clean_names() %>%
  dplyr::select(website, screen_page_views_per_user, sessions, screen_page_views, engagement_rate) %>%
  knitr::kable(digits = 2, longtable = TRUE, padding = 2)
user_engagement
AI for Efficient Programming |
6.85 |
335 |
822 |
0.55 |
NIH Data Sharing |
5.06 |
364 |
871 |
0.43 |
AI for Decision Makers |
32.60 |
253 |
1369 |
0.69 |
ITN Website |
2.58 |
8889 |
14310 |
0.45 |
Leadership in Cancer Informatics |
3.76 |
752 |
1489 |
0.52 |
Documentation and Usability |
2.66 |
1080 |
1952 |
0.52 |
Computing for Cancer Informatics |
6.01 |
1317 |
3666 |
0.52 |
Reproducibility in Cancer Informatics |
2.97 |
2080 |
3908 |
0.45 |
Advanced Reproducibility in Cancer Informatics |
4.08 |
712 |
1364 |
0.51 |
Ethical Data Handling |
8.31 |
207 |
490 |
0.51 |
OTTR website |
1.41 |
1044 |
1112 |
0.31 |
Choosing Genomics Tools |
6.20 |
253 |
601 |
0.58 |
Overleaf and Latex for Scientific Articles |
9.94 |
49 |
169 |
0.61 |
GitHub Automation for Scientists |
3.91 |
44 |
90 |
0.50 |
metricminer.org |
2.00 |
3 |
4 |
0.67 |
# Four engagement metrics per course, one facet per metric with its own y
# scale; the three non-course websites are left out
itcr_course_data %>%
  janitor::clean_names() %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  dplyr::select(
    website,
    screen_page_views_per_user,
    average_session_duration,
    event_count_per_user,
    engagement_rate,
    target_audience
  ) %>%
  tidyr::pivot_longer(
    cols = c(
      screen_page_views_per_user,
      average_session_duration,
      event_count_per_user,
      engagement_rate
    ),
    names_to = "metric_name",
    values_to = "value"
  ) %>%
  ggplot2::ggplot(ggplot2::aes(x = website, y = value, fill = target_audience)) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::facet_wrap(ggplot2::vars(metric_name), scales = "free_y") +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    plot.margin = ggplot2::unit(c(1.5, .5, .5, .5), "cm"),
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 8)
  )
ggplot2::ggsave(
  file.path("plots", "itn_engagement_stats.png"),
  width = 8,
  height = 6
)
ITN Course and Website Traffic Over Time
# Add calendar columns and course information to the per-day analytics rows.
# year/month/day are taken through the .data pronoun so they can only come from
# ga_dims; vectors named `year` and `month` also exist in the global
# environment and must never be picked up by accident.
web_traffic_overtime <- ga_dims %>%
  dplyr::mutate(
    date = lubridate::ymd(paste0(.data$year, "-", .data$month, "-", .data$day)),
    month_year = lubridate::ym(paste0(.data$year, "-", .data$month)),
    web_yn = dplyr::case_when(
      website == "ITN Website" ~ "ITN Website",
      website != "ITN Website" ~ "ITN Online Course Website"
    )
  ) %>%
  # Explicit key: a column later shared by both tables must not alter the join
  dplyr::left_join(manual_course_info, by = "website") %>%
  # Shorten the one long course title used as a plot label
  dplyr::mutate(website = dplyr::case_when(
    website == "Advanced Reproducibility in Cancer Informatics" ~ "Advanced Reproducibility",
    TRUE ~ website
  ))
## Joining with `by = join_by(website)`
# Monthly row counts for the online courses. The axis label and file name both
# describe course traffic, so the three non-course websites are excluded here;
# the filter previously kept exactly those three sites instead.
# NOTE(review): confirm that courses, not the project websites, are wanted.
traffic_plot <- web_traffic_overtime %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  dplyr::count(month_year) %>%
  ggplot2::ggplot(ggplot2::aes(y = n, x = month_year)) +
  ggplot2::geom_bar(stat = "identity", fill = "pink") +
  ggplot2::scale_x_date(date_labels = "%b %Y") +
  ggplot2::ylab("Unique ITN Online Course Visitors") +
  ggplot2::xlab("") +
  ggplot2::theme_minimal() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1))
# Pass the plot explicitly rather than relying on whichever plot was made last
ggplot2::ggsave(
  file.path("plots", "itn_course_traffic_plot.png"),
  plot = traffic_plot,
  width = 4,
  height = 3.5
)
traffic_plot
# Long table with one row per course and learner source (website, Coursera,
# Leanpub); the three non-course websites are left out
long_df <- itcr_course_data %>%
  dplyr::select(website, totalUsers, coursera_count, leanpub_count, target_audience) %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  tidyr::pivot_longer(
    cols = c(totalUsers, coursera_count, leanpub_count),
    names_to = "modality",
    values_to = "learner_count"
  ) %>%
  # Swap the column names for the labels shown in the plots
  dplyr::mutate(
    modality = dplyr::case_when(
      modality == "totalUsers" ~ "Website Learners",
      modality == "coursera_count" ~ "Total Coursera Enrollments",
      modality == "leanpub_count" ~ "Total Leanpub Enrollments",
      TRUE ~ modality
    )
  )
Total learners from each modality
# Learners summed per modality and target audience, one facet per audience;
# the y axis is fixed at 0-4200
ggplot2::ggplot(
  data = dplyr::summarize(
    dplyr::group_by(long_df, modality, target_audience),
    total_learners = sum(learner_count, na.rm = TRUE)
  ),
  mapping = ggplot2::aes(
    x = reorder(modality, -total_learners),
    y = total_learners,
    fill = target_audience
  )
) +
  ggplot2::geom_col(na.rm = TRUE) +
  ggplot2::geom_text(
    ggplot2::aes(label = total_learners),
    size = 3,
    vjust = -1,
    na.rm = TRUE
  ) +
  ggplot2::facet_wrap(ggplot2::vars(target_audience)) +
  ggplot2::scale_y_continuous(limits = c(0, 4200)) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::labs(x = "", y = "Visitors/Enrollees") +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
## `summarise()` has grouped output by 'modality'. You can override using the
## `.groups` argument.
ggplot2::ggsave(
  file.path("plots", "itn_total_enrollments.png"),
  width = 8,
  height = 3
)
Total learners by course
# Learners summed over all modalities for each course, largest first;
# the y axis is fixed at 0-1800
ggplot2::ggplot(
  data = dplyr::summarize(
    dplyr::group_by(long_df, website, target_audience),
    total_learners = sum(learner_count, na.rm = TRUE)
  ),
  mapping = ggplot2::aes(
    x = reorder(website, -total_learners),
    y = total_learners,
    fill = target_audience
  )
) +
  ggplot2::geom_col() +
  ggplot2::geom_text(
    ggplot2::aes(label = total_learners),
    size = 3,
    vjust = -1,
    na.rm = TRUE
  ) +
  ggplot2::scale_y_continuous(limits = c(0, 1800)) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::labs(x = "", y = "Total learners by course") +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 8)
  )
## `summarise()` has grouped output by 'website'. You can override using the
## `.groups` argument.
ggplot2::ggsave(
  file.path("plots", "itn_total_learners_by_course.png"),
  width = 10,
  height = 5
)
# The same per-course totals as a table, largest first
knitr::kable(
  dplyr::arrange(
    dplyr::summarize(
      dplyr::group_by(long_df, website, target_audience),
      total_learners = sum(learner_count, na.rm = TRUE)
    ),
    dplyr::desc(total_learners)
  )
)
## `summarise()` has grouped output by 'website'. You can override using the
## `.groups` argument.
Reproducibility in Cancer Informatics |
New to data |
1690 |
Computing for Cancer Informatics |
New to data |
1562 |
Leadership in Cancer Informatics |
Leadership |
1051 |
Documentation and Usability |
Software developers |
1044 |
Advanced Reproducibility |
Software developers |
613 |
AI for Efficient Programming |
Software developers |
264 |
NIH Data Sharing |
Leadership |
175 |
AI for Decision Makers |
Leadership |
136 |
Choosing Genomics Tools |
New to data |
110 |
Ethical Data Handling |
Leadership |
60 |
GitHub Automation for Scientists |
Software developers |
25 |
Overleaf and Latex for Scientific Articles |
Leadership |
17 |
Course traffic by course
# Monthly row counts per course, one facet per course; the three non-course
# websites are left out
web_traffic_overtime %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  dplyr::count(website, month_year, target_audience) %>%
  ggplot2::ggplot(ggplot2::aes(x = month_year, y = n, fill = target_audience)) +
  ggplot2::geom_col() +
  ggplot2::facet_wrap(ggplot2::vars(website)) +
  ggplot2::scale_x_date(date_labels = "%b %Y") +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::labs(y = "Unique ITN Website Learners Overtime") +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 8)
  )
ggplot2::ggsave(
  file.path("plots", "itn_course_traffic_plot_by_course.png"),
  width = 10,
  height = 5
)
Number of learners by how long the course has been out
# Coursera enrollments against Coursera launch date, one point per course
ggplot2::ggplot(
  data = dplyr::filter(
    itcr_course_data,
    !(website %in% c("ITN Website", "OTTR website", "metricminer.org"))
  ),
  mapping = ggplot2::aes(
    x = coursera_launch,
    y = coursera_count,
    color = target_audience
  )
) +
  ggplot2::geom_point() +
  ggplot2::scale_color_manual(values = cbPalette) +
  ggplot2::theme_minimal()
## Warning: Removed 5 rows containing missing values (`geom_point()`).
# Website users against time since the website launch, one labelled point per
# course. today() is namespace-qualified because lubridate is never attached
# in this file.
itcr_course_data %>%
  dplyr::filter(!(website %in% c("ITN Website", "OTTR website", "metricminer.org"))) %>%
  dplyr::mutate(duration = lubridate::today() - website_launch) %>%
  ggplot2::ggplot(ggplot2::aes(x = duration, y = totalUsers, color = target_audience)) +
  ggplot2::geom_point() +
  ggplot2::theme_minimal() +
  ggplot2::xlab("How long the course has been out") +
  ggplot2::scale_color_manual(values = cbPalette) +
  # x and y are inherited from the plot-level mapping
  ggplot2::geom_text(ggplot2::aes(label = website), size = 3, vjust = -1, na.rm = TRUE)
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.
ggplot2::ggsave(
  file.path("plots", "itn_website_traffic_by_how_long_the_course_has_been_published.png"),
  width = 10,
  height = 5
)
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.
Software engagement
# Monthly row counts for the non-course websites. The axis label and file name
# describe software website traffic, so the course sites are excluded here; the
# filter was previously negated and therefore plotted the courses.
# NOTE(review): "ITN Website" is kept alongside the two software sites -
# confirm whether it belongs in this figure.
web_traffic_overtime %>%
  dplyr::filter(website %in% c("ITN Website", "OTTR website", "metricminer.org")) %>%
  dplyr::count(website, month_year) %>%
  ggplot2::ggplot(ggplot2::aes(y = n, x = month_year)) +
  ggplot2::geom_bar(stat = "identity", fill = "pink") +
  ggplot2::scale_x_date(date_labels = "%b %Y") +
  ggplot2::ylab("Unique ITN software website visitors") +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 60, hjust = 1),
    strip.text.x = ggplot2::element_text(size = 8)
  )
ggplot2::ggsave(
  file.path("plots", "itn_software_website_traffic.png"),
  width = 10,
  height = 5
)
CRAN downloads
#download_stats <- cran_stats(c("ottrpal", "conrad", "ari", "text2speech"))
#download_stats %>% dplyr::summarize(download_total = sum(downloads))
#download_stats %>% dplyr::group_by(package) %>%
# dplyr::summarize(download_total = sum(downloads))
#sum(download_stats$downloads) + 426
#if (!is.null(download_stats)) {
# print(head(download_stats))
# ggplot(download_stats, aes(end, downloads, group=package, color=package)) +
# geom_line() +
# geom_point() +
# scale_y_log10() +
# theme_minimal()
# ggplot2::ggsave(file.path("plots", "itn_software_cran_downloads.png"), width = 10, height = 5)
# }
Coursera
# Coursera enrollments for courses with a count above zero, largest first;
# the y axis is fixed at 0-1200
itcr_course_data %>%
  dplyr::filter(coursera_count > 0) %>%
  ggplot2::ggplot(
    ggplot2::aes(
      x = reorder(website, -coursera_count),
      y = coursera_count,
      fill = target_audience
    )
  ) +
  ggplot2::geom_col(na.rm = TRUE) +
  ggplot2::geom_text(
    ggplot2::aes(label = coursera_count),
    size = 3,
    vjust = -1,
    na.rm = TRUE
  ) +
  ggplot2::scale_y_continuous(limits = c(0, 1200)) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::labs(x = "", y = "Coursera enrollments") +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
ggplot2::ggsave(
  file.path("plots", "itn_coursera_enrollments.png"),
  width = 4,
  height = 2
)
Leanpub
# Leanpub enrollments for courses with a count above zero, largest first;
# the y axis is fixed at 0-40
itcr_course_data %>%
  dplyr::filter(leanpub_count > 0) %>%
  ggplot2::ggplot(
    ggplot2::aes(
      x = reorder(website, -leanpub_count),
      y = leanpub_count,
      fill = target_audience
    )
  ) +
  ggplot2::geom_col(na.rm = TRUE) +
  ggplot2::geom_text(
    ggplot2::aes(label = leanpub_count),
    size = 3,
    vjust = -1,
    na.rm = TRUE
  ) +
  ggplot2::scale_y_continuous(limits = c(0, 40)) +
  ggplot2::scale_fill_manual(values = cbPalette) +
  ggplot2::labs(x = "", y = "Leanpub enrollments") +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
ggplot2::ggsave(
  file.path("plots", "itn_leanpub_enrollments.png"),
  width = 4,
  height = 2
)
Session Info
# Print the R version, platform and package versions used for this run
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.5.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] dlstats_0.1.7 ggplot2_3.4.3 metricminer_0.1.0 testthat_3.2.0
## [5] magrittr_2.0.3
##
## loaded via a namespace (and not attached):
## [1] remotes_2.4.2.1 rlang_1.1.2 snakecase_0.11.1
## [4] compiler_4.3.1 systemfonts_1.0.4 callr_3.7.3
## [7] vctrs_0.6.5 stringr_1.5.1 profvis_0.3.8
## [10] pkgconfig_2.0.3 crayon_1.5.2 fastmap_1.1.1
## [13] ellipsis_0.3.2 labeling_0.4.3 utf8_1.2.4
## [16] promises_1.2.1 rmarkdown_2.25 sessioninfo_1.2.2
## [19] tzdb_0.4.0 ps_1.7.5 ragg_1.2.5
## [22] purrr_1.0.2 bit_4.0.5 xfun_0.41
## [25] cachem_1.0.8 jsonlite_1.8.8 highr_0.10
## [28] later_1.3.1 parallel_4.3.1 prettyunits_1.2.0
## [31] R6_2.5.1 bslib_0.6.1 stringi_1.8.3
## [34] RColorBrewer_1.1-3 pkgload_1.3.3 brio_1.1.3
## [37] lubridate_1.9.3 jquerylib_0.1.4 cellranger_1.1.0
## [40] udpipe_0.8.11 Rcpp_1.0.11 assertthat_0.2.1
## [43] knitr_1.45 usethis_2.2.2 readr_2.1.4
## [46] httpuv_1.6.11 Matrix_1.6-1.1 timechange_0.2.0
## [49] tidyselect_1.2.0 rstudioapi_0.15.0 yaml_2.3.8
## [52] miniUI_0.1.1.1 curl_5.2.0 processx_3.8.3
## [55] pkgbuild_1.4.2 lattice_0.21-8 tibble_3.2.1
## [58] shiny_1.7.5 withr_2.5.2 askpass_1.2.0
## [61] evaluate_0.23 desc_1.4.2 urlchecker_1.0.1
## [64] pillar_1.9.0 generics_0.1.3 vroom_1.6.3
## [67] rprojroot_2.0.4 hms_1.1.3 munsell_0.5.0
## [70] scales_1.2.1 xtable_1.8-4 glue_1.6.2
## [73] janitor_2.2.0 tools_4.3.1 data.table_1.14.8
## [76] fs_1.6.3 grid_4.3.1 tidyr_1.3.0
## [79] gh_1.4.0 devtools_2.4.5 colorspace_2.1-0
## [82] googlesheets4_1.1.1 googledrive_2.1.1 cli_3.6.2
## [85] textshaping_0.3.6 fansi_1.0.6 gargle_1.5.2
## [88] dplyr_1.1.4 gtable_0.3.4 sass_0.4.8
## [91] digest_0.6.33 wordcloud_2.6 htmlwidgets_1.6.2
## [94] farver_2.1.1 memoise_2.0.1 htmltools_0.5.7
## [97] lifecycle_1.0.4 httr_1.4.7 mime_0.12
## [100] openssl_2.1.1 bit64_4.0.5