B Generating the Data (R Script)

The dataset for this book was generated using the R programming language. For transparency (and for those interested in data simulation), the full script is reproduced below.

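If you wish to generate a larger dataset (e.g., 1 million rows) to test Power BI’s performance, increase the value of n_rows in the Setup section of the script below and run it in RStudio. The script writes two files, dim_facility.csv and fact_patient_visits.csv, to your current working directory.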

# --- Data Generation Script for Power BI Training ---
# Simulates a public health line list as two CSV files:
#   dim_facility.csv        - one row per health facility (dimension table)
#   fact_patient_visits.csv - one row per patient visit (fact table)
# The seed is fixed so that every run consumes the same random stream.
set.seed(2025)

# 1. Setup ----
n_rows <- 10000     # number of patient visits to simulate
n_facilities <- 50  # number of facilities to simulate
districts <- c("Highland", "Coastal")
facility_types <- c("Hospital", "Health Center", "Dispensary")
owners <- c("Government", "Private", "NGO", "FBO")

# 2. Generate Facilities (Dimension) ----
# IDs run FAC101, FAC102, ... and names run "Facility 1", "Facility 2", ...
# The random columns are drawn in the order District, Ownership, Type; keep
# that order, because it determines how the seeded random stream is consumed.
# stringsAsFactors = FALSE keeps every column as plain character on any R
# version, so FacilityID is written out as text rather than as factor codes.
facilities <- data.frame(
  FacilityID = paste0("FAC", 100L + seq_len(n_facilities)),
  FacilityName = paste("Facility", seq_len(n_facilities)),
  District = sample(districts, n_facilities, replace = TRUE),
  Ownership = sample(owners, n_facilities, replace = TRUE),
  Type = sample(facility_types, n_facilities, replace = TRUE),
  stringsAsFactors = FALSE
)
write.csv(facilities, "dim_facility.csv", row.names = FALSE)

# 3. Generate Visits (Fact) ----
# Logic: positive cases in the Highland district are always treated, while
# positive cases in the Coastal district are treated only about half of the
# time (stockouts).
# NOTE(review): the test positivity rate is the same (40%) for every district;
# nothing in this script gives Coastal a higher case load - confirm intent.
# Candidate visit dates: every calendar day of 2024.
dates <- seq(as.Date("2024-01-01"), as.Date("2024-12-31"), by = "day")

# Simulate one patient visit and return it as a single line-list row.
#
# Args:
#   id: visit sequence number; becomes the suffix of the VisitID ("VIS<id>").
#
# Returns:
#   A character vector of length 7, in the order VisitID, FacilityID, Date,
#   Age, Sex, Test_Result, Treatment_Given. Combining the fields with c()
#   coerces the numeric age to text.
#
# Reads `facilities` and `dates` from the enclosing environment. The random
# draws happen in a fixed order (facility, age, sex, date, test result, and
# then treatment only for positive cases outside Highland).
generate_visit <- function(id) {
  # Pick the facility first; its district drives the treatment rule below.
  chosen_facility <- sample(facilities[["FacilityID"]], 1)
  is_chosen <- facilities[["FacilityID"]] == chosen_facility
  facility_district <- facilities[["District"]][is_chosen]

  # Demographics and visit date, each drawn uniformly.
  patient_age <- sample(0:80, 1)
  patient_sex <- sample(c("Male", "Female"), 1)
  seen_on <- sample(dates, 1)

  # Clinical outcome: 40% of tests come back positive.
  result <- sample(c("Positive", "Negative"), 1, prob = c(0.4, 0.6))

  # Negative cases are never treated; Highland treats every positive case;
  # elsewhere (Coastal) a positive case has a 50% chance of treatment.
  treatment_given <- if (result != "Positive") {
    "None"
  } else if (facility_district == "Highland") {
    "Artemether"
  } else {
    sample(c("Artemether", "None"), 1, prob = c(0.5, 0.5))
  }

  c(
    paste0("VIS", id),
    chosen_facility,
    as.character(seen_on),
    patient_age,
    patient_sex,
    result,
    treatment_given
  )
}

# 4. Assemble and export the visits table ----
# generate_visit() returns one 7-field character vector per visit. vapply()
# stacks those vectors as the columns of a 7 x n_rows matrix, so t() turns the
# result into one row per visit.
# seq_len() is used instead of 1:n_rows so that n_rows = 0 produces an empty
# table rather than two rows (1:0 counts down), and vapply() stops with an
# error if any visit does not come back as exactly 7 character fields.
visit_fields <- c(
  "VisitID", "FacilityID", "Date", "Age", "Sex", "Test_Result",
  "Treatment_Given"
)
visits_matrix <- t(vapply(
  seq_len(n_rows),
  generate_visit,
  character(length(visit_fields))
))
colnames(visits_matrix) <- visit_fields
write.csv(visits_matrix, "fact_patient_visits.csv", row.names = FALSE)

print("Data Generation Complete.")

© 2025 Oluwatobi Olatunbosun · CC BY-NC-SA 4.0
Power BI for M&E and Public Health Data Analysts