123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202 |
- library(msm)
- library(survival)
- library(dplyr)
- data <- read.csv("paper_data_new.csv")
- feature_function <- function(data) {
-
- data$rgender_group <- factor(data$rgender, levels = c(1, 2), labels = c("Male", "Female"))
-
- data$age_group <- cut(data$age,
- breaks = c(45, 65, Inf),
- labels = c("45-64", ">=65"),
- right = FALSE)
-
- data$marital_group <- factor(data$marital_status, levels = c(1, 0), labels = c("Married or partnered", "Other marital status"))
-
- data$education_group <- factor(data$education, levels = c(0, 1), labels = c("Below primary level", "Primary school or higher"))
-
- data$activity_group <- factor(data$Physical_activity, levels = c(0, 1, 2), labels = c("Inactive", "Active", "Active"))
-
- data$psychiatric_group <- cut(data$Psychiatric_score, breaks = c(0, 10, Inf),labels = c("No", "Yes"), right = FALSE)
-
-
- data$variable <- cut(data$BMI, breaks = c(0, 18.5, 24, Inf), labels = c("Underweight", "Normal", "Overweight"), right = FALSE)
-
- data$variable <- recode(data$variable, "Underweight" = 1L, "Normal" = 0L, "Overweight" = 2L)
-
- data$BMI_group <- factor(data$variable, levels = c(0, 1, 2), labels = c("Normal", "Underweight", "Overweight"))
-
-
- data$ADL_group <- cut(data$ADL, breaks = c(0, 1, 3, Inf), labels = c("No impairment", "Impairment", "Impairment"), right = FALSE)
-
- data$Drink_group <- factor(data$Drink, levels = c(0, 1), labels = c("No", "Yes"))
-
- data$Smoke_group <- factor(data$Smoke, levels = c(0, 1), labels = c("No", "Yes"))
- return(data)
- }
- data <- feature_function(data)
- freq_table <- statetable.msm(state, ID, data = data)
- print(freq_table)
- qmatrix_init <- matrix(c(-0.5, 0.25, 0.15, 0.1,
- 0.1, -0.3, 0.1, 0.1,
- 0.3, 0.1, -0.5, 0.1,
- 0, 0, 0, 0),
- nrow = 4, byrow = TRUE)
- crude_init <- crudeinits.msm(state ~ wave, subject = ID, data = data, qmatrix = qmatrix_init)
- View(crude_init)
- one_function <- function(data, crude_init) {
- msm_model <- msm(state ~ wave, subject = ID, data = data,
- qmatrix = crude_init,
- covariates = ~last_year_NO3+rgender_group+age_group+marital_group+education_group+activity_group+psychiatric_group+BMI_group+ADL_group+Smoke_group+Drink_group+last_year_NO2+last_year_O3+last_year_PM1+last_year_nl,
- death = 4,
- method = "BFGS", control = list(fnscale = 5000, maxit = 10000)
- )
-
- model_summary <- capture.output(summary(msm_model))
- write("", file = "one.txt")
-
- cat("Results for covariates:", file = "one.txt", append = TRUE)
- cat(model_summary, file = "one.txt", sep = "\n", append = TRUE)
- }
- output <- one_function(data, crude_init)
- covariates_list <- c("cur_year_SO4", "cur_year_NO3", "cur_year_BC", "cur_year_NH4", "cur_year_OM")
- for (cov in covariates_list) {
- print(cov)
-
-
- cov_formula <- paste("~", cov, "+rgender_group+age_group+marital_group+education_group+activity_group+psychiatric_group+BMI_group+ADL_group+Smoke_group+Drink_group+cur_year_NO2+cur_year_O3+cur_year_PM1+cur_year_nl")
-
-
- msm_model <- msm(state ~ wave, subject = ID, data = data,
- qmatrix = crude_init,
- covariates = as.formula(cov_formula),
- death = 4,
- method = "BFGS", control = list(fnscale = 5000, maxit = 10000)
- )
-
- model_summary <- capture.output(summary(msm_model))
- write("", file = cov)
-
- cat("Results for covariates:", file = cov, append = TRUE)
- cat(model_summary, file = cov, sep = "\n", append = TRUE)
- print(cov)
- }
- summary(msm_model)
- msm_model <- msm(state ~ wave, subject = ID, data = data,
- qmatrix = crude_init,
- death = 4,
- method = "BFGS", control = list(fnscale = 5000, maxit = 10000)
- )
- prob_matrix <- pmatrix.msm(msm_model, t = 5)
- print(prob_matrix)
- q_matrix <- qmatrix.msm(msm_model)
- print(q_matrix)
- transition_intensity <- msm_model$qmatrix
- print(transition_intensity)
- so_journ <- sojourn.msm(msm_model)
- print(so_journ)
- covariates_list <- list(~age_group,~rgender_group,~marital_group,~education_group,~activity_group,~psychiatric_group,~BMI_group,~ADL_group, ~Smoke,~Drink,~last_year_NO2, ~last_year_O3, ~last_year_PM1, ~last_year_PM2.5, ~last_year_PM10, ~last_year_SO4,~last_year_NO3,~last_year_NH4,~last_year_OM,~last_year_BC, ~last_year_nl,
- ~cur_year_NO2, ~cur_year_O3, ~cur_year_PM1, ~cur_year_PM2.5, ~cur_year_PM10, ~cur_year_SO4,~cur_year_NO3,~cur_year_NH4,~cur_year_OM,~cur_year_BC, ~cur_year_nl)
- result_file <- "msm_model_results.txt"
- write("", file = result_file)
- for (cov in covariates_list) {
-
-
- msm_model <- msm(state ~ wave, subject = ID, data = data,
- qmatrix = crude_init,
- covariates = cov,
- death = 4,
- method = "BFGS", control = list(fnscale = 5000, maxit = 10000)
- )
-
-
- model_summary <- capture.output(summary(msm_model))
-
-
- cat("Results for covariates:", deparse(cov), "\n", file = result_file, append = TRUE)
- cat(model_summary, file = result_file, sep = "\n", append = TRUE)
- cat("\n\n", file = result_file, append = TRUE)
- }
- library(dplyr)
- time_check <- data %>%
- group_by(ID) %>%
- arrange(wave) %>%
- mutate(
- last_year_pollution = lag(cur_year_SO4),
- time_diff = wave - lag(wave)
- ) %>%
- select(ID, wave, time_diff, cur_year_SO4, last_year_SO4, last_year_pollution) %>%
- filter(!is.na(last_year_SO4))
- cat("时间错位比例:", mean(time_check$last_year_SO4 != time_check$last_year_pollution, na.rm=T)*100, "%\n")
|