# install.packages("msm", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/") library(msm) library(survival) library(dplyr) data <- read.csv("paper_data_new.csv") # 定义一个函数 feature_function <- function(data) { #性别 data$rgender_group <- factor(data$rgender, levels = c(1, 2), labels = c("Male", "Female")) #年齡 data$age_group <- cut(data$age, breaks = c(45, 65, Inf), labels = c("45-64", ">=65"), right = FALSE) #婚姻 data$marital_group <- factor(data$marital_status, levels = c(1, 0), labels = c("Married or partnered", "Other marital status")) #教育 data$education_group <- factor(data$education, levels = c(0, 1), labels = c("Below primary level", "Primary school or higher")) #运动情况 data$activity_group <- factor(data$Physical_activity, levels = c(0, 1, 2), labels = c("Inactive", "Active", "Active")) #心理得分 data$psychiatric_group <- cut(data$Psychiatric_score, breaks = c(0, 10, Inf),labels = c("No", "Yes"), right = FALSE) #BMI # 使用 cut() 创建因子类型的变量 data$variable <- cut(data$BMI, breaks = c(0, 18.5, 24, Inf), labels = c("Underweight", "Normal", "Overweight"), right = FALSE) # 使用 dplyr::recode 直接替换因子水平 data$variable <- recode(data$variable, "Underweight" = 1L, "Normal" = 0L, "Overweight" = 2L) # 将 data$variable 转换回因子并保留原 levels data$BMI_group <- factor(data$variable, levels = c(0, 1, 2), labels = c("Normal", "Underweight", "Overweight")) #ADL # 使用 cut() 创建因子类型的变量 data$ADL_group <- cut(data$ADL, breaks = c(0, 1, 3, Inf), labels = c("No impairment", "Impairment", "Impairment"), right = FALSE) #drink data$Drink_group <- factor(data$Drink, levels = c(0, 1), labels = c("No", "Yes")) #smoke data$Smoke_group <- factor(data$Smoke, levels = c(0, 1), labels = c("No", "Yes")) return(data) # 返回结果 } # 调用函数 data <- feature_function(data) # 只对需要的列进行标准化,不替换原列 # columns_to_standardize <- c("last_year_pm2.5", "last_year_nl") # df_standardized <- data # df_standardized[columns_to_standardize] <- scale(df_standardized[columns_to_standardize]) # 计算交互项(使用原始列进行计算) # data$nl_pm25 <- df_standardized$last_year_nl * df_standardized$last_year_pm2.5 # data$nl_pm25 <- scale(data$nl_pm25) # summary(data) # View(data[, c("ADL", "variable")]) # View(data$nl_O3) # 计算状态转移频数表 freq_table <- statetable.msm(state, ID, data = data) print(freq_table) # 初始化转移速率矩阵 qmatrix_init <- matrix(c(-0.5, 0.25, 0.15, 0.1, 0.1, -0.3, 0.1, 0.1, 0.3, 0.1, -0.5, 0.1, 0, 0, 0, 0), nrow = 4, byrow = TRUE) # 创建初始模型 crude_init <- crudeinits.msm(state ~ wave, subject = ID, data = data, qmatrix = qmatrix_init) View(crude_init) # 多协变量的分析,单个 # 进行多状态模型分析 one_function <- function(data, crude_init) { msm_model <- msm(state ~ wave, subject = ID, data = data, qmatrix = crude_init, covariates = ~last_year_NO3+rgender_group+age_group+marital_group+education_group+activity_group+psychiatric_group+BMI_group+ADL_group+Smoke_group+Drink_group+last_year_NO2+last_year_O3+last_year_PM1+last_year_nl, death = 4, method = "BFGS", control = list(fnscale = 5000, maxit = 10000) ) # 获取模型结果并转换为字符串 model_summary <- capture.output(summary(msm_model)) write("", file = "one.txt") # 清空文件内容 # 写入文件,附加协变量名称 cat("Results for covariates:", file = "one.txt", append = TRUE) cat(model_summary, file = "one.txt", sep = "\n", append = TRUE) } # 调用函数 output <- one_function(data, crude_init) # 多协变量的分析,全部 # 定义协变量列表 # covariates_list <- c("last_year_SO4", "last_year_NO3", "last_year_BC", "last_year_NH4", "last_year_OM") covariates_list <- c("cur_year_SO4", "cur_year_NO3", "cur_year_BC", "cur_year_NH4", "cur_year_OM") # covariates_list <- c("before_last_SO4", "before_last_NO3", "before_last_BC", "before_last_NH4", "before_last_OM") # 循环计算不同协变量的模型 for (cov in covariates_list) { print(cov) # 动态生成协变量的公式 # cov_formula <- paste("~", cov, "+rgender_group+age_group+marital_group+education_group+activity_group+psychiatric_group+BMI_group+ADL_group+Smoke_group+Drink_group+last_year_NO2+last_year_O3+last_year_PM1+last_year_nl") cov_formula <- paste("~", cov, "+rgender_group+age_group+marital_group+education_group+activity_group+psychiatric_group+BMI_group+ADL_group+Smoke_group+Drink_group+cur_year_NO2+cur_year_O3+cur_year_PM1+cur_year_nl") # cov_formula <- paste("~", cov, "+rgender_group+age_group+marital_group+education_group+activity_group+psychiatric_group+BMI_group+ADL_group+Smoke_group+Drink_group+before_last_NO2+before_last_O3+before_last_PM1+before_last_nl") # 进行多状态模型分析 msm_model <- msm(state ~ wave, subject = ID, data = data, qmatrix = crude_init, covariates = as.formula(cov_formula), death = 4, method = "BFGS", control = list(fnscale = 5000, maxit = 10000) ) # 获取模型结果并转换为字符串 model_summary <- capture.output(summary(msm_model)) write("", file = cov) # 清空文件内容 # 写入文件,附加协变量名称 cat("Results for covariates:", file = cov, append = TRUE) cat(model_summary, file = cov, sep = "\n", append = TRUE) print(cov) } # 查看模型的详细结果 summary(msm_model) # 去掉协变量 msm_model <- msm(state ~ wave, subject = ID, data = data, qmatrix = crude_init, death = 4, method = "BFGS", control = list(fnscale = 5000, maxit = 10000) ) # 计算状态转移概率矩阵 prob_matrix <- pmatrix.msm(msm_model, t = 5) # t = 1 代表随访之间的间隔时间 print(prob_matrix) # 输出拟合模型的速率矩阵 q_matrix <- qmatrix.msm(msm_model) print(q_matrix) # 提取转移强度 transition_intensity <- msm_model$qmatrix print(transition_intensity) # 计算在每个状态中的平均逗留时间 so_journ <- sojourn.msm(msm_model) print(so_journ) # 计算均衡状态概率 #单个协变量的分析 # 定义协变量列表 covariates_list <- list(~age_group,~rgender_group,~marital_group,~education_group,~activity_group,~psychiatric_group,~BMI_group,~ADL_group, ~Smoke,~Drink,~last_year_NO2, ~last_year_O3, ~last_year_PM1, ~last_year_PM2.5, ~last_year_PM10, ~last_year_SO4,~last_year_NO3,~last_year_NH4,~last_year_OM,~last_year_BC, ~last_year_nl, ~cur_year_NO2, ~cur_year_O3, ~cur_year_PM1, ~cur_year_PM2.5, ~cur_year_PM10, ~cur_year_SO4,~cur_year_NO3,~cur_year_NH4,~cur_year_OM,~cur_year_BC, ~cur_year_nl) # 创建一个空的结果文件 result_file <- "msm_model_results.txt" write("", file = result_file) # 清空文件内容 # 循环计算不同协变量的模型 for (cov in covariates_list) { # 运行msm模型 msm_model <- msm(state ~ wave, subject = ID, data = data, qmatrix = crude_init, covariates = cov, death = 4, method = "BFGS", control = list(fnscale = 5000, maxit = 10000) ) # 获取模型结果并转换为字符串 model_summary <- capture.output(summary(msm_model)) # 写入文件,附加协变量名称 cat("Results for covariates:", deparse(cov), "\n", file = result_file, append = TRUE) cat(model_summary, file = result_file, sep = "\n", append = TRUE) cat("\n\n", file = result_file, append = TRUE) # 空行分隔每个协变量结果 } library(dplyr) time_check <- data %>% group_by(ID) %>% arrange(wave) %>% mutate( last_year_pollution = lag(cur_year_SO4), # 用当前年变量生成实际的上一年数据 time_diff = wave - lag(wave) ) %>% select(ID, wave, time_diff, cur_year_SO4, last_year_SO4, last_year_pollution) %>% filter(!is.na(last_year_SO4)) # 检查系统是否自动对齐 cat("时间错位比例:", mean(time_check$last_year_SO4 != time_check$last_year_pollution, na.rm=T)*100, "%\n") # 若输出>0%,说明存在错位记录