code.R 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. # install.packages("msm", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")
  2. library(msm)
  3. library(survival)
  4. library(dplyr)
  5. data <- read.csv("paper_data_new.csv")
  6. # 定义一个函数
  7. feature_function <- function(data) {
  8. #性别
  9. data$rgender_group <- factor(data$rgender, levels = c(1, 2), labels = c("Male", "Female"))
  10. #年齡
  11. data$age_group <- cut(data$age,
  12. breaks = c(45, 65, Inf),
  13. labels = c("45-64", ">=65"),
  14. right = FALSE)
  15. #婚姻
  16. data$marital_group <- factor(data$marital_status, levels = c(1, 0), labels = c("Married or partnered", "Other marital status"))
  17. #教育
  18. data$education_group <- factor(data$education, levels = c(0, 1), labels = c("Below primary level", "Primary school or higher"))
  19. #运动情况
  20. data$activity_group <- factor(data$Physical_activity, levels = c(0, 1, 2), labels = c("Inactive", "Active", "Active"))
  21. #心理得分
  22. data$psychiatric_group <- cut(data$Psychiatric_score, breaks = c(0, 10, Inf),labels = c("No", "Yes"), right = FALSE)
  23. #BMI
  24. # 使用 cut() 创建因子类型的变量
  25. data$variable <- cut(data$BMI, breaks = c(0, 18.5, 24, Inf), labels = c("Underweight", "Normal", "Overweight"), right = FALSE)
  26. # 使用 dplyr::recode 直接替换因子水平
  27. data$variable <- recode(data$variable, "Underweight" = 1L, "Normal" = 0L, "Overweight" = 2L)
  28. # 将 data$variable 转换回因子并保留原 levels
  29. data$BMI_group <- factor(data$variable, levels = c(0, 1, 2), labels = c("Normal", "Underweight", "Overweight"))
  30. #ADL
  31. # 使用 cut() 创建因子类型的变量
  32. data$ADL_group <- cut(data$ADL, breaks = c(0, 1, 3, Inf), labels = c("No impairment", "Impairment", "Impairment"), right = FALSE)
  33. #drink
  34. data$Drink_group <- factor(data$Drink, levels = c(0, 1), labels = c("No", "Yes"))
  35. #smoke
  36. data$Smoke_group <- factor(data$Smoke, levels = c(0, 1), labels = c("No", "Yes"))
  37. return(data) # 返回结果
  38. }
  39. # 调用函数
  40. data <- feature_function(data)
  41. # 只对需要的列进行标准化,不替换原列
  42. # columns_to_standardize <- c("last_year_pm2.5", "last_year_nl")
  43. # df_standardized <- data
  44. # df_standardized[columns_to_standardize] <- scale(df_standardized[columns_to_standardize])
  45. # 计算交互项(使用原始列进行计算)
  46. # data$nl_pm25 <- df_standardized$last_year_nl * df_standardized$last_year_pm2.5
  47. # data$nl_pm25 <- scale(data$nl_pm25)
  48. # summary(data)
  49. # View(data[, c("ADL", "variable")])
  50. # View(data$nl_O3)
  51. # 计算状态转移频数表
  52. freq_table <- statetable.msm(state, ID, data = data)
  53. print(freq_table)
  54. # 初始化转移速率矩阵
  55. qmatrix_init <- matrix(c(-0.5, 0.25, 0.15, 0.1,
  56. 0.1, -0.3, 0.1, 0.1,
  57. 0.3, 0.1, -0.5, 0.1,
  58. 0, 0, 0, 0),
  59. nrow = 4, byrow = TRUE)
  60. # 创建初始模型
  61. crude_init <- crudeinits.msm(state ~ wave, subject = ID, data = data, qmatrix = qmatrix_init)
  62. View(crude_init)
  63. # 多协变量的分析,单个
  64. # 进行多状态模型分析
  65. one_function <- function(data, crude_init) {
  66. msm_model <- msm(state ~ wave, subject = ID, data = data,
  67. qmatrix = crude_init,
  68. covariates = ~last_year_NO3+rgender_group+age_group+marital_group+education_group+activity_group+psychiatric_group+BMI_group+ADL_group+Smoke_group+Drink_group+last_year_NO2+last_year_O3+last_year_PM1+last_year_nl,
  69. death = 4,
  70. method = "BFGS", control = list(fnscale = 5000, maxit = 10000)
  71. )
  72. # 获取模型结果并转换为字符串
  73. model_summary <- capture.output(summary(msm_model))
  74. write("", file = "one.txt") # 清空文件内容
  75. # 写入文件,附加协变量名称
  76. cat("Results for covariates:", file = "one.txt", append = TRUE)
  77. cat(model_summary, file = "one.txt", sep = "\n", append = TRUE)
  78. }
  79. # 调用函数
  80. output <- one_function(data, crude_init)
  81. # 多协变量的分析,全部
  82. # 定义协变量列表
  83. # covariates_list <- c("last_year_SO4", "last_year_NO3", "last_year_BC", "last_year_NH4", "last_year_OM")
  84. covariates_list <- c("cur_year_SO4", "cur_year_NO3", "cur_year_BC", "cur_year_NH4", "cur_year_OM")
  85. # covariates_list <- c("before_last_SO4", "before_last_NO3", "before_last_BC", "before_last_NH4", "before_last_OM")
  86. # 循环计算不同协变量的模型
  87. for (cov in covariates_list) {
  88. print(cov)
  89. # 动态生成协变量的公式
  90. # cov_formula <- paste("~", cov, "+rgender_group+age_group+marital_group+education_group+activity_group+psychiatric_group+BMI_group+ADL_group+Smoke_group+Drink_group+last_year_NO2+last_year_O3+last_year_PM1+last_year_nl")
  91. cov_formula <- paste("~", cov, "+rgender_group+age_group+marital_group+education_group+activity_group+psychiatric_group+BMI_group+ADL_group+Smoke_group+Drink_group+cur_year_NO2+cur_year_O3+cur_year_PM1+cur_year_nl")
  92. # cov_formula <- paste("~", cov, "+rgender_group+age_group+marital_group+education_group+activity_group+psychiatric_group+BMI_group+ADL_group+Smoke_group+Drink_group+before_last_NO2+before_last_O3+before_last_PM1+before_last_nl")
  93. # 进行多状态模型分析
  94. msm_model <- msm(state ~ wave, subject = ID, data = data,
  95. qmatrix = crude_init,
  96. covariates = as.formula(cov_formula),
  97. death = 4,
  98. method = "BFGS", control = list(fnscale = 5000, maxit = 10000)
  99. )
  100. # 获取模型结果并转换为字符串
  101. model_summary <- capture.output(summary(msm_model))
  102. write("", file = cov) # 清空文件内容
  103. # 写入文件,附加协变量名称
  104. cat("Results for covariates:", file = cov, append = TRUE)
  105. cat(model_summary, file = cov, sep = "\n", append = TRUE)
  106. print(cov)
  107. }
  108. # 查看模型的详细结果
  109. summary(msm_model)
  110. # 去掉协变量
  111. msm_model <- msm(state ~ wave, subject = ID, data = data,
  112. qmatrix = crude_init,
  113. death = 4,
  114. method = "BFGS", control = list(fnscale = 5000, maxit = 10000)
  115. )
  116. # 计算状态转移概率矩阵
  117. prob_matrix <- pmatrix.msm(msm_model, t = 5) # t = 1 代表随访之间的间隔时间
  118. print(prob_matrix)
  119. # 输出拟合模型的速率矩阵
  120. q_matrix <- qmatrix.msm(msm_model)
  121. print(q_matrix)
  122. # 提取转移强度
  123. transition_intensity <- msm_model$qmatrix
  124. print(transition_intensity)
  125. # 计算在每个状态中的平均逗留时间
  126. so_journ <- sojourn.msm(msm_model)
  127. print(so_journ)
  128. # 计算均衡状态概率
  129. #单个协变量的分析
  130. # 定义协变量列表
  131. covariates_list <- list(~age_group,~rgender_group,~marital_group,~education_group,~activity_group,~psychiatric_group,~BMI_group,~ADL_group, ~Smoke,~Drink,~last_year_NO2, ~last_year_O3, ~last_year_PM1, ~last_year_PM2.5, ~last_year_PM10, ~last_year_SO4,~last_year_NO3,~last_year_NH4,~last_year_OM,~last_year_BC, ~last_year_nl,
  132. ~cur_year_NO2, ~cur_year_O3, ~cur_year_PM1, ~cur_year_PM2.5, ~cur_year_PM10, ~cur_year_SO4,~cur_year_NO3,~cur_year_NH4,~cur_year_OM,~cur_year_BC, ~cur_year_nl)
  133. # 创建一个空的结果文件
  134. result_file <- "msm_model_results.txt"
  135. write("", file = result_file) # 清空文件内容
  136. # 循环计算不同协变量的模型
  137. for (cov in covariates_list) {
  138. # 运行msm模型
  139. msm_model <- msm(state ~ wave, subject = ID, data = data,
  140. qmatrix = crude_init,
  141. covariates = cov,
  142. death = 4,
  143. method = "BFGS", control = list(fnscale = 5000, maxit = 10000)
  144. )
  145. # 获取模型结果并转换为字符串
  146. model_summary <- capture.output(summary(msm_model))
  147. # 写入文件,附加协变量名称
  148. cat("Results for covariates:", deparse(cov), "\n", file = result_file, append = TRUE)
  149. cat(model_summary, file = result_file, sep = "\n", append = TRUE)
  150. cat("\n\n", file = result_file, append = TRUE) # 空行分隔每个协变量结果
  151. }
  152. library(dplyr)
  153. time_check <- data %>%
  154. group_by(ID) %>%
  155. arrange(wave) %>%
  156. mutate(
  157. last_year_pollution = lag(cur_year_SO4), # 用当前年变量生成实际的上一年数据
  158. time_diff = wave - lag(wave)
  159. ) %>%
  160. select(ID, wave, time_diff, cur_year_SO4, last_year_SO4, last_year_pollution) %>%
  161. filter(!is.na(last_year_SO4))
  162. # 检查系统是否自动对齐
  163. cat("时间错位比例:", mean(time_check$last_year_SO4 != time_check$last_year_pollution, na.rm=T)*100, "%\n")
  164. # 若输出>0%,说明存在错位记录