123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517 |
- # NHANES
- # install.packages("caret", repos="https://mirrors.ustc.edu.cn/CRAN/")
- library(caret)
- library(nhanesA)
- library(knitr)
- library(tidyverse)
- library(plyr)
- library(dplyr)
- library(foreign)
- path = "NHANES/2017-2018/"
- year = "_J"
- # year_ = "2001-2002"
- DEMO <- read.xport(paste0(path , "DEMO",year,".XPT"))
- #选取数据发布号、样本人员的访谈和检查状态、性别、年龄、种族、出生地
- #社会经济地位数据:家庭 PIR、家庭年度收入
- if(year =="" || year == "_B" || year == "_C" || year == "_D"){
- DEMO <- DEMO %>% select(SEQN, SDDSRVYR, RIDSTATR, RIAGENDR, RIDAGEYR, RIDRETH1, DMDBORN, INDFMPIR, INDFMINC)
- }else if(year =="_E" || year == "_F"){
- DEMO <- DEMO %>% select(SEQN, SDDSRVYR, RIDSTATR, RIAGENDR, RIDAGEYR, RIDRETH1, DMDBORN2, INDFMPIR, INDHHIN2)
- }else{
- DEMO <- DEMO %>% select(SEQN, SDDSRVYR, RIDSTATR, RIAGENDR, RIDAGEYR, RIDRETH1, DMDBORN4, INDFMPIR, INDHHIN2)
- }
- print(names(DEMO)[9])
- names(DEMO)[7] <- "DMDBORN"
- names(DEMO)[9] <- "INDFMINC"
- # #收入
- # if(year=="_E" | year=="_F" | year=="_H" | year=="_G" |year=="_I" | year=="_J"){
- # INQ <- read.xport(paste0(path, "INQ",year,".XPT"))
- # data <- join_all(list(data, INQ), by = "SEQN", type = "left")
- # }
- #重量、站立高度、体重指数
- BMX <- read.xport(paste0(path, "BMX",year,".XPT"))
- BMX <- BMX %>% select(SEQN, BMXWT , BMXHT , BMXBMI )
- data <- join_all(list(DEMO, BMX), by = "SEQN", type = "left")
- #收缩压平均值、舒张压平均值、60 秒心率
- BPX <- read.xport(paste0(path, "BPX",year,".XPT"))
- BPX <- BPX %>% select(SEQN, BPXSY1, BPXDI1, BPXCHR)
- data <- join_all(list(data, BPX), by = "SEQN", type = "left")
- #空腹血浆葡萄糖
- if(year ==""){
- LAB10AM <- read.xport(paste0(path, "LAB10AM",year,".XPT"))
- LAB10AM <- LAB10AM %>% select(SEQN, LBXGLUSI )
- }else if (year == "_B"){
- LAB10AM <- read.xport(paste0(path, "L10AM",year,".XPT"))
- LAB10AM <- LAB10AM %>% select(SEQN, LBXGLUSI )
- }else if (year == "_C"){
- LAB10AM <- read.xport(paste0(path, "L10AM",year,".XPT"))
- LAB10AM <- LAB10AM %>% select(SEQN, LBDGLUSI)
- }else{
- LAB10AM <- read.xport(paste0(path, "GLU",year,".XPT"))
- LAB10AM <- LAB10AM %>% select(SEQN, LBDGLUSI)
- }
- names(LAB10AM)[2] <- "LBDGLUSI"
- data <- join_all(list(data, LAB10AM), by = "SEQN", type = "left")
- #尿白蛋白
- if (year == ""){
- LAB16 <- read.xport(paste0(path, "LAB16",year,".XPT"))
- }else if(year == "_B" | year == "_C"){
- LAB16 <- read.xport(paste0(path, "L16",year,".XPT"))
- }else{
- LAB16 <- read.xport(paste0(path, "ALB_CR",year,".XPT"))
- }
- LAB16 <- LAB16 %>% select(SEQN, URXUMA)
- data <- join_all(list(data, LAB16), by = "SEQN", type = "left")
- #血常规:白细胞计数 (SI)1000 个细胞/uL、淋巴细胞计数、单核细胞数、红细胞计数 SI(百万细胞/uL)、血红蛋白 (g/dL)、血小板计数 (1000 个细胞/uL)
- #、嗜酸性粒细胞数、嗜碱性粒细胞数、Segmented neutrophils number(分叶中性粒细胞数量)1000 细胞/uL
- if (year == ""){
- LAB25 <- read.xport(paste0(path, "LAB25",year,".XPT"))
- }else if(year == "_B" | year == "_C"){
- LAB25 <- read.xport(paste0(path, "L25",year,".XPT"))
- }else{
- LAB25 <- read.xport(paste0(path, "CBC",year,".XPT"))
- }
- LAB25 <- LAB25 %>% select(SEQN, LBXWBCSI, LBDLYMNO, LBDMONO, LBXRBCSI, LBXHGB, LBXPLTSI, LBDEONO, LBDBANO, LBDNENO)
- #计算炎症指标
- LAB25$SIRI <- LAB25$LBDNENO*LAB25$LBDMONO/LAB25$LBDLYMNO
- LAB25$SII <- LAB25$LBXPLTSI*LAB25$LBDNENO/LAB25$LBDLYMNO
- data <- join_all(list(data, LAB25), by = "SEQN", type = "left")
- #血脂:甘油三酯(mmol/L))、低密度脂蛋白胆固醇(mmol/L))、总胆固醇、高密度脂蛋白胆固醇(mmol/L)、总胆固醇、甘油三酯
- #肾功能:血尿素氮、肌酐
- #肝功能:ALT、AST、总胆红素
- if (year == ""){
- LAB13AM <- read.xport(paste0(path, "LAB13AM",year,".XPT"))
- }else if(year == "_B" | year == "_C"){
- LAB13AM <- read.xport(paste0(path, "L13AM",year,".XPT"))
- }else{
- LAB13AM <- read.xport(paste0(path, "TRIGLY",year,".XPT"))
- }
- LAB13AM <- LAB13AM %>% select(SEQN, LBDTRSI, LBDLDLSI)
- data <- join_all(list(data, LAB13AM), by = "SEQN", type = "left")
- if(year ==""){
- LAB13 <- read.xport(paste0(path, "LAB13",year,".XPT"))
- }else if(year == "_B" | year == "_C"){
- LAB13 <- read.xport(paste0(path, "L13",year,".XPT"))
- }else{
- LAB13 <- read.xport(paste0(path, "TCHOL",year,".XPT"))
- }
- if(year=="" | year=="_B"){
- LAB13 <- LAB13 %>% select(SEQN, LBDTCSI, LBDHDLSI)
- }else if(year == "_C"){ #Direct HDL-Cholesterol
- LAB13 <- LAB13 %>% select(SEQN, LBDTCSI, LBDHDDSI)
- }else{
- LAB13 <- LAB13 %>% select(SEQN, LBDTCSI)
- HDL <- read.xport(paste0(path, "HDL",year,".XPT"))
- HDL <- HDL %>% select(SEQN, LBDHDDSI)
- LAB13 <- join_all(list(LAB13, HDL), by = "SEQN", type = "left")
- }
- names(LAB13)[3] <- "LBDHDDSI"
- data <- join_all(list(data, LAB13), by = "SEQN", type = "left")
- #计算炎症指标
- data$MHR <- data$LBDMONO/data$LBDHDDSI
- data$NHR <- data$LBDNENO/data$LBDHDDSI
- data$PHR <- data$LBXPLTSI/data$LBDHDDSI
- data$LHR <- data$LBDLYMNO /data$LBDHDDSI
- # 同型半胱氨酸(1999-2006),其他年份为空
- if(year == "" || year == "_D" || year == "_C" || year == "_B"){
- if(year == ""){
- LAB06 <- read.xport(paste0(path, "LAB06",year,".XPT"))
- LAB06 <- LAB06 %>% select(SEQN, LBXHCY)
- }else if(year == "_B"){
- LAB06 <- read.xport(paste0(path, "L06",year,".XPT"))
- LAB06 <- LAB06 %>% select(SEQN, LBDHCY)
- }else if(year == "_C"){
- LAB06 <- read.xport(paste0(path, "L06MH",year,".XPT"))
- LAB06 <- LAB06 %>% select(SEQN, LBXHCY)
- }else{
- LAB06 <- read.xport(paste0(path, "HCY",year,".XPT"))
- LAB06 <- LAB06 %>% select(SEQN, LBXHCY)
- }
- names(LAB06)[2] <- "LBXHCY"
- data <- join_all(list(data, LAB06), by = "SEQN", type = "left")
- }else{
- data$LBXHCY <- NA
- }
- if(year == ""){
- LAB18 <- read.xport(paste0(path, "LAB18",year,".XPT"))
- }else if(year == "_B" || year == "_C"){
- LAB18 <- read.xport(paste0(path, "L40",year,".XPT"))
- }else{
- LAB18 <- read.xport(paste0(path, "BIOPRO",year,".XPT"))
- }
- LAB18 <- LAB18 %>% select(SEQN, LBDSCHSI, LBDSTRSI, LBDSBUSI, LBDSCRSI, LBXSATSI, LBXSASSI, LBDSTBSI )
- data <- join_all(list(data, LAB18), by = "SEQN", type = "left")
- #甲状腺功能:促甲状腺激素
- if(year == "" || year == "_B" ||year =="_E" || year =="_F" || year =="_G"){
- if(year == ""){
- LAB18T4 <- read.xport(paste0(path, "LAB18T4",year,".XPT"))
- LAB18T4 <- LAB18T4 %>% select(SEQN, LBXTSH)
- }else if(year == "_B"){
- LAB18T4 <- read.xport(paste0(path, "L40T4",year,".XPT"))
- LAB18T4 <- LAB18T4 %>% select(SEQN, LBXTSH)
- }else if (year =="_E" || year =="_F" || year =="_G"){
- LAB18T4 <- read.xport(paste0(path, "THYROD",year,".XPT"))
- LAB18T4 <- LAB18T4 %>% select(SEQN, LBXTSH1)
- }
- names(LAB18T4)[2] <- "LBXTSH"
- data <- join_all(list(data, LAB18T4), by = "SEQN", type = "left")
- }else{
- data$LBXTSH <- NA
- }
- #甲状腺功能:游离甲状腺素(2007-2012)
- if(year == "_E" || year == "_F" || year == "_G"){
- THYROD <- read.xport(paste0(path, "THYROD",year,".XPT"))
- THYROD <- THYROD %>% select(SEQN, LBDT4FSI)
- data <- join_all(list(data, THYROD), by = "SEQN", type = "left")
- }else{
- data$LBDT4FSI <- NA
- }
- #吸烟史
- #成人 20+
- SMQ <- read.xport(paste0(path, "SMQ",year,".XPT"))
- SMQ <- SMQ %>% select(SEQN, SMQ020, SMD030, SMQ040, SMQ050Q, SMQ050U, SMD057)
- data <- join_all(list(data, SMQ), by = "SEQN", type = "left")
- #饮酒史 20+
- ALQ <- read.xport(paste0(path, "ALQ",year,".XPT"))
- if(year == "" || year == "_B" ||year =="_E" || year =="_F" || year =="_C" || year =="_D"){
- ALQ <- ALQ %>% select(SEQN, ALQ150, ALQ130)
- }else{
- ALQ <- ALQ %>% select(SEQN, ALQ151, ALQ130)
- }
- names(ALQ)[2] <- "ALQ150"
- data <- join_all(list(data, ALQ), by = "SEQN", type = "left")
- #饮食习惯
- DBQ <- read.xport(paste0(path, "DBQ",year,".XPT"))
- if(year == "" || year == "_B" ||year =="_E" || year =="_C" || year =="_D"){
- if(year == ""){
- DBQ <- DBQ %>% select(SEQN, DBQ010, DBD030, DBD040, DBD050, DBD060, DBQ070A, DBQ070B, DBQ070C, DBQ390, DBD400, DBD410, DBQ420,
- DBQ070D, DBD195, DBQ220A, DBQ220B, DBQ220C, DBQ220D, DBD235A, DBD235B, DBD235C, DBQ300, DBQ330, DBD360, DBD370, DBD380)
- }else if (year == "_B"){
- DBQ <- DBQ %>% select(SEQN, DBQ010, DBD030, DBD040, DBD050, DBD060, DBD071A, DBD071B, DBD071C, DBQ390, DBQ400, DBD411, DBD421,
- DBD071D, DBD196, DBD221A, DBD221B, DBD221C, DBD221D, DBD235AE, DBD235BE, DBD235CE, DBD301, DBQ330, DBQ360, DBQ370, DBD381)
- }else if(year == "_C"){
- DBQ <- DBQ %>% select(SEQN, DBQ010, DBD030, DBD040, DBD050, DBD060, DBQ071A, DBQ071B, DBQ071C, DBQ390, DBQ400, DBD411, DBQ421,
- DBQ071D, DBD197, DBQ221A, DBQ221B, DBQ221C, DBQ221D, DBQ235A, DBQ235B, DBQ235C, DBQ301, DBQ330, DBQ360, DBQ370, DBD381)
- }else if(year == "_D" || year == "_E"){
- DBQ <- DBQ %>% select(SEQN, DBQ010, DBD030, DBD040, DBD050, DBD060, DBD072A, DBD072B, DBD072C, DBQ390, DBQ400, DBD411, DBQ421,
- DBD072D, DBQ197, DBD222A, DBD222B, DBD222C, DBD222D, DBQ235A, DBQ235B, DBQ235C, DBQ301, DBQ330, DBQ360, DBQ370, DBD381)
- }
- }else{
- DBQ <- DBQ %>% select(SEQN, DBQ010, DBD030, DBD041, DBD050, DBD061, DBQ073A, DBQ073B, DBQ073C, DBQ390, DBQ400, DBD411, DBQ421,
- DBQ073D, DBQ197, DBQ223A, DBQ223B, DBQ223C, DBQ223D, DBQ235A, DBQ235B, DBQ235C, DBQ301, DBQ330, DBQ360, DBQ370, DBD381)
- }
- names(DBQ) <- c("SEQN", "DBQ010", "DBD030", "DBD041", "DBD050", "DBD061", "DBQ073A", "DBQ073B", "DBQ073C", "DBQ390", "DBQ400", "DBD411", "DBQ421",
- "DBQ073D", "DBQ197", "DBQ223A", "DBQ223B", "DBQ223C", "DBQ223D", "DBQ235A", "DBQ235B", "DBQ235C", "DBQ301", "DBQ330", "DBQ360", "DBQ370", "DBD381")
- data <- join_all(list(data, DBQ), by = "SEQN", type = "left")
- #体力活动
- PAQ <- read.xport(paste0(path, "PAQ",year,".XPT"))
- PAQ[is.na(PAQ)] <- 0
- if(year == ""){
- PAQ_s <- PAQ %>% select(SEQN, PAD020, PAQ100, PAQ180, PAD440, PAQ480, PAQ560, PAD570, PAQ580)
- PAQ_s$PAD020 <- ifelse(PAQ_s$PAD020 == 1, 4, 0)
- PAQ_s$PAQ100 <- ifelse(PAQ_s$PAQ100 == 1, 4.5, 0)
- PAQ_s$PAQ180 <- ifelse(PAQ_s$PAQ180 == 1, 1.4, ifelse(PAQ_s$PAQ180 == 2, 1.5, ifelse(PAQ_s$PAQ180 == 3, 1.6, ifelse(PAQ_s$PAQ180 == 4, 1.8, 0))))
- PAQ_s$PAD440 <- ifelse(PAQ_s$PAD440 == 1, 4, 0)
- PAQ_s$PAQ480 <- ifelse(PAQ_s$PAQ480 < 6, 1.2 * PAQ_s$PAQ480, 0)
- PAQ_s$PAQ560 <- ifelse(PAQ_s$PAQ560 < 78, 7 * PAQ_s$PAQ560, 0)
- PAQ_s$PAD570 <- ifelse(PAQ_s$PAD570 < 6, PAQ_s$PAD570, 0)
- PAQ_s$PAQ580 <- ifelse(PAQ_s$PAQ580 < 6, 1.5 * PAQ_s$PAQ580, 0)
- PAQ$SCORE <- apply(PAQ_s[, -1], 1, function(x) sum(x))
- }else if(year =="_B"){
- PAQ_s <- PAQ %>% select(SEQN, PAD020, PAQ100, PAQ180, PAD440, PAQ560, PAD590, PAD600)
- PAQ_s$PAD020 <- ifelse(PAQ_s$PAD020 == 1, 4, 0)
- PAQ_s$PAQ100 <- ifelse(PAQ_s$PAQ100 == 1, 4.5, 0)
- PAQ_s$PAQ180 <- ifelse(PAQ_s$PAQ180 == 1, 1.4, ifelse(PAQ_s$PAQ180 == 2, 1.5, ifelse(PAQ_s$PAQ180 == 3, 1.6, ifelse(PAQ_s$PAQ180 == 4, 1.8, 0))))
- PAQ_s$PAD440 <- ifelse(PAQ_s$PAD440 == 1, 4, 0)
- PAQ_s$PAQ560 <- ifelse(PAQ_s$PAQ560 < 78, 7 * PAQ_s$PAQ560, 0)
- PAQ_s$PAD590 <- ifelse(PAQ_s$PAD590 < 6, PAQ_s$PAD590, 0)
- PAQ_s$PAD600 <- ifelse(PAQ_s$PAD600 < 6, 1.5 * PAQ_s$PAD600, 0)
- PAQ$SCORE <- apply(PAQ_s[, -1], 1, function(x) sum(x))
- }else if(year =="_C" || year =="_D"){
- PAQ_s <- PAQ %>% select(SEQN, PAD020, PAQ100, PAQ180, PAD440, PAQ560, PAD590, PAD600)
- PAQ_s$PAD020 <- ifelse(PAQ_s$PAD020 == 1, 4, 0)
- PAQ_s$PAQ100 <- ifelse(PAQ_s$PAQ100 == 1, 4.5, 0)
- PAQ_s$PAQ180 <- ifelse(PAQ_s$PAQ180 == 1, 1.4, ifelse(PAQ_s$PAQ180 == 2, 1.5, ifelse(PAQ_s$PAQ180 == 3, 1.6, ifelse(PAQ_s$PAQ180 == 4, 1.8, 0))))
- PAQ_s$PAD440 <- ifelse(PAQ_s$PAD440 == 1, 4, 0)
- PAQ_s$PAQ560 <- ifelse(PAQ_s$PAQ560 < 78, 7 * PAQ_s$PAQ560, 0)
- PAQ_s$PAD590 <- ifelse(PAQ_s$PAD590 < 6, PAQ_s$PAD590, 0)
- PAQ_s$PAD600 <- ifelse(PAQ_s$PAD600 < 6, 1.5 * PAQ_s$PAD600, 0)
- PAQ$SCORE <- apply(PAQ_s[, -1], 1, function(x) sum(x))
- }else{
- PAQ_s <- PAQ %>% select(SEQN, PAD615, PAD630, PAD645, PAD660, PAD675)
- PAQ_s$PAD615 <- ifelse(PAQ_s$PAD615 < 841, 8 * PAQ_s$PAD615, 0)
- PAQ_s$PAD630 <- ifelse(PAQ_s$PAD630 < 841, 4 * PAQ_s$PAD630, 0)
- PAQ_s$PAD645 <- ifelse(PAQ_s$PAD645 < 661, 4 * PAQ_s$PAD645, 0)
- PAQ_s$PAD660 <- ifelse(PAQ_s$PAD660 < 481, 8 * PAQ_s$PAD660, 0)
- PAQ_s$PAD675 <- ifelse(PAQ_s$PAD675 < 541, 4 * PAQ_s$PAD675, 0)
- PAQ$SCORE <- apply(PAQ_s[, -1], 1, function(x) sum(x))
- PAQ$PAAQUEX <- NA
- }
- PAQ <- PAQ %>% select(SEQN, SCORE, PAAQUEX)
- data <- join_all(list(data, PAQ), by = "SEQN", type = "left")
- #睡眠问题
- if(year != "" && year != "_B" && year != "_C"){
- SLQ <- read.xport(paste0(path, "SLQ",year,".XPT"))
- SLQ <- SLQ %>% select(SEQN, SLQ050)
- data <- join_all(list(data, SLQ), by = "SEQN", type = "left")
- }else{
- data$SLQ050 <- NA
- }
- #心理健康
- if (year =="" || year == "_B" || year == "_C"){
- #心理健康 - 抑郁症:抑郁评分
- if(year==""){
- CIQMDEP <- read.xport(paste0(path, "CIQMDEP",year,".XPT"))
- }else if (year == "_B" | year == "_C"){
- CIQMDEP <- read.xport(paste0(path, "CIQDEP",year,".XPT"))
- }
- CIQMDEP <- CIQMDEP %>% select(SEQN, CIDDSCOR)
- data <- join_all(list(data, CIQMDEP), by = "SEQN", type = "left")
- # #精神健康 - 广泛性焦虑症:焦虑评分
- # CIQGAD <- read.xport(paste0(path, "CIQGAD",year,".XPT"))
- # CIQGAD <- CIQGAD %>% select(SEQN, CIDGSCOR)
- # data <- join_all(list(data, CIQGAD), by = "SEQN", type = "left")
- # #心理健康 - 恐慌症:恐慌评分
- # if(year==""){
- # CIQPANIC <- read.xport(paste0(path, "CIQPANIC",year,".XPT"))
- # }else{
- # CIQPANIC <- read.xport(paste0(path, "CIQPAN",year,".XPT"))
- # }
- # CIQPANIC <- CIQPANIC %>% select(SEQN, CIDPSCOR)
- # data <- join_all(list(data, CIQPANIC), by = "SEQN", type = "left")
- }else{
- DPQ <- read.xport(paste0(path, "DPQ",year,".XPT"))
- DPQ <- DPQ[, -which(names(DPQ) == "DPQ100")]
- DPQ <- na.omit(DPQ)
- DPQ$CIDDSCOR <- apply(DPQ[, -1], 1, function(x) sum(x[x < 7]))
- DPQ <- DPQ %>% select(SEQN, CIDDSCOR)
- data <- join_all(list(data, DPQ), by = "SEQN", type = "left")
- }
- #社会支持
- if(year =="" || year == "_B" || year == "_C"){
- SSQ <- read.xport(paste0(path, "SSQ",year,".XPT"))
- }else if(year =="_D" || year == "_E"){
- SSQ <- read.xport(paste0(path, "SSQ",year,".XPT"))
- SSQ <- SSQ[, !(names(SSQ) == "SSD044")]
- }else{
- SSQ[c("SEQN", "SSQ011", "SSQ021A", "SSQ021B", "SSQ021C", "SSQ021D", "SSQ021E", "SSQ021F", "SSQ021G", "SSQ021H", "SSQ021I", "SSQ021J", "SSQ021K",
- "SSQ021L", "SSQ021M", "SSQ021N", "SSQ031", "SSQ041", "SSQ051", "SSQ061")] <- NA
- }
- names(SSQ) <- c("SEQN", "SSQ011", "SSQ021A", "SSQ021B", "SSQ021C", "SSQ021D", "SSQ021E", "SSQ021F", "SSQ021G", "SSQ021H", "SSQ021I", "SSQ021J", "SSQ021K",
- "SSQ021L", "SSQ021M", "SSQ021N", "SSQ031", "SSQ041", "SSQ051", "SSQ061")
- data <- join_all(list(data, SSQ), by = "SEQN", type = "left")
- #疾病诊断记录(有高血压、冠心病、心绞痛、心力衰竭、心脏病发作)
- MCQ <- read.xport(paste0(path, "MCQ",year,".XPT"))
- if(year =="" || year == "_B" || year == "_C" || year =="_D" || year == "_E" || year == "_F"){
- MCQ <- MCQ %>% select(SEQN, MCQ160B, MCQ180B, MCQ160C, MCQ180C, MCQ160D, MCQ180D, MCQ160E, MCQ180E)
- }else if(year =="_G" || year == "_H" || year == "_I" ){
- MCQ <- MCQ %>% select(SEQN, MCQ160B, MCQ180B, MCQ160C, MCQ180C, MCQ160D, MCQ180D, MCQ160E, MCQ180E)
- }else{
- MCQ <- MCQ %>% select(SEQN, MCQ160B, MCD180B, MCQ160C, MCD180C, MCQ160D, MCD180D, MCQ160E, MCD180E)
- }
- names(MCQ) <- c("SEQN", "MCQ160B", "MCD180B", "MCQ160C", "MCD180C", "MCQ160D", "MCD180D", "MCQ160E", "MCD180E")
- data <- join_all(list(data, MCQ), by = "SEQN", type = "left")
- #用药记录,天数、用量
- #心内常用药
- medicine = c("HYDROCHLOROTHIAZIDE", "VALSARTAN", "AMIODARONE", "AMLODIPINE", "ATORVASTATIN",
- "BENAZEPRIL", "OLMESARTAN", "PERINDOPRIL","TELMISARTAN","BENAZEPRIL","BENDROFLUMETHIAZIDE",
- "BEZAFIBRATE","BISOPROLOL","Captopril","Carvedilol", "EPINEPHRINE", "QUINIDINE", "Digoxin",
- "Diltiazem", "Enalapril", "Eprosartan", "ERTUGLIFLOZIN", "SITAGLIPTIN", "EZETIMIBE", "SIMVASTATIN",
- "FENOFIBRIC ACID", "Furosemide", "GLICLAZIDE", "GLIPIZIDE", "METFORMIN", "HYDRALAZINE", "ISOSORBIDE DINITRATE",
- "IRBESARTAN", "METOPROLOL", "PROPRANOLOL", "ISOSORBIDE MONONITRATE", "NIFEDIPINE", "NITROGLYCERIN",
- "PITAVASTATIN", "PRAVASTATIN", "PRAZOSIN", "PROCAINAMIDE", "PROPAFENONE", "SACUBITRIL", "SOTALOL", "SPIRONOLACTONE",
- "TICAGRELOR", "TORSEMIDEMIDE", "VERAPAMIL", "TRIAMTERENE", "WARFARIN")
- RXQ_RX <- read.xport(paste0(path, "RXQ_RX",year,".XPT"))
- if(year == "" || year == "_B"){
- day_RXQ_RX <- RXQ_RX %>%
- select(SEQN, RXD240B, RXD260) %>%
- na.omit(.) %>%
- pivot_wider(names_from = RXD240B, values_from = RXD260,
- names_prefix = "day_",values_fill = 0, values_fn = list(RXD260 = sum))
- number_RXQ_RX <- RXQ_RX %>%
- select(SEQN, RXD240B, RXD295) %>%
- na.omit(.) %>%
- pivot_wider(names_from = RXD240B, values_from = RXD295,
- names_prefix = "number_",values_fill = 0, values_fn = list(RXD295 = sum))
- }else{
- day_RXQ_RX <- RXQ_RX %>%
- select(SEQN, RXDDRUG, RXDDAYS) %>%
- na.omit(.) %>%
- pivot_wider(names_from = RXDDRUG, values_from = RXDDAYS,
- names_prefix = "day_",values_fill = 0, values_fn = list(RXDDAYS = sum))
- number_RXQ_RX <- RXQ_RX %>%
- select(SEQN, RXDDRUG, RXDCOUNT) %>%
- na.omit(.) %>%
- pivot_wider(names_from = RXDDRUG, values_from = RXDCOUNT,
- names_prefix = "number_",values_fill = 0, values_fn = list(RXDCOUNT = sum))
- }
- for (item in medicine){
- med_columns <- grep(item, names(day_RXQ_RX), value = TRUE)
- tmp = day_RXQ_RX[, c("SEQN", med_columns)]
- tmp[[paste0("day_", item)]] <- apply(tmp[, -1], 1, function(x) sum(x))
- tmp <- tmp %>% select(SEQN, paste0("day_", item))
- data <- join_all(list(data, tmp), by = "SEQN", type = "left")
- med_columns <- grep(item, names(number_RXQ_RX), value = TRUE)
- tmp = number_RXQ_RX[, c("SEQN", med_columns)]
- tmp[[paste0("num_", item)]] <- apply(tmp[, -1], 1, function(x) sum(x))
- tmp <- tmp %>% select(SEQN, paste0("num_", item))
- data <- join_all(list(data, tmp), by = "SEQN", type = "left")
- }
- #就诊情况
- HUQ <- read.xport(paste0(path, "HUQ",year,".XPT"))
- if(year == "" || year == "_B" ||year =="_E" || year =="_C" || year =="_D" || year =="_F"|| year =="_G"){
- if(year == ""){
- HUQ <- HUQ %>% select(SEQN, HUQ010, HUQ020, HUQ030, HUQ040, HUQ050, HUQ060, HUQ070, HUD080, HUQ090)
- }else if(year == "_B"){
- HUQ <- HUQ %>% select(SEQN, HUQ010, HUQ020, HUQ030, HUQ040, HUQ050, HUQ060, HUD070, HUQ080, HUQ090)
- }else{
- HUQ <- HUQ %>% select(SEQN, HUQ010, HUQ020, HUQ030, HUQ040, HUQ050, HUQ060, HUQ071, HUD080, HUQ090)
- }
- }else{
- HUQ <- HUQ %>% select(SEQN, HUQ010, HUQ020, HUQ030, HUQ041, HUQ051, HUQ061, HUQ071, HUD080, HUQ090)
- }
- names(HUQ) <- c("SEQN", "HUQ010", "HUQ020", "HUQ030", "HUQ041", "HUQ051", "HUQ061", "HUQ071", "HUD080", "HUQ090")
- data <- join_all(list(data, HUQ), by = "SEQN", type = "left")
- #健康状况 胃病、肠病、HIV病毒、流感等 最近献血
- HSQ <- read.xport(paste0(path, "HSQ",year,".XPT"))
- if(year == ""){
- HSQ <- HSQ %>% select(SEQN, HSQ500, HSQ510, HSQ520, HSQ570, HSQ580, HSQ590, HSAQUEX)
- }else if(year == "_B"){
- HSQ <- HSQ %>% select(SEQN, HSQ500, HSQ510, HSQ520, HSD570, HSQ580, HSQ590, HSAQUEX)
- }else{
- HSQ <- HSQ %>% select(SEQN, HSQ500, HSQ510, HSQ520, HSQ571, HSQ580, HSQ590, HSAQUEX)
- }
- names(HSQ) <- c("SEQN", "HSQ500", "HSQ510", "HSQ520", "HSQ571", "HSQ580", "HSQ590", "HSAQUEX")
- data <- join_all(list(data, HSQ), by = "SEQN", type = "left")
- #身体机能
- PFQ <- read.xport(paste0(path, "PFQ",year,".XPT"))
- if(year == ""){
- PFQ <- PFQ %>% select(SEQN, PFQ020, PFQ030, PFQ040, PFQ048, PFQ050, PFQ055, PFQ056,
- PFQ059, PFQ060A, PFQ060B, PFQ060C,PFQ060D,PFQ060E,PFQ060F,PFQ060G,PFQ060H,PFQ060I,PFQ060J,PFQ060K,
- PFQ060L,PFQ060M,PFQ060N,PFQ060O,PFQ060P,PFQ060Q,PFQ060R,PFQ060S, PFD067A, PFD067B,PFD067C,PFD067D,
- PFD067E, PFQ090)
- }else if(year == "_B"){
- PFQ <- PFQ %>% select(SEQN, PFQ020, PFQ030, PFD040, PFQ048, PFQ050, PFQ055, PFQ056,
- PFQ059, PFQ060A, PFQ060B, PFQ060C,PFQ060D,PFQ060E,PFQ060F,PFQ060G,PFQ060H,PFQ060I,PFQ060J,PFQ060K,
- PFQ060L,PFQ060M,PFQ060N,PFQ060O,PFQ060P,PFQ060Q,PFQ060R,PFQ060S, PFD067A, PFD067B,PFD067C,PFD067D,
- PFD067E, PFQ090)
- }else{
- PFQ <- PFQ %>% select(SEQN, PFQ020, PFQ030, PFQ041, PFQ049, PFQ051, PFQ054, PFQ057,
- PFQ059, PFQ061A, PFQ061B,PFQ061C,PFQ061D,PFQ061E,PFQ061F,PFQ061G,PFQ061H,PFQ061I,PFQ061J,PFQ061K,
- PFQ061L,PFQ061M,PFQ061N,PFQ061O,PFQ061P,PFQ061Q, PFQ061R,PFQ061S, PFQ063A, PFQ063B,PFQ063C,PFQ063D,
- PFQ063E, PFQ090)
- }
- names(PFQ) <- c("SEQN", "PFQ020", "PFQ030", "PFQ041", "PFQ049", "PFQ051", "PFQ054", "PFQ057",
- "PFQ059", "PFQ061A", "PFQ061B","PFQ061C","PFQ061D","PFQ061E","PFQ061F","PFQ061G","PFQ061H","PFQ061I","PFQ061J","PFQ061K",
- "PFQ061L","PFQ061M","PFQ061N","PFQ061O","PFQ061P","PFQ061Q", "PFQ061R","PFQ061S", "PFQ063A", "PFQ063B","PFQ063C","PFQ063D",
- "PFQ063E", "PFQ090")
- data <- join_all(list(data, PFQ), by = "SEQN", type = "left")
- #认知功能
- if(year=="" | year=="_B" | year=="_H" | year=="_G"){
- CFQ <- read.xport(paste0(path, "CFQ",year,".XPT"))
- if(year == "" || year == "_B"){
- CFQ <- CFQ %>% select(SEQN, CFDRIGHT)
- }else{
- CFQ <- CFQ %>% select(SEQN, CFDDS)
- }
- names(CFQ) <- c("SEQN", "CFDDS")
- data <- join_all(list(data, CFQ), by = "SEQN", type = "left")
- }else{
- data$CFDDS <- NA
- }
- mortality_result = read.csv(file = paste0(path, "mortality_result.csv"))
- data <- join_all(list(data, mortality_result), by = "SEQN", type = "left")
- write.csv(data, file = paste0(path, "result", year, ".csv"), row.names = FALSE)
- csv_files <- list.files(path = "NHANES", pattern = "\\.csv$", recursive = TRUE, full.names = TRUE)
- df_combined <- NA
- # 确保读取文件的路径是完整的
- if (length(csv_files) > 0) {
- for (file in csv_files) {
- if (grepl("mortality", file)){
- next
- }
- # 读取每个.csv文件
- data <- read.csv(file, stringsAsFactors = FALSE)
- print(ncol(data))
- if (length(df_combined) == 0){
- df_combined <- data
- }else{
- df_combined <- rbind(data, df_combined)
- }
- # 这里可以添加代码进行数据处理
- print(paste("Read file:", file))
- }
- }
- write.csv(df_combined, file = paste0("NHANES/", "result_all", ".csv"), row.names = FALSE)
- # data = read.csv(file = paste0(path, "result.csv"))
- # head(data)
- #加入污染物
- library(ncdf4)
- library(raster)
- nc <- nc_open("CHAP_PM2.5_Y1K_2009_V4.nc")
- data <- ncvar_get(nc, "PM2.5")
- print(data)
- lon <- ncvar_get(nc, "lon")
- tori <- ncvar_get(nc, "time")
- tunites <- ncatt_get(nc, "time", )
- writeRaster(x = nc, filename = 'pre1.tif', format='GTiff', overwrite=TRUE)
- # China_Health_and_Retirement_Longitudinal_Study_CHARLS
- # demo_china <- read.dta("CHARLS/CHARLS2011_Dataset/demographic_background.dta")
- # household_income_china <- read.dta("CHARLS/CHARLS2011_Dataset/household_income.dta")
- # # UKDA
- # demo_UK <- read.dta("UKDA-5050-stata/stata/stata13_se/wave_0_1998_data.dta")
|