# (Line-number gutter left over from the web extraction removed here.)
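# CHARLS data modules:
# biomarkers: physical examination data
# community: community information (2011 wave only)
# demographic_background: personal information
# family_information: family member information
# family_transfer: economic relations (transfers) within the family
# individual_income: individual income and assets
# household_income: household income, expenditure and assets
#   (has to be computed for 2011 and 2013; see exp_income_wealth)
# household_roster: household member information (2011); later split into
#   three parts: Parent, Child, Other_HHmember
# housing_characteristics: housing information
# Exit_Interview: exit information (2013)
# Verbal_Autopsy: cause-of-death information (2013)
# Exit_Module: exit questionnaire (2020)
# exp_income_wealth: household income, already aggregated
# interviewer_observation: interviewer observations
# psu: mapping between community codes and cities
# Special modules:
# health_care_and_insurance: health care and insurance
# health_status_and_functioning: health status and functioning
# work_retirement_and_pension: work, retirement and pension
# Note: the questionnaires contain skip patterns that have to be taken into
# account, mainly so that skipped questions are told apart from missing values.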
# One-off package installation (uncomment on a fresh machine):
# install.packages("haven")
# install.packages("readstata13",repos = "https://mirrors.sjtug.sjtu.edu.cn/cran/")
library(haven)
library(readstata13)
library(dplyr)

# Survey wave to process. The wave-specific branches further down compare this
# value against "2011", "2013", "2015", "2018" and "2020".
year <- "2020"

# Root folder of the CHARLS data. Keep the trailing slash: output file names
# are appended to it with paste0().
path <- "/root/r_base/CHARLS/"
# Load the raw Stata tables of the selected wave.
#
# Every file is located below `path` in a folder named CHARLS<wave>. File
# names and the set of available tables differ between waves, so each wave has
# its own branch. The 2015, 2018 and 2020 branches take the PSU table from the
# 2013 folder. PSU files are read with GBK encoding.

# Build the full path of one data file; `path` must end with a slash.
wave_file <- function(file_name, wave = year) {
  paste0(path, "CHARLS", wave, "/", file_name)
}

# Fail early instead of silently treating an unknown wave like 2020.
supported_waves <- c("2011", "2013", "2015", "2018", "2020")
if (!(year %in% supported_waves)) {
  stop("Unsupported CHARLS wave: ", year, call. = FALSE)
}

if (year == "2011") {
  demo <- read_dta(wave_file("demographic_background.dta"))
  psu <- read_dta(wave_file("psu.dta"), encoding = "GBK")
  biomarkers <- read_dta(wave_file("biomarkers.dta"))
  blood <- read_dta(wave_file("Blood_20140429.dta"))
  health_status <- read_dta(wave_file("health_status_and_functioning.dta"))
  health_care <- read_dta(wave_file("health_care_and_insurance.dta"))
  exp_income <- read_dta(wave_file("exp_income_wealth.dta"))
} else if (year == "2013") {
  demo <- read_dta(wave_file("Demographic_Background.dta"))
  psu <- read_dta(wave_file("PSU.dta"), encoding = "GBK")
  biomarkers <- read_dta(wave_file("Biomarker.dta"))
  health_status <- read_dta(wave_file("Health_Status_and_Functioning.dta"))
  health_care <- read_dta(wave_file("Health_Care_and_Insurance.dta"))
  exp_income <- read_dta(wave_file("exp_income_wealth.dta"))
} else if (year == "2015") {
  demo <- read_dta(wave_file("Demographic_Background.dta"))
  psu <- read_dta(wave_file("PSU.dta", wave = "2013"), encoding = "GBK")
  biomarkers <- read_dta(wave_file("Biomarker.dta"))
  blood <- read_dta(wave_file("Blood.dta"))
  health_status <- read_dta(wave_file("Health_Status_and_Functioning.dta"))
  health_care <- read_dta(wave_file("Health_Care_and_Insurance.dta"))
  Household_Income <- read_dta(wave_file("Household_Income.dta"))
  Individual_Income <- read_dta(wave_file("Individual_Income.dta"))
} else if (year == "2018") {
  demo <- read_dta(wave_file("Demographic_Background.dta"))
  psu <- read_dta(wave_file("PSU.dta", wave = "2013"), encoding = "GBK")
  health_status <- read_dta(wave_file("Health_Status_and_Functioning.dta"))
  health_care <- read_dta(wave_file("Health_Care_and_Insurance.dta"))
  Household_Income <- read_dta(wave_file("Household_Income.dta"))
  Individual_Income <- read_dta(wave_file("Individual_Income.dta"))
  Cognition <- read_dta(wave_file("Cognition.dta"))
} else {
  # 2020: no health-care table is loaded for this wave.
  demo <- read_dta(wave_file("Demographic_Background.dta"))
  psu <- read_dta(wave_file("PSU.dta", wave = "2013"), encoding = "GBK")
  health_status <- read_dta(wave_file("Health_Status_and_Functioning.dta"))
  # Previously read from a path relative to the working directory, unlike
  # every other table; it now lives under `path` like the rest.
  Household_Income <- read_dta(wave_file("Household_Income.dta"))
  Individual_Income <- read_dta(wave_file("Individual_Income.dta"))
}
# Gender, birth year and marital status (plus the three identifier columns).
# The questionnaire variable names differ between waves, so they are looked up
# per wave and then renamed to common names.
demo_id_columns <- c("ID", "householdID", "communityID")
demo_columns_by_wave <- list(
  "2011" = c("rgender", "ba002_1", "be001"),
  "2013" = c("ba000_w2_3", "ba002_1", "be001"),
  "2015" = c("ba000_w2_3", "ba004_w3_1", "be001"),
  "2018" = c("ba000_w2_3", "ba004_w3_1", "be001"),
  "2020" = c("ba001", "ba003_1", "ba011")
)
if (year %in% names(demo_columns_by_wave)) {
  data <- demo[, c(demo_id_columns, demo_columns_by_wave[[as.character(year)]])]
}

# Columns 4-6 become: gender, birth year, marital status (kept as "be001").
colnames(data)[4:6] <- c("gender", "born_year", "be001")

# Age = survey year minus birth year; NA when the birth year is missing.
survey_year <- as.numeric(year)
data$age <- ifelse(is.na(data$born_year), NA, survey_year - data$born_year)
data$wave <- year
# Place of residence: attach province and city through the community code.
# all.x = TRUE keeps respondents whose community has no PSU entry.
psu_lookup <- psu[, c("communityID", "province", "city")]
data <- merge(data, psu_lookup, by = "communityID", all.x = TRUE)

# Align province / city names with the format used by the pollutant data.
# Each old name is replaced by the new name at the same position.
city_old <- c("海东地区", "北京", "哈尔滨", "天津", "巢湖市", "襄樊市")
city_new <- c("海东市", "北京市", "哈尔滨市", "天津市", "合肥市", "襄阳市")
for (i in seq_along(city_old)) {
  data$city[data$city == city_old[i]] <- city_new[i]
}

province_old <- c("北京", "天津", "广西省")
province_new <- c("北京市", "天津市", "广西壮族自治区")
for (i in seq_along(province_old)) {
  data$province[data$province == province_old[i]] <- province_new[i]
}
# Height, weight, systolic pressure, diastolic pressure, pulse.
# A biomarker table is only loaded for 2011, 2013 and 2015; for the other
# waves the same columns are added filled with NA so every wave has the same
# layout.
new_columns <- c("qi002", "ql002", "qa011", "qa012", "qa013")
biomarker_keys <- c("ID", "householdID", "communityID")
if (year %in% c("2011", "2013", "2015")) {
  biomarkers_select <- biomarkers[, c(biomarker_keys, new_columns)]
  data <- merge(data, biomarkers_select, by = biomarker_keys, all.x = TRUE)
} else {
  data[new_columns] <- NA
}
# Blood test results: white blood cells (WBC), mean corpuscular volume (MCV),
# platelets, blood urea nitrogen (bun), glu, creatinine (crea), total
# cholesterol (cho), triglycerides (tg), HDL, LDL, C-reactive protein (CRP),
# glycated hemoglobin (hba1c), uric acid (ua), hematocrit, hemoglobin (hgb),
# cystatin C.
blood_columns <- c(
  "ID", "bl_wbc", "bl_mcv", "bl_plt", "bl_bun", "bl_glu", "bl_crea",
  "bl_cho", "bl_tg", "bl_hdl", "bl_ldl", "bl_crp", "bl_hbalc", "bl_ua",
  "bl_hct", "bl_hgb", "bl_cysc"
)
if (year %in% c("2011", "2015")) {
  if (year == "2011") {
    # NOTE(review): the 2011 columns are renamed by position below, which
    # assumes the remaining columns already follow the order of
    # blood_columns - confirm against the 2011 blood file.
    blood <- subset(blood, select = -c(bloodweight, qc1_va003))
  } else {
    blood <- blood[, blood_columns]
  }
  colnames(blood) <- blood_columns
  data <- merge(data, blood, by = c("ID"), all.x = TRUE)
} else {
  # No blood table for this wave: add the columns filled with NA.
  new_columns <- blood_columns[-1]
  data[new_columns] <- NA
}
# Health status and functioning: self-rated general health.
# 2018 takes da002, 2020 takes da001; the other waves take da001 and fall back
# to da002 where da001 is missing.
health_status$general_helth_status <- switch(
  as.character(year),
  "2018" = health_status$da002,
  "2020" = health_status$da001,
  ifelse(is.na(health_status$da001), health_status$da002, health_status$da001)
)
# Harmonise the cognition / word-recall column names of the 2013, 2018 and
# 2020 waves with the names the scoring code further down expects
# (dc001s1..., dc006s1..dc006s11, dc027s1..dc027s11, dc019..dc023, dc025).

# Rename columns pairwise: old_names[i] becomes new_names[i].
#
# The earlier `names(df)[names(df) %in% old] <- new` form assigned the new
# names in the order the columns appear in the data, not in the order listed,
# so any list that is not in file order ended up with swapped names.
# Columns that are absent are reported and skipped.
rename_columns <- function(df, old_names, new_names) {
  stopifnot(length(old_names) == length(new_names))
  position <- match(old_names, names(df))
  present <- !is.na(position)
  if (!all(present)) {
    warning(
      "Columns not found and therefore not renamed: ",
      paste(old_names[!present], collapse = ", "),
      call. = FALSE
    )
  }
  names(df)[position[present]] <- new_names[present]
  df
}

if (year == "2013") {
  health_status <- rename_columns(
    health_status,
    paste0("dc006_1_s", 1:11),
    paste0("dc006s", 1:11)
  )
} else if (year == "2018") {
  # For 2018 the cognition items live in the separate Cognition table.
  Cognition <- rename_columns(
    Cognition,
    c("dc001_w4", "dc006_w4", "dc003_w4", "dc005_w4", "dc002_w4"),
    c("dc001s1", "dc001s2", "dc001s3", "dc002", "dc003")
  )
  Cognition <- rename_columns(
    Cognition,
    paste0("dc014_w4_", 1:5, "_1"),
    paste0("dc0", 19:23)
  )
  Cognition <- rename_columns(
    Cognition,
    paste0("dc028_w4_s", 1:11),
    paste0("dc006s", 1:11)
  )
  Cognition <- rename_columns(
    Cognition,
    c(paste0("dc047_w4_s", 1:11), "dc024_w4"),
    c(paste0("dc027s", 1:11), "dc025")
  )
} else if (year == "2020") {
  # Word recall, first round
  health_status <- rename_columns(
    health_status,
    paste0("dc012_s", 1:11),
    paste0("dc006s", 1:11)
  )
  # Word recall, second round
  health_status <- rename_columns(
    health_status,
    paste0("dc028_s", 1:11),
    paste0("dc027s", 1:11)
  )
}
# Activities of daily living (ADL). Per the original author's note the six
# items are toileting, eating, dressing, continence, getting in/out of bed and
# bathing; needing help with any of them counts as ADL disability, i.e. an
# ADL_score above 0.
# An item is flagged with 1 when its answer code is greater than 2, else 0
# (NA answers stay NA). ADL_score is the row sum of the six flags.
adl_needs_help <- function(answer) {
  ifelse(answer > 2, 1, 0)
}

adl_score_columns <- paste0("db0", 10:15, "_score")
# The 2020 questionnaire uses different item numbers for the same six flags.
adl_item_columns <- if (year == "2020") {
  c("db001", "db003", "db005", "db007", "db009", "db011")
} else {
  paste0("db0", 10:15)
}

for (i in seq_along(adl_score_columns)) {
  health_status[[adl_score_columns[i]]] <-
    adl_needs_help(health_status[[adl_item_columns[i]]])
}
health_status$ADL_score <- rowSums(health_status[, adl_score_columns])

# Instrumental activities of daily living (IADL). Per the original author's
# note the five items are housework, cooking, shopping, taking medication and
# managing money; needing help with any of them counts as IADL disability.
# An item is flagged with 1 when its answer code is greater than 2, else 0
# (NA answers stay NA). IADL_score is the row sum of the five flags.
iadl_needs_help <- function(answer) {
  ifelse(answer > 2, 1, 0)
}

iadl_score_columns <- paste0("db0", 16:20, "_score")
# The 2020 questionnaire uses different item numbers for the same five flags.
iadl_item_columns <- if (year == "2020") {
  c("db012", "db014", "db016", "db020", "db022")
} else {
  paste0("db0", 16:20)
}

for (i in seq_along(iadl_score_columns)) {
  health_status[[iadl_score_columns[i]]] <-
    iadl_needs_help(health_status[[iadl_item_columns[i]]])
}
health_status$IADL_score <- rowSums(health_status[, iadl_score_columns])
# Chronic diseases, physical activity, smoking and drinking: pick the
# wave-specific source columns, give them common names and merge them into
# `data`.
status_keys <- c("ID", "householdID", "communityID")
status_summary <- c("general_helth_status", "ADL_score", "IADL_score")

# Names of the 15 activity columns that start at item number `first_item`,
# e.g. 32 -> da032_1_, da032_2_, da032_3_, da033_1_, ..., da036_3_.
activity_columns <- function(first_item) {
  paste0("da0", rep(first_item + 0:4, each = 3), "_", 1:3, "_")
}

if (year == "2020") {
  # The 2020 disease answers are coded differently from the other waves:
  # 99 becomes 2, codes 1-3 become 1, anything else becomes NA.
  columns_to_process <- paste0("da002_", 1:15, "_")
  recode_diagnosis <- function(answer) {
    case_when(
      answer == 99 ~ 2,
      answer %in% 1:3 ~ 1,
      TRUE ~ NA_real_
    )
  }
  health_status[columns_to_process] <-
    lapply(health_status[columns_to_process], recode_diagnosis)

  # 2020 asks about Parkinson's disease and memory-related disease separately
  # (items 12 and 13); fold them into one column to match earlier waves:
  # 1 if either is 1; 2 if both are 2, or one is 2 and the other NA; else NA.
  item_12 <- health_status$da002_12_
  item_13 <- health_status$da002_13_
  health_status$da002_12_ <- case_when(
    item_12 == 1 | item_13 == 1 ~ 1,
    item_12 == 2 & item_13 == 2 ~ 2,
    item_12 == 2 & is.na(item_13) | is.na(item_12) & item_13 == 2 ~ 2,
    TRUE ~ NA_real_
  )

  health_status_select <- health_status[, c(
    status_keys, status_summary,
    paste0("da002_", c(1:12, 14, 15), "_"),
    activity_columns(32),
    "da046", "da047", "da050_1", "da051"
  )]

  # Swap codes 1 and 3 of the drinking item; other codes are left unchanged.
  drink_answer <- health_status_select$da051
  health_status_select$da051 <- ifelse(
    drink_answer == 1, 3,
    ifelse(drink_answer == 3, 1, drink_answer)
  )
} else {
  health_status_select <- health_status[, c(
    status_keys, status_summary,
    paste0("da007_", 1:14, "_"),
    activity_columns(51),
    "da059", "da061", "da063", "da069"
  )]
}

# Common output names. They are kept exactly as they were, including the
# leading blanks and misspellings, because they end up as CSV headers.
disease_names <- c(
  "Hypertension", "Dyslipidemia", "Disabetes_or_High_Blood_Sugar",
  "Cancer_or_Malignant_Tumor", "Chronic_Lung_Diseases", "Liver_Disease",
  "Heart_Problems", "Stroke", " Kidney_Diease",
  "Stomach_or_Other_Digestive_Disease",
  "Emotional_Nervous_or_Psychiatric_Problems", " Memory_Related_Disease",
  " Arthritis_or_Rheumatism", "Asthma"
)
activity_names <- paste0(
  c("Vigorous_Activities", "Moderate_Physical_Effort", "Walking"),
  rep(c("", "_day", "_2h", "_30m", "_4h"), each = 3)
)
lifestyle_names <- c("Smoke", "Smoke_still", "Number_Cigarettes", "Drink")

colnames(health_status_select) <- c(
  status_keys, status_summary, disease_names, activity_names, lifestyle_names
)
data <- merge(data, health_status_select, by = status_keys, all.x = TRUE)
# For 2018 the cognition items come from the separate Cognition table, so it
# replaces health_status for the scoring below.
if (year == "2018") {
  health_status <- Cognition
}

# Cognitive score. Per the original author's note it has three parts:
# telephone questionnaire (10 points), word recall (20 points) and drawing
# (1 point). This section computes the first part: ten items, one point each
# when the answer equals the expected code, 0 otherwise (missing counts as 0).
score_expected <- function(answer, expected) {
  ifelse(is.na(answer), 0, ifelse(answer == expected, 1, 0))
}

cognition_score_columns <- c(
  "dc001s1_score", "dc001s2_score", "dc001s3_score", "dc002_score",
  "dc003_score", paste0("dc0", 19:23, "_score")
)
cognition_expected <- c(1, 2, 3, 1, 1, 93, 86, 79, 72, 65)
# 2020 stores the same ten items under different variable names.
cognition_item_columns <- if (year == "2020") {
  c("dc001", "dc005", "dc003", "dc004", "dc002", paste0("dc007_", 1:5))
} else {
  c("dc001s1", "dc001s2", "dc001s3", "dc002", "dc003", paste0("dc0", 19:23))
}

for (i in seq_along(cognition_score_columns)) {
  health_status[[cognition_score_columns[i]]] <- score_expected(
    health_status[[cognition_item_columns[i]]],
    cognition_expected[i]
  )
}
health_status$Cognitive_functioning <-
  rowSums(health_status[, cognition_score_columns])
# Word recall. Two rounds (dc006s1..dc006s11 and dc027s1..dc027s11); item k
# scores 1 when its value equals k, otherwise 0 (missing counts as 0).
# remenber_functioning is the sum of all 22 indicators divided by 2.
# NOTE(review): item 11 is scored like the other ten; if code 11 stands for
# "none of the words" in the questionnaire it should probably not earn a
# point - confirm against the codebook.
recall_hit <- function(answer, word_code) {
  ifelse(is.na(answer), 0, ifelse(answer == word_code, 1, 0))
}

recall_item_columns <- c(paste0("dc006s", 1:11), paste0("dc027s", 1:11))
recall_word_codes <- rep(1:11, times = 2)
recall_score_columns <- paste0(recall_item_columns, "_score")

for (i in seq_along(recall_item_columns)) {
  health_status[[recall_score_columns[i]]] <- recall_hit(
    health_status[[recall_item_columns[i]]],
    recall_word_codes[i]
  )
}
health_status$remenber_functioning <-
  rowSums(health_status[, recall_score_columns]) / 2
# Drawing task: 1 point when the item equals 1, otherwise 0 (missing counts
# as 0). The item is dc009 in 2020 and dc025 in the other waves.
draw_item <- if (year == "2020") "dc009" else "dc025"
draw_answer <- health_status[[draw_item]]
health_status$draw_score <- ifelse(
  is.na(draw_answer), 0,
  ifelse(draw_answer == 1, 1, 0)
)
# Psychological score: ten items, each shifted to start at 0 (answer - 1),
# except the 5th and 8th score (dc013_score, dc016_score), which are reversed
# (4 - answer). psychiatric_score is the row sum, so it is NA as soon as one
# item is missing.
mood_score_columns <- sprintf("dc%03d_score", 9:18)
# 2020 stores the ten items as dc016..dc025, the other waves as dc009..dc018.
mood_item_columns <- if (year == "2020") {
  sprintf("dc%03d", 16:25)
} else {
  sprintf("dc%03d", 9:18)
}
mood_reversed <- mood_score_columns %in% c("dc013_score", "dc016_score")

for (i in seq_along(mood_score_columns)) {
  mood_answer <- health_status[[mood_item_columns[i]]]
  health_status[[mood_score_columns[i]]] <- if (mood_reversed[i]) {
    4 - mood_answer
  } else {
    mood_answer - 1
  }
}

health_status$psychiatric_score <-
  rowSums(health_status[, mood_score_columns])
# Keep only the identifiers and the four derived scores, then left-join them
# onto the respondent table.
cognition_keys <- c("ID", "householdID", "communityID")
cognition_outputs <- c(
  "Cognitive_functioning", "remenber_functioning",
  "draw_score", "psychiatric_score"
)
health_status <- health_status[, c(cognition_keys, cognition_outputs)]
data <- merge(data, health_status, by = cognition_keys, all.x = TRUE)
# Inpatient care: ee003 -> received_inpatient_care, ee004 ->
# Frequency_one_year. For 2020 the two columns are added filled with NA
# instead.
inpatient_columns <- c("received_inpatient_care", "Frequency_one_year")
if (year == "2020") {
  data[inpatient_columns] <- NA
} else {
  inpatient_keys <- c("ID", "householdID", "communityID")
  health_care <- health_care[, c(inpatient_keys, "ee003", "ee004")]
  colnames(health_care) <- c(inpatient_keys, inpatient_columns)
  data <- merge(data, health_care, by = inpatient_keys, all.x = TRUE)
}
# Individual income. 2011 and 2013 read the pre-computed INDV_INCOME from
# exp_income_wealth; later waves derive it from the individual income table:
# 0 when ga001 equals 2, otherwise the value of ga002.
income_keys <- c("ID", "householdID", "communityID")
income_columns <- c(income_keys, "INDV_INCOME")
if (year %in% c("2011", "2013")) {
  exp_income <- exp_income[, income_columns]
  data <- merge(data, exp_income, by = income_keys, all.x = TRUE)
} else {
  Individual_Income$INDV_INCOME <- ifelse(
    Individual_Income$ga001 == 2, 0, Individual_Income$ga002
  )
  data <- merge(
    data, Individual_Income[, income_columns],
    by = income_keys, all.x = TRUE
  )
  # Household_Income$INCOME_TOTAL <- apply(Household_Income[,c('ga006_1_1_','ga006_1_2_','ga006_1_3_'
  # ,'ga006_1_4_','ga006_1_5_','ga006_1_6_','ga006_1_7_','ga006_1_8_','ga006_1_9_','ga006_1_10_')], 1, function(x) sum(x, na.rm = TRUE))
}
# Write the table of the current wave, then stack all per-wave tables into
# one file.
write.csv(data, file = paste0(path, "result", year, ".csv"), row.names = FALSE)

# Combine. Only the per-wave outputs written above (result<YYYY>.csv, directly
# in `path`) are picked up. The earlier recursive "*.csv" scan also matched
# result_all.csv from a previous run and any other CSV below `path`.
csv_files <- list.files(
  path = path,
  pattern = "^result[0-9]{4}\\.csv$",
  full.names = TRUE
)

if (length(csv_files) > 0) {
  wave_tables <- lapply(csv_files, function(csv_file) {
    wave_table <- read.csv(csv_file, stringsAsFactors = FALSE)
    print(ncol(wave_table))
    print(paste("Read file:", csv_file))
    wave_table
  })
  # Bind once instead of growing the table inside a loop. The accumulator used
  # to start as NA, which added a spurious all-NA row to the combined file.
  # rev() keeps the previous row order (last file read comes first).
  df_combined <- do.call(rbind, rev(wave_tables))
  write.csv(
    df_combined,
    file = paste0(path, "result_all", ".csv"),
    row.names = FALSE
  )
} else {
  warning("No per-wave result files found in ", path, call. = FALSE)
}
# End of script.