2 maanden geleden · a4a6f31e89
--- a/CHARLS_P/CHARLS_NL.py
+++ b/CHARLS_P/CHARLS_NL.py
@@ -1,21 +1,20 @@
 
				 import pandas as pd
			
 
				 
			
 
				-
			
 
				-#读取CHARLS数据
			
 
				-CHARLS_data = pd.read_csv("CHARLS_data_pollutants.csv")
			
 
				+years = [2011, 2013,2015, 2018, 2020]
			
 
				 #读取夜光数据
			
 
				 pollutants_data = pd.read_csv("night_light_result.csv", encoding="utf-8")
			
 
				-#处理哪一年的数据
			
 
				-year = 2020
			
 
				-#新增两列，分别为year的去年和前年的环境值
			
 
				-# CHARLS_data[['last_year_pm2.5', "before_last_pm2.5"]]=''
			
 
				-#开始筛选出year的数据
			
 
				-CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
			
 
				-#两个表合并
			
 
				-table_merge = pd.merge(CHARLS_data_year, pollutants_data, left_on="city", right_on="ext_name", how='left')
			
 
				-# table_merge_last.to_csv("123.csv",index=False)
			
 
				-#更新CHARLS表
			
 
				-CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_nl'] = table_merge[str(year-1)].values
			
 
				-CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_nl'] = table_merge[str(year-2)].values
			
 
				-CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
			
 
				-print(year)
			
 
				+for year in years:
			
 
				+    #读取CHARLS数据
			
 
				+    CHARLS_data = pd.read_csv("CHARLS_data_pollutants.csv")
			
 
				+    #新增两列，分别为year的去年和前年的环境值
			
 
				+    # CHARLS_data[['last_year_pm2.5', "before_last_pm2.5"]]=''
			
 
				+    #开始筛选出year的数据
			
 
				+    CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
			
 
				+    #两个表合并
			
 
				+    table_merge = pd.merge(CHARLS_data_year, pollutants_data, left_on="city", right_on="ext_name", how='left')
			
 
				+    # table_merge_last.to_csv("123.csv",index=False)
			
 
				+    #更新CHARLS表
			
 
				+    CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_nl'] = table_merge[str(year-1)].values
			
 
				+    CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_nl'] = table_merge[str(year-2)].values
			
 
				+    CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
			
 
				+    print(year)
			
--- a/CHARLS_P/CHARLS_PM.py
+++ b/CHARLS_P/CHARLS_PM.py
@@ -2,58 +2,61 @@ import pandas as pd
 
				 from glob import glob
			
 
				 import os
			
 
				 
			
 
				-def pollutant_handle(CHARLS_data):
			
 
				+def pollutant_handle(path):
			
 
				+    years = [2011, 2013,2015, 2018, 2020]
			
 
				     #读取污染物数据
			
 
				-    pollutants_data = pd.read_csv("result_O3_p.csv")
			
 
				-    #处理哪一年的数据
			
 
				-    year = 2020
			
 
				-    #开始筛选出year的数据
			
 
				-    CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
			
 
				-    #两个表合并
			
 
				-    table_merge = pd.merge(CHARLS_data_year, pollutants_data, on=['province', 'city'], how='left')
			
 
				-    #更新CHARLS表
			
 
				-    CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_O3'] = table_merge[str(year-1)].values
			
 
				-    CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_O3'] = table_merge[str(year-2)].values
			
 
				-    CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
			
 
				-    print(year)
			
 
				+    pollutants_data = pd.read_csv("pollution/result_O3_p.csv")
			
 
				+    for year in years:
			
 
				+        CHARLS_data = pd.read_csv(path)
			
 
				+        print(CHARLS_data.info())
			
 
				+        #开始筛选出year的数据
			
 
				+        CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
			
 
				+        #两个表合并
			
 
				+        table_merge = pd.merge(CHARLS_data_year, pollutants_data, on=['province', 'city'], how='left')
			
 
				+        #更新CHARLS表
			
 
				+        CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_O3'] = table_merge[str(year-1)].values
			
 
				+        CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_O3'] = table_merge[str(year-2)].values
			
 
				+        CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
			
 
				+        print(year)
			
 
				 
			
 
				-def aba_handle(CHARLS_data):
			
 
				-    #处理CHARLS数据的年份
			
 
				-    year = 2020
			
 
				-    path = "aba627/result/"
			
 
				-    #读取污染物组分
			
 
				-    last_year_file_name = path+str(year-1)+"_PM25_and_species_p.csv"
			
 
				-    before_last_file_name = path+str(year-2)+"_PM25_and_species_p.csv"
			
 
				-    last_year_pollutants_data = pd.read_csv(last_year_file_name)
			
 
				-    before_last_pollutants_data = pd.read_csv(before_last_file_name)
			
 
				-    #开始筛选出year的数据
			
 
				-    CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
			
 
				-    #和上一年的污染物组分文件合并
			
 
				-    last_table_merge = pd.merge(CHARLS_data_year, last_year_pollutants_data, on=['province', 'city'], how='left')
			
 
				-    CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_SO4'] = last_table_merge["SO4"].values
			
 
				-    CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_NO3'] = last_table_merge["NO3"].values
			
 
				-    CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_NH4'] = last_table_merge["NH4"].values
			
 
				-    CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_OM'] = last_table_merge["OM"].values
			
 
				-    CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_BC'] = last_table_merge["BC"].values
			
 
				-    #和上上年的污染物组分文件合并
			
 
				-    before_last_table_merge = pd.merge(CHARLS_data_year, before_last_pollutants_data, on=['province', 'city'], how='left')
			
 
				-    CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_SO4'] = before_last_table_merge["SO4"].values
			
 
				-    CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_NO3'] = before_last_table_merge["NO3"].values
			
 
				-    CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_NH4'] = before_last_table_merge["NH4"].values
			
 
				-    CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_OM'] = before_last_table_merge["OM"].values
			
 
				-    CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_BC'] = before_last_table_merge["BC"].values
			
 
				-    #更新CHARLS表
			
 
				-    CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
			
 
				-    print(year)
			
 
				+def aba_handle(path_data):
			
 
				+    years = [2011, 2013,2015, 2018, 2020]
			
 
				+    for year in years:
			
 
				+        CHARLS_data = pd.read_csv(path_data)
			
 
				+        path = "aba627/result/"
			
 
				+        #读取污染物组分
			
 
				+        last_year_file_name = path+str(year-1)+"_PM25_and_species_p.csv"
			
 
				+        before_last_file_name = path+str(year-2)+"_PM25_and_species_p.csv"
			
 
				+        last_year_pollutants_data = pd.read_csv(last_year_file_name)
			
 
				+        before_last_pollutants_data = pd.read_csv(before_last_file_name)
			
 
				+        #开始筛选出year的数据
			
 
				+        CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
			
 
				+        #和上一年的污染物组分文件合并
			
 
				+        last_table_merge = pd.merge(CHARLS_data_year, last_year_pollutants_data, on=['province', 'city'], how='left')
			
 
				+        CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_SO4'] = last_table_merge["SO4"].values
			
 
				+        CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_NO3'] = last_table_merge["NO3"].values
			
 
				+        CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_NH4'] = last_table_merge["NH4"].values
			
 
				+        CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_OM'] = last_table_merge["OM"].values
			
 
				+        CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_BC'] = last_table_merge["BC"].values
			
 
				+        #和上上年的污染物组分文件合并
			
 
				+        before_last_table_merge = pd.merge(CHARLS_data_year, before_last_pollutants_data, on=['province', 'city'], how='left')
			
 
				+        CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_SO4'] = before_last_table_merge["SO4"].values
			
 
				+        CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_NO3'] = before_last_table_merge["NO3"].values
			
 
				+        CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_NH4'] = before_last_table_merge["NH4"].values
			
 
				+        CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_OM'] = before_last_table_merge["OM"].values
			
 
				+        CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_BC'] = before_last_table_merge["BC"].values
			
 
				+        #更新CHARLS表
			
 
				+        CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
			
 
				+        print(year)
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     #读取CHARLS数据
			
 
				-    CHARLS_data = pd.read_csv("CHARLS_data_pollutants.csv")
			
 
				-    print(CHARLS_data.info())
			
 
				-    # CHARLS_data1 = pd.read_csv("NHANES/result_all.csv")
			
 
				-    # print(CHARLS_data1.info())
			
 
				+    path = "CHARLS_data_pollutants.csv"
			
 
				+    # CHARLS_data = pd.read_csv("CHARLS/result_all_new.csv")
			
 
				+    # print(CHARLS_data.info())
			
 
				+    # CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
			
 
				     
			
 
				     #处理污染物
			
 
				-    # pollutant_handle(CHARLS_data)
			
 
				+    # pollutant_handle(path)
			
 
				     #处理PM2.5组分
			
 
				-    # aba_handle(CHARLS_data)
			
 
				+    aba_handle(path)
			
--- a/CHARLS_P/CHARLS_preprocess_main.py
+++ b/CHARLS_P/CHARLS_preprocess_main.py
@@ -0,0 +1,676 @@
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+import pyreadstat
			
 
				+
			
 
				+#统一列名
			
 
				+def change_columns(df):
			
 
				+    df.columns = ["ID",'householdID','communityID','sex', "birth_year", "marital_status" , 'province', 'city',"Height", "Weight",
			
 
				+                  "Systolic","Diastolic",
			
 
				+
			
 
				+                  'bl_wbc','bl_mcv','bl_plt','bl_bun','bl_glu','bl_crea','bl_cho', 'bl_tg', 'bl_hdl', 'bl_ldl','bl_crp', 
			
 
				+                  'bl_hbalc','bl_ua', 'bl_hct', 'bl_hgb','bl_cysc',
			
 
				+
			
 
				+                  'Hypertension','Dyslipidemia','Disabetes_or_High_Blood_Sugar','Cancer_or_Malignant_Tumor','Chronic_Lung_Diseases', 
			
 
				+                  'Liver_Disease', 'Heart_Problems', 'Stroke', ' Kidney_Diease','Stomach_or_Other_Digestive_Disease', 
			
 
				+                  'Emotional_Nervous_or_Psychiatric_Problems', ' Memory_Related_Disease',' Arthritis_or_Rheumatism','Asthma',
			
 
				+                  
			
 
				+                  'Vigorous_Activities', 'Moderate_Physical_Effort','Walking','Vigorous_Activities_day', 'Moderate_Physical_Effort_day',
			
 
				+                  'Walking_day','Vigorous_Activities_2h', 'Moderate_Physical_Effort_2h','Walking_2h','Vigorous_Activities_30m', 
			
 
				+                  'Moderate_Physical_Effort_30m','Walking_30m','Vigorous_Activities_4h', 'Moderate_Physical_Effort_4h','Walking_4h',
			
 
				+                  
			
 
				+                  'Smoke', 'Smoke_still','Number_Cigarettes','Drink',
			
 
				+                  
			
 
				+                  "Cognition_score", "Psychiatric_score", "wave"
			
 
				+                  ]
			
 
				+# 2020年把帕金森和记忆病症分开，需要和以前对齐   
			
 
				+def process_row(row):
			
 
				+    da002_12_ = row['da003_12_']
			
 
				+    da002_13_ = row['da003_13_']
			
 
				+    
			
 
				+    if da002_12_ == 1 or da002_13_ == 1:
			
 
				+        return 1
			
 
				+    elif da002_12_ == 2 and da002_13_ == 2:
			
 
				+        return 2
			
 
				+    elif (da002_12_ == 2 and pd.isna(da002_13_)) or (pd.isna(da002_12_) and da002_13_ == 2):
			
 
				+        return 2
			
 
				+    elif pd.isna(da002_12_) and pd.isna(da002_13_):
			
 
				+        return np.nan
			
 
				+    else:
			
 
				+        return np.nan  # 预防万一，其余情况下设为NA
			
 
				+    
			
 
				+def update_da051(value):
			
 
				+    if value == 1:
			
 
				+        return 3
			
 
				+    elif value == 3:
			
 
				+        return 1
			
 
				+    else:
			
 
				+        return value
			
 
				+    
			
 
				+if __name__ == "__main__":
			
 
				+    # 2011年
			
 
				+    year = "2011"
			
 
				+    demo, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/demographic_background.dta")
			
 
				+    psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/psu.dta", encoding='gbk')
			
 
				+    biomarkers, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/biomarkers.dta")
			
 
				+    blood, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Blood_20140429.dta")
			
 
				+    health_status, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/health_status_and_functioning.dta")
			
 
				+    health_care, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/health_care_and_insurance.dta")
			
 
				+    exp_income, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/exp_income_wealth.dta")
			
 
				+
			
 
				+    #性别#年龄#居住地#婚姻状况
			
 
				+    # 1 Married with spouse present
			
 
				+    # 2 Married but not living with spouse temporarily for reasons such as work
			
 
				+    # 3 Separated
			
 
				+    # 4 Divorced
			
 
				+    # 5 Widowed
			
 
				+    # 6 Never married
			
 
				+    data_2011 = demo[['ID','householdID', 'communityID','rgender','ba002_1','be001']]
			
 
				+
			
 
				+    #居住地
			
 
				+    data_2011 = pd.merge(data_2011, psu[['communityID', 'province', 'city']], on = "communityID", how="left")
			
 
				+
			
 
				+    #身高#体重#收缩压#舒张压
			
 
				+    biomarkers_select = biomarkers[['ID','householdID', 'communityID','qi002','ql002','qa011','qa012']]
			
 
				+    data_2011 = pd.merge(data_2011, biomarkers_select, on = ["ID", "householdID", "communityID"], how="left")
			
 
				+
			
 
				+    #白细胞（WBC），平均红血球容积MCV,血小板,血尿素氮bun,葡萄糖glu,血肌酐crea,总胆固醇cho,甘油三酯tg,高密度脂蛋白HDL,低密度脂蛋白胆固醇LDL,C反应蛋白CRP
			
 
				+    #糖化血红蛋白hba1c,尿酸ua,血细胞比容Hematocrit,血红蛋白hgb,胱抑素C
			
 
				+    blood = blood.loc[:, blood.columns.difference(["bloodweight", "qc1_va003"])]
			
 
				+    data_2011 = pd.merge(data_2011, blood, on = ["ID"], how="left")
			
 
				+    # 慢性病：
			
 
				+    # (1)  Hypertension 高血压病    
			
 
				+    # (2)	Dyslipidemia (elevation of low density lipoprotein, triglycerides (TGs),and total cholesterol, or a low high density lipoprotein level)血脂异常（包括低密度脂蛋白、甘油三酯、总胆固醇的升高或（和）高密度脂蛋白的下降）
			
 
				+    # (3)	Diabetes or high blood sugar糖尿病或血糖升高（包括糖耐量异常和空腹血糖升高）
			
 
				+    # (4)	Cancer or malignant tumor (excluding minor skin cancers) 癌症等恶性肿瘤（不包括轻度皮肤癌）
			
 
				+    # (5)	Chronic lung diseases, such as chronic bronchitis , emphysema ( excluding tumors, or cancer) 慢性肺部疾患如慢性支气管炎或肺气肿、肺心病（不包括肿瘤或癌）
			
 
				+    #        (6)  Liver disease (except fatty liver, tumors, and cancer) 肝脏疾病
			
 
				+    # （除脂肪肝、肿瘤或癌外）
			
 
				+    # (7)	Heart attack, coronary heart disease, angina, congestive heart failure, or other heart problems 心脏病（如心肌梗塞、冠心病、心绞痛、充血性心力衰竭和其他心脏疾病）
			
 
				+    # (8)	 Stroke  中风
			
 
				+    # (9)	 Kidney disease (except for tumor or cancer) 肾脏疾病（不包括肿瘤或癌）
			
 
				+    # (10)	 Stomach or other digestive disease (except for tumor or cancer) 胃部疾病或消化系统疾病（不包括肿瘤或癌）
			
 
				+    # (11)	 Emotional, nervous, or psychiatric problems 情感及精神方面问题 
			
 
				+    # (12)	 Memory-related disease 与记忆相关的疾病 （如老年痴呆症、脑萎缩、帕金森症）
			
 
				+    # (13)	 Arthritis or rheumatism 关节炎或风湿病
			
 
				+    # (14)  Asthma  哮喘
			
 
				+    health_status_select = health_status[['ID','householdID', 'communityID', 'da007_1_', 'da007_2_','da007_3_'
			
 
				+                                   ,'da007_4_','da007_5_','da007_6_','da007_7_','da007_8_','da007_9_','da007_10_','da007_11_'
			
 
				+                                   ,'da007_12_','da007_13_','da007_14_','da051_1_','da051_2_', 'da051_3_'
			
 
				+                                   ,'da052_1_','da052_2_','da052_3_','da053_1_','da053_2_','da053_3_','da054_1_','da054_2_','da054_3_'
			
 
				+                                   ,'da055_1_','da055_2_','da055_3_', 'da059','da061','da063'
			
 
				+                                   ,'da069']]
			
 
				+    
			
 
				+    data_2011 = pd.merge(data_2011, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
			
 
				+
			
 
				+    #计算认知功能得分，分成三部分：电话问卷10分，词语回忆10分、画图1分
			
 
				+    health_status["dc001s1_score"] = health_status["dc001s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc001s2_score"] = health_status["dc001s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc001s3_score"] = health_status["dc001s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc002_score"] = health_status["dc002"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc019_score"] = health_status["dc019"].apply(lambda x : 1 if x==93 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc020_score"] = health_status["dc020"].apply(lambda x : 1 if x==86 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc021_score"] = health_status["dc021"].apply(lambda x : 1 if x==79 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc022_score"] = health_status["dc022"].apply(lambda x : 1 if x==72 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc023_score"] = health_status["dc023"].apply(lambda x : 1 if x==65 else 0 if pd.isna(x) else 0)
			
 
				+
			
 
				+    #词语记忆
			
 
				+    health_status["dc006s1_score"] = health_status["dc006s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc006s2_score"] = health_status["dc006s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc006s3_score"] = health_status["dc006s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc006s4_score"] = health_status["dc006s4"].apply(lambda x : 1 if x==4 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc006s5_score"] = health_status["dc006s5"].apply(lambda x : 1 if x==5 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc006s6_score"] = health_status["dc006s6"].apply(lambda x : 1 if x==6 else 0 if pd.isna(x) else 0)                                            
			
 
				+    health_status["dc006s7_score"] = health_status["dc006s7"].apply(lambda x : 1 if x==7 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc006s8_score"] = health_status["dc006s8"].apply(lambda x : 1 if x==8 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc006s9_score"] = health_status["dc006s9"].apply(lambda x : 1 if x==9 else 0 if pd.isna(x) else 0)                                            
			
 
				+    health_status["dc006s10_score"] = health_status["dc006s10"].apply(lambda x : 1 if x==10 else 0 if pd.isna(x) else 0)                                           
			
 
				+    health_status["dc006s11_score"] = health_status["dc006s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc027s1_score"] = health_status["dc027s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc027s2_score"] = health_status["dc027s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc027s3_score"] = health_status["dc027s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc027s4_score"] = health_status["dc027s4"].apply(lambda x : 1 if x==4 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc027s5_score"] = health_status["dc027s5"].apply(lambda x : 1 if x==5 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc027s6_score"] = health_status["dc027s6"].apply(lambda x : 1 if x==6 else 0 if pd.isna(x) else 0)                                            
			
 
				+    health_status["dc027s7_score"] = health_status["dc027s7"].apply(lambda x : 1 if x==7 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc027s8_score"] = health_status["dc027s8"].apply(lambda x : 1 if x==8 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc027s9_score"] = health_status["dc027s9"].apply(lambda x : 1 if x==9 else 0 if pd.isna(x) else 0)                                            
			
 
				+    health_status["dc027s10_score"] = health_status["dc027s10"].apply(lambda x : 1 if x==10 else 0 if pd.isna(x) else 0)                                            
			
 
				+    health_status["dc027s11_score"] = health_status["dc027s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0)
			
 
				+    #画图
			
 
				+    health_status["draw_score"] = health_status["dc025"].apply(lambda x : 1 if x==1 else 0)
			
 
				+
			
 
				+    data_2011["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
			
 
				+        health_status["dc001s3_score"] + health_status["dc002_score"]+ health_status["dc003_score"]+ \
			
 
				+        health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
			
 
				+        health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
			
 
				+        health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \
			
 
				+        health_status["dc006s5_score"] + health_status["dc006s6_score"] + health_status["dc006s7_score"] + \
			
 
				+        health_status["dc006s8_score"] + health_status["dc006s9_score"] + health_status["dc006s10_score"] + \
			
 
				+        health_status["dc006s11_score"] + health_status["dc027s1_score"]+ health_status["dc027s2_score"]+ \
			
 
				+        health_status["dc027s3_score"]+ health_status["dc027s4_score"]+ health_status["dc027s5_score"]+ \
			
 
				+        health_status["dc027s6_score"]+ health_status["dc027s7_score"]+ health_status["dc027s8_score"]+ \
			
 
				+        health_status["dc027s9_score"]+health_status["dc027s10_score"]+health_status["dc027s11_score"]+\
			
 
				+        health_status["draw_score"]
			
 
				+    #心理得分
			
 
				+    health_status["dc009_score"] = health_status["dc009"]-1
			
 
				+    health_status["dc010_score"] = health_status["dc010"]-1
			
 
				+    health_status["dc011_score"] = health_status["dc011"]-1
			
 
				+    health_status["dc012_score"] = health_status["dc012"]-1   
			
 
				+    health_status["dc013_score"] = 4 - health_status["dc013"] 
			
 
				+    health_status["dc014_score"] = health_status["dc014"]-1   
			
 
				+    health_status["dc015_score"] = health_status["dc015"]-1   
			
 
				+    health_status["dc016_score"] = 4 - health_status["dc016"]
			
 
				+    health_status["dc017_score"] = health_status["dc017"]-1   
			
 
				+    health_status["dc018_score"] = health_status["dc018"]-1 
			
 
				+    data_2011["psychiatric_score"] = health_status["dc009_score"] + health_status["dc010_score"] + health_status["dc011_score"] + \
			
 
				+        health_status["dc012_score"] + health_status["dc013_score"] + health_status["dc014_score"] + health_status["dc015_score"] + \
			
 
				+        health_status["dc016_score"] + health_status["dc017_score"] + health_status["dc018_score"]
			
 
				+    data_2011["wave"] = year
			
 
				+    change_columns(data_2011)
			
 
				+
			
 
				+    # 2013年
			
 
				+    year = "2013"
			
 
				+    demo, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Demographic_Background.dta")
			
 
				+    psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/PSU.dta", encoding='gbk')
			
 
				+    biomarkers, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Biomarker.dta")
			
 
				+    health_status, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Status_and_Functioning.dta")
			
 
				+    health_care, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Care_and_Insurance.dta")
			
 
				+    exp_income, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/exp_income_wealth.dta")
			
 
				+
			
 
				+    #性别#年龄#婚姻状况
			
 
				+    # 1 Married with spouse present
			
 
				+    # 2 Married but not living with spouse temporarily for reasons such as work
			
 
				+    # 3 Separated
			
 
				+    # 4 Divorced
			
 
				+    # 5 Widowed
			
 
				+    # 6 Never married
			
 
				+    data_2013 = demo[['ID','householdID', 'communityID','ba000_w2_3','zba002_1','be001']]
			
 
				+    #居住地
			
 
				+    data_2013 = pd.merge(data_2013, psu[['communityID', 'province', 'city']], on = "communityID", how="left")
			
 
				+
			
 
				+    #身高#体重#收缩压#舒张压
			
 
				+    biomarkers_select = biomarkers[['ID','householdID', 'communityID','qi002','ql002','qa011','qa012']]
			
 
				+    data_2013 = pd.merge(data_2013, biomarkers_select, on = ["ID", "householdID", "communityID"], how="left")
			
 
				+
			
 
				+    #白细胞（WBC），平均红血球容积MCV,血小板,血尿素氮bun,葡萄糖glu,血肌酐crea,总胆固醇cho,甘油三酯tg,高密度脂蛋白HDL,低密度脂蛋白胆固醇LDL,C反应蛋白CRP
			
 
				+    #糖化血红蛋白hba1c,尿酸ua,血细胞比容Hematocrit,血红蛋白hgb,胱抑素C
			
 
				+    data_2013[['bl_wbc','bl_mcv','bl_plt','bl_bun','bl_glu','bl_crea','bl_cho', 'bl_tg', 'bl_hdl', 'bl_ldl','bl_crp','bl_hbalc','bl_ua', 'bl_hct', 'bl_hgb','bl_cysc']]=np.nan
			
 
				+    
			
 
				+    # 慢性病：
			
 
				+    # (1)  Hypertension 高血压病    
			
 
				+    # (2)	Dyslipidemia (elevation of low density lipoprotein, triglycerides (TGs),and total cholesterol, or a low high density lipoprotein level)血脂异常（包括低密度脂蛋白、甘油三酯、总胆固醇的升高或（和）高密度脂蛋白的下降）
			
 
				+    # (3)	Diabetes or high blood sugar糖尿病或血糖升高（包括糖耐量异常和空腹血糖升高）
			
 
				+    # (4)	Cancer or malignant tumor (excluding minor skin cancers) 癌症等恶性肿瘤（不包括轻度皮肤癌）
			
 
				+    # (5)	Chronic lung diseases, such as chronic bronchitis , emphysema ( excluding tumors, or cancer) 慢性肺部疾患如慢性支气管炎或肺气肿、肺心病（不包括肿瘤或癌）
			
 
				+    #        (6)  Liver disease (except fatty liver, tumors, and cancer) 肝脏疾病
			
 
				+    # （除脂肪肝、肿瘤或癌外）
			
 
				+    # (7)	Heart attack, coronary heart disease, angina, congestive heart failure, or other heart problems 心脏病（如心肌梗塞、冠心病、心绞痛、充血性心力衰竭和其他心脏疾病）
			
 
				+    # (8)	 Stroke  中风
			
 
				+    # (9)	 Kidney disease (except for tumor or cancer) 肾脏疾病（不包括肿瘤或癌）
			
 
				+    # (10)	 Stomach or other digestive disease (except for tumor or cancer) 胃部疾病或消化系统疾病（不包括肿瘤或癌）
			
 
				+    # (11)	 Emotional, nervous, or psychiatric problems 情感及精神方面问题 
			
 
				+    # (12)	 Memory-related disease 与记忆相关的疾病 （如老年痴呆症、脑萎缩、帕金森症）
			
 
				+    # (13)	 Arthritis or rheumatism 关节炎或风湿病
			
 
				+    # (14)  Asthma  哮喘
			
 
				+    health_status_select = health_status[['ID','householdID', 'communityID', 'da007_1_', 'da007_2_','da007_3_'
			
 
				+                                   ,'da007_4_','da007_5_','da007_6_','da007_7_','da007_8_','da007_9_','da007_10_','da007_11_'
			
 
				+                                   ,'da007_12_','da007_13_','da007_14_','da051_1_','da051_2_', 'da051_3_'
			
 
				+                                   ,'da052_1_','da052_2_','da052_3_','da053_1_','da053_2_','da053_3_','da054_1_','da054_2_','da054_3_'
			
 
				+                                   ,'da055_1_','da055_2_','da055_3_', 'da059','da061','da063'
			
 
				+                                   ,'da069']]
			
 
				+    
			
 
				+    data_2013 = pd.merge(data_2013, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
			
 
				+
			
 
				+    #计算认知功能得分，分成三部分：电话问卷10分，词语回忆10分、画图1分
			
 
				+    health_status["dc001s1_score"] = health_status["dc001s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc001s2_score"] = health_status["dc001s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc001s3_score"] = health_status["dc001s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc002_score"] = health_status["dc002"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc019_score"] = health_status["dc019"].apply(lambda x : 1 if x==93 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc020_score"] = health_status["dc020"].apply(lambda x : 1 if x==86 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc021_score"] = health_status["dc021"].apply(lambda x : 1 if x==79 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc022_score"] = health_status["dc022"].apply(lambda x : 1 if x==72 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc023_score"] = health_status["dc023"].apply(lambda x : 1 if x==65 else 0 if pd.isna(x) else 0)
			
 
				+
			
 
				+    #词语记忆
			
 
				+    health_status["dc006s1_score"] = health_status["dc006_1_s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc006s2_score"] = health_status["dc006_1_s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc006s3_score"] = health_status["dc006_1_s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
			
 
				+    health_status["dc006s4_score"] = health_status["dc006_1_s4"].apply(lambda x : 1 if x==4 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc006s5_score"] = health_status["dc006_1_s5"].apply(lambda x : 1 if x==5 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc006s6_score"] = health_status["dc006_1_s6"].apply(lambda x : 1 if x==6 else 0 if pd.isna(x) else 0)                                            
			
 
				+    health_status["dc006s7_score"] = health_status["dc006_1_s7"].apply(lambda x : 1 if x==7 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc006s8_score"] = health_status["dc006_1_s8"].apply(lambda x : 1 if x==8 else 0 if pd.isna(x) else 0) 
			
 
				+    health_status["dc006s9_score"] = health_status["dc006_1_s9"].apply(lambda x : 1 if x==9 else 0 if pd.isna(x) else 0)                                            
			
 
    # Word-recall scoring for the 2013 wave: an option column earns one point
    # when it holds its own option number; any other value (including a missing
    # one) earns zero.
    for recall_option in (10, 11):
        # In this wave the first recall list is stored as dc006_1_s<k>.
        health_status[f"dc006s{recall_option}_score"] = health_status[f"dc006_1_s{recall_option}"].apply(
            lambda answer, expected=recall_option: 1 if answer == expected else 0
        )
    for recall_option in range(1, 12):
        health_status[f"dc027s{recall_option}_score"] = health_status[f"dc027s{recall_option}"].apply(
            lambda answer, expected=recall_option: 1 if answer == expected else 0
        )

    # Figure drawing: one point when dc025 equals 1.
    health_status["draw_score"] = health_status["dc025"].apply(lambda answer: 1 if answer == 1 else 0)

    # Cognition score: add up every item score, in this fixed order.
    cognition_score_columns = (
        ["dc001s1_score", "dc001s2_score", "dc001s3_score", "dc002_score", "dc003_score",
         "dc019_score", "dc020_score", "dc021_score", "dc022_score", "dc023_score"]
        + [f"dc006s{k}_score" for k in range(1, 12)]
        + [f"dc027s{k}_score" for k in range(1, 12)]
        + ["draw_score"]
    )
    cognition_total = health_status[cognition_score_columns[0]]
    for score_column in cognition_score_columns[1:]:
        cognition_total = cognition_total + health_status[score_column]
    # NOTE(review): this assignment pairs rows by index label, not by respondent
    # ID - confirm data_2013 and health_status share the same row order.
    data_2013["Cognition_score"] = cognition_total

    # Psychological score: each of dc009..dc018 is shifted to start at 0.
    # dc013 and dc016 are scored as 4 - answer (presumably the reverse-worded
    # items - confirm against the questionnaire).
    mood_items = ["dc009", "dc010", "dc011", "dc012", "dc013",
                  "dc014", "dc015", "dc016", "dc017", "dc018"]
    for mood_item in mood_items:
        if mood_item in ("dc013", "dc016"):
            health_status[mood_item + "_score"] = 4 - health_status[mood_item]
        else:
            health_status[mood_item + "_score"] = health_status[mood_item] - 1
    mood_total = health_status[mood_items[0] + "_score"]
    for mood_item in mood_items[1:]:
        mood_total = mood_total + health_status[mood_item + "_score"]
    data_2013["psychiatric_score"] = mood_total

    # Tag the wave, normalise the column names, and stack under the 2011 rows.
    data_2013["wave"] = year
    change_columns(data_2013)
    data_2013 = pd.concat([data_2011, data_2013], axis=0)
			
 
				+
			
 
    # 2015 wave: load the raw tables, assemble one row per respondent, score
    # cognition and mood, then append the result to the running 2011-2013 frame.
    year = "2015"
    demo, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Demographic_Background.dta")
    # The PSU table is read from the CHARLS2013 folder, not from this wave's folder.
    psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2013/PSU.dta", encoding='gbk')
    blood, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Blood.dta")
    biomarkers, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Biomarker.dta")
    health_status, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Status_and_Functioning.dta")
    # NOTE(review): health_care is loaded but not used anywhere in this wave's section.
    health_care, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Care_and_Insurance.dta")

    # Sex, age, marital status.
    # Marital status codes:
    # 1 Married with spouse present
    # 2 Married but not living with spouse temporarily for reasons such as work
    # 3 Separated
    # 4 Divorced
    # 5 Widowed
    # 6 Never married
    # Explicit copy so the column assignment below writes to an independent frame
    # instead of a slice of demo (avoids SettingWithCopyWarning).
    data_2015 = demo[['ID','householdID', 'communityID','ba000_w2_3', 'ba004_w3_1', 'be001']].copy()
    # Birth-year handling: take ba002_1 when ba002 == 2, otherwise keep ba004_w3_1.
    data_2015['ba004_w3_1'] = demo['ba002_1'].where(demo['ba002'] == 2, demo['ba004_w3_1'])

    # Place of residence
    data_2015 = pd.merge(data_2015, psu[['communityID', 'province', 'city']], on = "communityID", how="left")

    # Height, weight, systolic and diastolic blood pressure
    biomarkers_select = biomarkers[['ID','householdID', 'communityID','qi002', 'ql002', 'qa011','qa012']]
    data_2015 = pd.merge(data_2015, biomarkers_select, on = ["ID", "householdID", "communityID"], how="left")

    # White blood cells (WBC), mean corpuscular volume (MCV), platelets, blood urea
    # nitrogen (BUN), glucose, creatinine, total cholesterol, triglycerides, HDL,
    # LDL cholesterol, C-reactive protein (CRP), glycated hemoglobin (HbA1c),
    # uric acid, hematocrit, hemoglobin, cystatin C
    blood = blood[['ID', 'bl_wbc','bl_mcv','bl_plt','bl_bun','bl_glu','bl_crea','bl_cho', 'bl_tg', 'bl_hdl', 'bl_ldl','bl_crp','bl_hbalc','bl_ua', 'bl_hct', 'bl_hgb','bl_cysc']]
    data_2015 = pd.merge(data_2015, blood, on = ["ID"], how="left")

    # Chronic conditions:
    # (1)  Hypertension
    # (2)  Dyslipidemia (elevation of low density lipoprotein, triglycerides (TGs), and total cholesterol, or a low high density lipoprotein level)
    # (3)  Diabetes or high blood sugar (including impaired glucose tolerance and elevated fasting glucose)
    # (4)  Cancer or malignant tumor (excluding minor skin cancers)
    # (5)  Chronic lung diseases, such as chronic bronchitis, emphysema, cor pulmonale (excluding tumors, or cancer)
    # (6)  Liver disease (except fatty liver, tumors, and cancer)
    # (7)  Heart attack, coronary heart disease, angina, congestive heart failure, or other heart problems
    # (8)  Stroke
    # (9)  Kidney disease (except for tumor or cancer)
    # (10) Stomach or other digestive disease (except for tumor or cancer)
    # (11) Emotional, nervous, or psychiatric problems
    # (12) Memory-related disease (e.g. dementia, brain atrophy, Parkinson's disease)
    # (13) Arthritis or rheumatism
    # (14) Asthma
    health_status_select = health_status[['ID','householdID', 'communityID', 'da007_1_', 'da007_2_','da007_3_'
                                   ,'da007_4_','da007_5_','da007_6_','da007_7_','da007_8_','da007_9_','da007_10_','da007_11_'
                                   ,'da007_12_','da007_13_','da007_14_','da051_1_','da051_2_', 'da051_3_'
                                   ,'da052_1_','da052_2_','da052_3_','da053_1_','da053_2_','da053_3_','da054_1_','da054_2_','da054_3_'
                                   ,'da055_1_','da055_2_','da055_3_', 'da059','da061','da063'
                                   ,'da069']]

    # Cognition score. Each (score column, source column, expected value) item
    # earns one point when the source equals the expected value; anything else,
    # including a missing value, earns zero.
    # NOTE(review): the original comment describes 10 + 10 + 1 points, but 33
    # binary items are summed here (two recall lists of 11 options each) -
    # confirm whether option 11 is meant to score.
    cognition_items = [
        ("dc001s1_score", "dc001s1", 1),
        ("dc001s2_score", "dc001s2", 2),
        ("dc001s3_score", "dc001s3", 3),
        ("dc002_score", "dc002", 1),
        ("dc003_score", "dc003", 1),
        ("dc019_score", "dc019", 93),
        ("dc020_score", "dc020", 86),
        ("dc021_score", "dc021", 79),
        ("dc022_score", "dc022", 72),
        ("dc023_score", "dc023", 65),
    ]
    # Word recall, two lists of option columns
    cognition_items += [(f"dc006s{k}_score", f"dc006s{k}", k) for k in range(1, 12)]
    cognition_items += [(f"dc027s{k}_score", f"dc027s{k}", k) for k in range(1, 12)]
    # Figure drawing
    cognition_items.append(("draw_score", "dc025", 1))

    cognition_total = None
    for score_column, source_column, expected_value in cognition_items:
        health_status[score_column] = health_status[source_column].apply(
            lambda answer, expected=expected_value: 1 if answer == expected else 0
        )
        if cognition_total is None:
            cognition_total = health_status[score_column]
        else:
            cognition_total = cognition_total + health_status[score_column]

    # Psychological score: each of dc009..dc018 is shifted to start at 0.
    # dc013 and dc016 are scored as 4 - answer (presumably the reverse-worded
    # items - confirm against the questionnaire).
    mood_total = None
    for mood_item in ["dc009", "dc010", "dc011", "dc012", "dc013",
                      "dc014", "dc015", "dc016", "dc017", "dc018"]:
        if mood_item in ("dc013", "dc016"):
            health_status[mood_item + "_score"] = 4 - health_status[mood_item]
        else:
            health_status[mood_item + "_score"] = health_status[mood_item] - 1
        if mood_total is None:
            mood_total = health_status[mood_item + "_score"]
        else:
            mood_total = mood_total + health_status[mood_item + "_score"]

    # Attach both totals to the health-status selection and bring them over with
    # the same keyed merge. Assigning them straight onto data_2015 would pair
    # rows by positional index, which is only right if the demographic and
    # health-status tables happen to have identical row order and row count.
    health_status_scored = health_status_select.assign(
        Cognition_score=cognition_total,
        psychiatric_score=mood_total,
    )
    data_2015 = pd.merge(data_2015, health_status_scored, on = ["ID", 'householdID', 'communityID'], how="left")

    # Tag the wave, normalise the column names, and stack under the earlier waves.
    data_2015["wave"] = year
    change_columns(data_2015)
    data_2015 = pd.concat([data_2013, data_2015], axis=0)
			
 
				+
			
 
    # 2018 wave: load the raw tables, assemble one row per respondent, score
    # cognition and mood from the separate Cognition table, then append the
    # result to the running frame of earlier waves.
    year = "2018"
    demo, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Demographic_Background.dta")
    # The PSU table is read from the CHARLS2013 folder, not from this wave's folder.
    psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2013/PSU.dta", encoding='gbk')
    health_status, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Status_and_Functioning.dta")
    # NOTE(review): health_care is loaded but not used anywhere in this wave's section.
    health_care, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Care_and_Insurance.dta")
    cognition, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Cognition.dta")

    # Sex, age, marital status.
    # Marital status codes:
    # 1 Married with spouse present
    # 2 Married but not living with spouse temporarily for reasons such as work
    # 3 Separated
    # 4 Divorced
    # 5 Widowed
    # 6 Never married
    data_2018 = demo[['ID','householdID', 'communityID','ba000_w2_3', 'ba004_w3_1', 'be001']]
    # Place of residence
    data_2018 = pd.merge(data_2018, psu[['communityID', 'province', 'city']], on = "communityID", how="left")

    # Height, weight, systolic and diastolic blood pressure: filled with NaN for this wave.
    data_2018[['qi002', 'ql002', 'qa011','qa012']]=np.nan

    # Blood markers (WBC, MCV, platelets, BUN, glucose, creatinine, total
    # cholesterol, triglycerides, HDL, LDL, CRP, HbA1c, uric acid, hematocrit,
    # hemoglobin, cystatin C): filled with NaN for this wave.
    data_2018[['bl_wbc','bl_mcv','bl_plt','bl_bun','bl_glu','bl_crea','bl_cho', 'bl_tg', 'bl_hdl', 'bl_ldl','bl_crp','bl_hbalc','bl_ua', 'bl_hct', 'bl_hgb','bl_cysc']]=np.nan

    # Chronic conditions:
    # (1)  Hypertension
    # (2)  Dyslipidemia (elevation of low density lipoprotein, triglycerides (TGs), and total cholesterol, or a low high density lipoprotein level)
    # (3)  Diabetes or high blood sugar (including impaired glucose tolerance and elevated fasting glucose)
    # (4)  Cancer or malignant tumor (excluding minor skin cancers)
    # (5)  Chronic lung diseases, such as chronic bronchitis, emphysema, cor pulmonale (excluding tumors, or cancer)
    # (6)  Liver disease (except fatty liver, tumors, and cancer)
    # (7)  Heart attack, coronary heart disease, angina, congestive heart failure, or other heart problems
    # (8)  Stroke
    # (9)  Kidney disease (except for tumor or cancer)
    # (10) Stomach or other digestive disease (except for tumor or cancer)
    # (11) Emotional, nervous, or psychiatric problems
    # (12) Memory-related disease (e.g. dementia, brain atrophy, Parkinson's disease)
    # (13) Arthritis or rheumatism
    # (14) Asthma
    health_status_select = health_status[['ID','householdID', 'communityID', 'da007_1_', 'da007_2_','da007_3_'
                                   ,'da007_4_','da007_5_','da007_6_','da007_7_','da007_8_','da007_9_','da007_10_','da007_11_'
                                   ,'da007_12_','da007_13_','da007_14_','da051_1_','da051_2_', 'da051_3_'
                                   ,'da052_1_','da052_2_','da052_3_','da053_1_','da053_2_','da053_3_','da054_1_','da054_2_','da054_3_'
                                   ,'da055_1_','da055_2_','da055_3_', 'da059','da061','da063'
                                   ,'da069']]

    data_2018 = pd.merge(data_2018, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")

    # Cognition score. Each (score column, source column, expected value) item
    # earns one point when the source equals the expected value; anything else,
    # including a missing value, earns zero.
    # NOTE(review): dc006_w4 is compared with 2 and dc003_w4 with 3, mirroring the
    # option numbers of earlier waves - confirm these codes against the 2018
    # codebook. The original comment also describes 10 + 10 + 1 points while 33
    # binary items are summed.
    cognition_items = [
        ("dc001s1_score", "dc001_w4", 1),
        ("dc001s2_score", "dc006_w4", 2),
        ("dc001s3_score", "dc003_w4", 3),
        ("dc002_score", "dc005_w4", 1),
        ("dc003_score", "dc002_w4", 1),
        ("dc019_score", "dc014_w4_1_1", 93),
        ("dc020_score", "dc014_w4_2_1", 86),
        ("dc021_score", "dc014_w4_3_1", 79),
        ("dc022_score", "dc014_w4_4_1", 72),
        ("dc023_score", "dc014_w4_5_1", 65),
    ]
    # Word recall, two lists of option columns
    cognition_items += [(f"dc006s{k}_score", f"dc028_w4_s{k}", k) for k in range(1, 12)]
    cognition_items += [(f"dc027s{k}_score", f"dc047_w4_s{k}", k) for k in range(1, 12)]
    # Figure drawing
    cognition_items.append(("draw_score", "dc024_w4", 1))

    cognition_total = None
    for score_column, source_column, expected_value in cognition_items:
        cognition[score_column] = cognition[source_column].apply(
            lambda answer, expected=expected_value: 1 if answer == expected else 0
        )
        if cognition_total is None:
            cognition_total = cognition[score_column]
        else:
            cognition_total = cognition_total + cognition[score_column]

    # Psychological score: each of dc009..dc018 is shifted to start at 0.
    # dc013 and dc016 are scored as 4 - answer (presumably the reverse-worded
    # items - confirm against the questionnaire).
    mood_total = None
    for mood_item in ["dc009", "dc010", "dc011", "dc012", "dc013",
                      "dc014", "dc015", "dc016", "dc017", "dc018"]:
        if mood_item in ("dc013", "dc016"):
            cognition[mood_item + "_score"] = 4 - cognition[mood_item]
        else:
            cognition[mood_item + "_score"] = cognition[mood_item] - 1
        if mood_total is None:
            mood_total = cognition[mood_item + "_score"]
        else:
            mood_total = mood_total + cognition[mood_item + "_score"]

    # Join the totals by respondent instead of assigning them directly: a direct
    # assignment pairs rows by positional index, which is only right if the
    # Cognition table and the merged data_2018 share row order and row count.
    # Assumes Cognition.dta carries the respondent 'ID' column like the other
    # wave tables - TODO confirm.
    cognition_scores = cognition[["ID"]].assign(
        Cognition_score=cognition_total,
        psychiatric_score=mood_total,
    )
    data_2018 = pd.merge(data_2018, cognition_scores, on="ID", how="left")

    # Tag the wave, normalise the column names, and stack under the earlier waves.
    data_2018["wave"] = year
    change_columns(data_2018)
    data_2018 = pd.concat([data_2015, data_2018], axis=0)
			
 
				+
			
 
    # 2020 wave: load the raw tables, assemble one row per respondent and
    # compute the per-item cognition scores on health_status.
    year = "2020"
    demo, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Demographic_Background.dta")
    # The PSU table is read from the CHARLS2013 folder, not from this wave's folder.
    psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2013/PSU.dta", encoding='gbk')
    health_status, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Status_and_Functioning.dta")

    # Sex, age, marital status.
    # Marital status codes:
    # 1 Married with spouse present
    # 2 Married but not living with spouse temporarily for reasons such as work
    # 3 Separated
    # 4 Divorced
    # 5 Widowed
    # 6 Never married
    data_2020 = demo[['ID','householdID', 'communityID','ba001', 'ba003_1','ba011']]
    # Place of residence
    data_2020 = pd.merge(data_2020, psu[['communityID', 'province', 'city']], on = "communityID", how="left")

    # Height, weight, systolic and diastolic blood pressure: filled with NaN for this wave.
    # NOTE(review): this wave also adds a 'qa013' placeholder that the 2015 and
    # 2018 sections do not create - confirm the extra column is intended.
    data_2020[['qi002', 'ql002', 'qa011','qa012', 'qa013']]=np.nan

    # Blood markers (WBC, MCV, platelets, BUN, glucose, creatinine, total
    # cholesterol, triglycerides, HDL, LDL, CRP, HbA1c, uric acid, hematocrit,
    # hemoglobin, cystatin C): filled with NaN for this wave.
    data_2020[['bl_wbc','bl_mcv','bl_plt','bl_bun','bl_glu','bl_crea','bl_cho', 'bl_tg', 'bl_hdl', 'bl_ldl','bl_crp','bl_hbalc','bl_ua', 'bl_hct', 'bl_hgb','bl_cysc']]=np.nan

    # Chronic conditions:
    # (1)  Hypertension
    # (2)  Dyslipidemia (elevation of low density lipoprotein, triglycerides (TGs), and total cholesterol, or a low high density lipoprotein level)
    # (3)  Diabetes or high blood sugar (including impaired glucose tolerance and elevated fasting glucose)
    # (4)  Cancer or malignant tumor (excluding minor skin cancers)
    # (5)  Chronic lung diseases, such as chronic bronchitis, emphysema, cor pulmonale (excluding tumors, or cancer)
    # (6)  Liver disease (except fatty liver, tumors, and cancer)
    # (7)  Heart attack, coronary heart disease, angina, congestive heart failure, or other heart problems
    # (8)  Stroke
    # (9)  Kidney disease (except for tumor or cancer)
    # (10) Stomach or other digestive disease (except for tumor or cancer)
    # (11) Emotional, nervous, or psychiatric problems
    # (12) Memory-related disease (e.g. dementia, brain atrophy, Parkinson's disease)
    # (13) Arthritis or rheumatism
    # (14) Asthma
    # The 2020 wave separates Parkinson's disease from memory-related disease;
    # process_row rebuilds da003_12_ so it lines up with the earlier waves.
    health_status['da003_12_'] = health_status.apply(process_row, axis=1)
    # Explicit copy so the da051 rewrite below targets an independent frame
    # instead of a slice of health_status (avoids SettingWithCopyWarning).
    health_status_select = health_status[['ID','householdID', 'communityID', 'da003_1_', 'da003_2_','da003_3_'
                                   ,'da003_4_','da003_5_','da003_6_','da003_7_','da003_8_','da003_9_','da003_10_','da003_11_'
                                   ,'da003_12_','da003_14_','da003_15_','da032_1_','da032_2_', 'da032_3_'
                                   ,'da033_1_','da033_2_','da033_3_','da034_1_','da034_2_','da034_3_','da035_1_','da035_2_','da035_3_'
                                    ,'da036_1_','da036_2_','da036_3_', 'da046','da047','da050_1'
                                   ,'da051']].copy()
    health_status_select['da051'] = health_status_select['da051'].apply(update_da051)

    data_2020 = pd.merge(data_2020, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")

    # Per-item cognition scores. Each (score column, source column, expected
    # value) item earns one point when the source equals the expected value;
    # anything else, including a missing value, earns zero.
    # NOTE(review): dc005 is compared with 2 and dc003 with 3, mirroring the
    # option numbers of earlier waves - confirm these codes against the 2020
    # codebook.
    cognition_items = [
        ("dc001s1_score", "dc001", 1),
        ("dc001s2_score", "dc005", 2),
        ("dc001s3_score", "dc003", 3),
        ("dc002_score", "dc004", 1),
        ("dc003_score", "dc002", 1),
        ("dc019_score", "dc007_1", 93),
        ("dc020_score", "dc007_2", 86),
        ("dc021_score", "dc007_3", 79),
        ("dc022_score", "dc007_4", 72),
        ("dc023_score", "dc007_5", 65),
    ]
    # Word recall, two lists of option columns
    cognition_items += [(f"dc006s{k}_score", f"dc012_s{k}", k) for k in range(1, 12)]
    cognition_items += [(f"dc027s{k}_score", f"dc028_s{k}", k) for k in range(1, 12)]
    # Figure drawing
    cognition_items.append(("draw_score", "dc009", 1))

    for score_column, source_column, expected_value in cognition_items:
        health_status[score_column] = health_status[source_column].apply(
            lambda answer, expected=expected_value: 1 if answer == expected else 0
        )
			
 
				+
			
 
				+    data_2011["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
			
 
				+        health_status["dc001s3_score"] + health_status["dc002_score"]+ health_status["dc003_score"]+ \
			
 
				+        health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
			
 
				+        health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
			
 
				+        health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \
			
 
				+        health_status["dc006s5_score"] + health_status["dc006s6_score"] + health_status["dc006s7_score"] + \
			
 
				+        health_status["dc006s8_score"] + health_status["dc006s9_score"] + health_status["dc006s10_score"] + \
			
 
				+        health_status["dc006s11_score"] + health_status["dc027s1_score"]+ health_status["dc027s2_score"]+ \
			
 
				+        health_status["dc027s3_score"]+ health_status["dc027s4_score"]+ health_status["dc027s5_score"]+ \
			
 
				+        health_status["dc027s6_score"]+ health_status["dc027s7_score"]+ health_status["dc027s8_score"]+ \
			
 
				+        health_status["dc027s9_score"]+health_status["dc027s10_score"]+health_status["dc027s11_score"]+\
			
 
				+        health_status["draw_score"]
			
 
				+    #心理得分
			
 
				+    health_status["dc009_score"] = health_status["dc016"]-1
			
 
				+    health_status["dc010_score"] = health_status["dc017"]-1
			
 
				+    health_status["dc011_score"] = health_status["dc018"]-1
			
 
				+    health_status["dc012_score"] = health_status["dc019"]-1   
			
 
				+    health_status["dc013_score"] = 4 - health_status["dc020"] 
			
 
				+    health_status["dc014_score"] = health_status["dc021"]-1   
			
 
				+    health_status["dc015_score"] = health_status["dc022"]-1   
			
 
				+    health_status["dc016_score"] = 4 - health_status["dc023"]
			
 
				+    health_status["dc017_score"] = health_status["dc024"]-1   
			
 
				+    health_status["dc018_score"] = health_status["dc025"]-1 
			
 
				+    data_2020["psychiatric_score"] = health_status["dc009_score"] + health_status["dc010_score"] + health_status["dc011_score"] + \
			
 
				+        health_status["dc012_score"] + health_status["dc013_score"] + health_status["dc014_score"] + health_status["dc015_score"] + \
			
 
				+        health_status["dc016_score"] + health_status["dc017_score"] + health_status["dc018_score"]
			
 
				+    data_2020["wave"] = year
			
 
				+    change_columns(data_2020)
			
 
				+    data_2020 = pd.concat([data_2018, data_2020], axis=0)
			
 
				+
			
 
				+    #修改地区名称
			
 
				+    #省份、城市名称和污染物数据格式对齐
			
 
				+    #海东地区->海东市
			
 
				+    data_2020['city'] = data_2020['city'].replace('海东地区', '海东市')
			
 
				+    #北京 -> 北京市
			
 
				+    data_2020['city'] = data_2020['city'].replace('北京', '北京市')
			
 
				+    data_2020['province'] = data_2020['province'].replace('北京', '北京市')
			
 
				+    #哈尔滨 -> 哈尔滨市
			
 
				+    data_2020['city'] = data_2020['city'].replace('哈尔滨', '哈尔滨市')
			
 
				+    #天津 -> 天津市
			
 
				+    data_2020['city'] = data_2020['city'].replace('天津', '天津市')
			
 
				+    data_2020['province'] = data_2020['province'].replace('天津', '天津市')
			
 
				+    #广西省 -> 广西壮族自治区
			
 
				+    data_2020['province'] = data_2020['province'].replace('广西省', '广西壮族自治区')
			
 
				+    #巢湖市 -> 合肥市
			
 
				+    data_2020['city'] = data_2020['city'].replace('巢湖市', '合肥市')
			
 
				+    #襄樊市->襄阳市
			
 
				+    data_2020['city'] = data_2020['city'].replace('襄樊市', '襄阳市') 
			
 
				+    data_2020.to_csv("/root/r_base/CHARLS/result_all_new.csv", index=False)
			
 
				+    print(123)
			
--- a/CLHLS_P/CLHLS_process.py
+++ b/CLHLS_P/CLHLS_process.py
@@ -52,7 +52,7 @@ def get_mmse(columns_cognitive_98,columns_reaction_98,columns_attention_98, colu
 
				     # 计算总合
			
 
				     result['mmse_'+cognitive_name] = result["general_cognitive_"+cognitive_name] + result["reaction_"+cognitive_name]+ result["attention_calculation_"+cognitive_name]+ result["memory_"+cognitive_name]+ result["language_selfcoordination_"+cognitive_name]
			
 
				 
			
 
				-if __name__ == "__main__":
			
 
				+def deal_1998_2018_data():
			
 
				     sav_file_path = "CLHLS/clhls_1998_2018_longitudinal_dataset_released_version1.sav"
			
 
				     csv_file_path = "CLHLS/clhls_1998_2018_longitudinal_dataset_released_version1.csv"
			
 
				     # 将sav数据转为csv
			
@@ -200,8 +200,8 @@ if __name__ == "__main__":
 
				                     "c11_14", "c12_14", "c13_14", "c14_14", "c15_14", "c21a_14", "c21b_14", "c21c_14", "c31a_14", "c31b_14", "c31c_14", "c31d_14", "c31e_14", "c32_14", "c41a_14",  "c41b_14", "c41c_14", "c51a_14", "c51b_14", "c52_14", "c53a_14", "c53b_14", "c53c_14",
			
 
				                     "c11_18", "c12_18", "c13_18", "c14_18", "c15_18", "c21a_18", "c21b_18", "c21c_18", "c31a_18", "c31b_18", "c31c_18", "c31d_18", "c31e_18", "c32_18", "c41a_18",  "c41b_18", "c41c_18", "c51a_18", "c51b_18", "c52_18", "c53a_18", "c53b_18", "c53c_18"]
			
 
				     trans_mmse(columns_mmse, data)
			
 
				-    columns_mmse_c16 = ["c16", "c16_0", "c16_2", "c16_5", "c16_8", "c16_11", "c16_14", "c16_8"]
			
 
				-    trans_mmse_c16(columns_mmse, data)
			
 
				+    columns_mmse_c16 = ["c16", "c16_0", "c16_2", "c16_5", "c16_8", "c16_11", "c16_14", "c16_18"]
			
 
				+    trans_mmse_c16(columns_mmse_c16, data)
			
 
				     columns_cognitive_98 = ["c11", "c12", "c13", "c14", "c15", "c16"]
			
 
				     columns_reaction_98 = ["c21a", "c21b", "c21c"]
			
 
				     columns_attention_98 = ["c31a", "c31b", "c31c", "c31d", "c31e", "c32"]
			
@@ -250,8 +250,189 @@ if __name__ == "__main__":
 
				     columns_memory_18 = ["c41a_18",  "c41b_18", "c41c_18"]
			
 
				     columns_language_18 = ["c51a_18", "c51b_18", "c52_18", "c53a_18", "c53b_18", "c53c_18"]
			
 
				     get_mmse(columns_cognitive_18,columns_reaction_18,columns_attention_18, columns_memory_18 ,columns_language_18, data, result, "18")
			
 
				+
			
 
				+    # #血液指标
			
 
				+    # columns_to_exclude = ['midn', 'trueage', 'a1']  # 替换为你要排除的列名
			
 
				+    # #2008
			
 
				+    # biomarker_08 = pd.read_csv("CLHLS/biomarker_dataset_CLHLS_2008-1.tab", sep='\t')
			
 
				+    # biomarker_08 = biomarker_08.drop(columns=columns_to_exclude)
			
 
				+    # columns_bio = ["id"]
			
 
				+    # for col in biomarker_08.columns:
			
 
				+    #     if not col == "id":
			
 
				+    #         columns_bio.append(col+"_08")
			
 
				+    # biomarker_08.columns = columns_bio
			
 
				+    # result = pd.merge(result, biomarker_08, on = ["id"], how="left")
			
 
				     print(result.head())
			
 
				     result.to_csv("CLHLS/clhls_1998_2018_result.csv", index=False)
			
 
				+
			
 
				+def deal_2008_2018_data():
			
 
				+    sav_file_path = "CLHLS/clhls_2008_2018_longitudinal_dataset_released_version1.sav"
			
 
				+    csv_file_path = "CLHLS/clhls_2008_2018_longitudinal_dataset_released_version1.csv"
			
 
				+    # 将sav数据转为csv
			
 
				+    # sav2csv(sav_file_path, csv_file_path)
			
 
				+    #处理数据
			
 
				+    data = pd.read_csv(csv_file_path)
			
 
				+    # 存活状态0存活；1死亡；-9失访；-8死亡/失访
			
 
				+    result = data[['id', 'dth08_11', 'dth11_14', 'dth14_18']]
			
 
				+    # 人口特征学变量
			
 
				+    # 8/9代表无法回答和缺失
			
 
				+    # 年龄
			
 
				+    result[['trueage_08','trueage_11', 'trueage_14', 'trueage_18']] = data[['trueage','vage_11', 'trueage_14', 'trueage_18']]
			
 
				+    # 性别 1男；0女
			
 
				+    result['sex'] = data['a1'].apply(lambda x : 1 if x==1 else 0)
			
 
				+    # 民族 1汉族；0非汉族
			
 
				+    result['ethnic'] = data['a2'].apply(lambda x : 1 if x==1 else 0)
			
 
				+    # 出生地 1城市；0农村
			
 
				+    result['birth_place'] = data['a43'].apply(lambda x : 1 if x == 1 else (0 if x == 2 else np.nan))
			
 
				+    # 教育状况 无11年
			
 
				+    result['edu_08'] = data['f1'].apply(lambda x : np.nan if x==88 or x==99 else x)
			
 
				+    result['edu_14'] = data['f1_14'].apply(lambda x : np.nan if x==88 or x==99 else x)
			
 
				+    result['edu_18'] = data['f1_18'].apply(lambda x : np.nan if x==88 or x==99 else x)
			
 
				+    # 婚姻状况 0separated/divorced/widowed/never married; 1currently married and living with spouse
			
 
				+    result['marital_08'] = data['f41'].apply(lambda x : 0 if x==2 or x==3 or x==4 or x==5 else (np.nan if x==9 else 1))
			
 
				+    result['marital_11'] = data['f41_11'].apply(lambda x : 0 if x==2 or x==3 or x==4 or x==5 else (np.nan if x==9 or x==-9 or x==-8 or x==-7 else 1))
			
 
				+    result['marital_14'] = data['f41_14'].apply(lambda x : 0 if x==2 or x==3 or x==4 or x==5 else (np.nan if x==9 else 1))
			
 
				+    result['marital_18'] = data['f41_18'].apply(lambda x : 0 if x==2 or x==3 or x==4 or x==5 else (np.nan if x==9 else 1))
			
 
				+    # 生活是否富裕 1富裕及以上；0一般及以下
			
 
				+    result['econ_state_08'] = data['f34'].apply(lambda x : 0 if x==2 or x==3 else (1 if x==1 else np.nan))
			
 
				+    result['econ_state_11'] = data['f34_11'].apply(lambda x : 0 if x==4 or x==3 or x==5 else (1 if x==1 or x==2 else np.nan))
			
 
				+    result['econ_state_14'] = data['f34_14'].apply(lambda x : 0 if x==4 or x==3 or x==5 else (1 if x==1 or x==2 else np.nan))
			
 
				+    result['econ_state_18'] = data['f34_18'].apply(lambda x : 0 if x==4 or x==3 or x==5 else (1 if x==1 or x==2 else np.nan))
			
 
				+    # 上一年家庭收入 99998超过10万
			
 
				+    result['income_08'] = data['f35'].apply(lambda x : x if x== 99998 else np.nan)
			
 
				+    result['income_11'] = data['f35_11'].apply(lambda x : x if x== 99998 else np.nan)
			
 
				+    result['income_14'] = data['f35_14'].apply(lambda x : x if x== 99998 else np.nan)
			
 
				+    result['income_18'] = data['f35_18'].apply(lambda x : x if x== 99998 else np.nan)
			
 
				+    # 居住状态 1与家庭成员同住；2独居；3在机构居住
			
 
				+    result['co_residence_08'] = data['a51'].apply(lambda x : np.nan if x==9 else x)
			
 
				+    result['co_residence_11'] = data['a51_11'].apply(lambda x : np.nan if x==9 or x==-9 or x == -8 or x == -7 else x)
			
 
				+    result['co_residence_14'] = data['a51_14'].apply(lambda x : np.nan if x==9 else x)
			
 
				+    result['co_residence_18'] = data['a51_18'].apply(lambda x : np.nan if x==9 else x)
			
 
				+    # 目前是否吸烟 1是；2否
			
 
				+    result['smoke_08'] = data['d71'].apply(lambda x : np.nan if x==9 else x)
			
 
				+    result['smoke_11'] = data['d71_11'].apply(lambda x : np.nan if x==9 or x==-9 or x == -8 or x == -7 else x)
			
 
				+    result['smoke_14'] = data['d71_14'].apply(lambda x : np.nan if x==9 else x)
			
 
				+    result['smoke_18'] = data['d71_18'].apply(lambda x : np.nan if x==9 else x)
			
 
				+    # 目前是否饮酒 1是；2否
			
 
				+    result['drink_08'] = data['d81'].apply(lambda x : np.nan if x==9 else x)
			
 
				+    result['drink_11'] = data['d81_11'].apply(lambda x : np.nan if x==9 or x==-9 or x == -8 or x == -7 else x)
			
 
				+    result['drink_14'] = data['d81_14'].apply(lambda x : np.nan if x==9 else x)
			
 
				+    result['drink_18'] = data['d81_18'].apply(lambda x : np.nan if x==9 or x == 8 else x)
			
 
				+    # 目前是否锻炼
			
 
				+    result['exercise_08'] = data['d91'].apply(lambda x : np.nan if x==9 else x)
			
 
				+    result['exercise_11'] = data['d91_11'].apply(lambda x : np.nan if x==9 or x==-9 or x == -8 or x == -7 or x == 8 else x)
			
 
				+    result['exercise_14'] = data['d91_14'].apply(lambda x : np.nan if x==9 else x)
			
 
				+    result['exercise_18'] = data['d91_18'].apply(lambda x : np.nan if x==9 or x == 8 else x)
			
 
				+    # 健康状况变量 1very good; 2good; 3so so; 4bad; 5very bad; 
			
 
				+    result['self_reported_helth_08'] = data['b12'].apply(lambda x : np.nan if x==9 or x==8 else x)
			
 
				+    result['self_reported_helth_11'] = data['b12_11'].apply(lambda x : np.nan if x==9 or x==-9 or x == -8 or x == -7 or x == 8 else x)
			
 
				+    result['self_reported_helth_14'] = data['b12_14'].apply(lambda x : np.nan if x==9 or x == 8 else x)
			
 
				+    result['self_reported_helth_18'] = data['b12_18'].apply(lambda x : np.nan if x==9 or x == 8 else x)
			
 
				+    # 慢性病
			
 
				+    result['chronic_08'] = data['g14a1'].apply(lambda x : np.nan if x==66 or x==89 or x==99 or x==-9 or x == -6 or x == -1 else x)
			
 
				+    result['chronic_11'] = data['g14a1_11'].apply(lambda x : np.nan if x==66 or x==88 or x==99 or x==-9 or x == -8 or x == -7 or x==-1 else x)
			
 
				+    result['chronic_14'] = data['g14a1_14'].apply(lambda x : np.nan if x==66 or x==99 or x==88 or x == -1 else x)
			
 
				+    result['chronic_18'] = data['g14a1_18'].apply(lambda x : np.nan if x==66 or x==99 or x==88 or x == -1 else x)
			
 
				+    
			
 
				+    # 抑郁量表得分-only 18年  0无抑郁症；1有抑郁症
			
 
				+    # 简版流调中心抑郁量表（CESD-10）10个CESD项目，每个项目的分值范围为0到3分，将每个CESD项目的分值相加，得到总得分
			
 
				+    # 定义转换规则
			
 
				+    transformation_one = {1: 3, 2: 2, 3: 2, 4: 1, 5: 0}
			
 
				+    # 应用转换规则
			
 
				+    columns_cesd_one = ['b31_18', 'b32_18', 'b33_18', 'b34_18', 'b36_18', 'b38_18', 'b39_18']
			
 
				+    for column_one in columns_cesd_one:
			
 
				+        data[column_one] = data[column_one].map(transformation_one).fillna(np.nan)  # 将8转换为缺失值
			
 
				+    # 定义转换规则
			
 
				+    transformation_two = {1: 0, 2: 1, 3: 1, 4: 2, 5: 3}
			
 
				+    # 应用转换规则
			
 
				+    columns_cesd_two = ['b35_18', 'b37_18', 'b310a_18']
			
 
				+    for column_two in columns_cesd_two:
			
 
				+        data[column_two] = data[column_two].map(transformation_two).fillna(np.nan)  # 将8转换为缺失值
			
 
				+    result['cesd'] = data['b31_18'] + data['b32_18'] + data['b33_18'] + data['b34_18'] + data['b36_18'] + data['b38_18'] + data['b39_18'] + data['b35_18'] + data['b37_18'] + data['b310a_18'] 
			
 
				+    result['cesd_d'] = result['cesd'].apply(lambda x : 0 if x >= 0 and x <= 15 else (1 if x >=16 and x <= 30 else np.nan))
			
 
				+    
			
 
				+    # 日常生活活动能力 0无残疾；1有残疾
			
 
				+    # ADL6个项目bathing, dressing, eating, indoor transferring, toileting, and continence, 每个项目的分值范围是0到2分, 将每个ADL项目的得分相加，得到总得分
			
 
				+    columns_adl_08 = ['e1', 'e2', 'e3', 'e4', 'e5', 'e6']
			
 
				+    trans_adl(columns_adl_08, data, result, "adl_08")
			
 
				+    columns_adl_11 = ['e1_11', 'e2_11', 'e3_11', 'e4_11', 'e5_11', 'e6_11']
			
 
				+    trans_adl(columns_adl_11, data, result, "adl_11")
			
 
				+    columns_adl_14 = ['e1_14', 'e2_14', 'e3_14', 'e4_14', 'e5_14', 'e6_14']
			
 
				+    trans_adl(columns_adl_14, data, result, "adl_14")
			
 
				+    columns_adl_18 = ['e1_18', 'e2_18', 'e3_18', 'e4_18', 'e5_18', 'e6_18']
			
 
				+    trans_adl(columns_adl_18, data, result, "adl_18")
			
 
				+    # 认知功能 0有认知功能障碍；1认知功能正常
			
 
				+    # 简易精神状态评价量表（Mini-mental State Examination, MMSE)，该量表包括一般能力（12分），反应能力（3分），注意力与计算力（6分），回忆力（3分），语言理解
			
 
				+    # 与自我协调能力（6分）5个部分24个问题，总分30分，分数越高，表示认知功能水平越高
			
 
				+    columns_mmse = ["c11", "c12", "c13", "c14", "c15", "c21a", "c21b", "c21c", "c31a", "c31b", "c31c", "c31d", "c31e", "c32", "c41a",  "c41b", "c41c", "c51a", "c51b", "c52", "c53a", "c53b", "c53c",
			
 
				+                    "c11_11", "c12_11", "c13_11", "c14_11", "c15_11", "c21a_11", "c21b_11", "c21c_11", "c31a_11", "c31b_11", "c31c_11", "c31d_11", "c31e_11", "c32_11", "c41a_11",  "c41b_11", "c41c_11", "c51a_11", "c51b_11", "c52_11", "c53a_11", "c53b_11", "c53c_11",
			
 
				+                    "c11_14", "c12_14", "c13_14", "c14_14", "c15_14", "c21a_14", "c21b_14", "c21c_14", "c31a_14", "c31b_14", "c31c_14", "c31d_14", "c31e_14", "c32_14", "c41a_14",  "c41b_14", "c41c_14", "c51a_14", "c51b_14", "c52_14", "c53a_14", "c53b_14", "c53c_14",
			
 
				+                    "c11_18", "c12_18", "c13_18", "c14_18", "c15_18", "c21a_18", "c21b_18", "c21c_18", "c31a_18", "c31b_18", "c31c_18", "c31d_18", "c31e_18", "c32_18", "c41a_18",  "c41b_18", "c41c_18", "c51a_18", "c51b_18", "c52_18", "c53a_18", "c53b_18", "c53c_18"]
			
 
				+    trans_mmse(columns_mmse, data)
			
 
				+    columns_mmse_c16 = ["c16", "c16_11", "c16_14", "c16_18"]
			
 
				+    trans_mmse_c16(columns_mmse_c16, data)
			
 
				+    columns_cognitive_08 = ["c11", "c12", "c13", "c14", "c15", "c16"]
			
 
				+    columns_reaction_08 = ["c21a", "c21b", "c21c"]
			
 
				+    columns_attention_08 = ["c31a", "c31b", "c31c", "c31d", "c31e", "c32"]
			
 
				+    columns_memory_08 = ["c41a",  "c41b", "c41c"]
			
 
				+    columns_language_08 = ["c51a", "c51b", "c52", "c53a", "c53b", "c53c"]
			
 
				+    get_mmse(columns_cognitive_08,columns_reaction_08,columns_attention_08, columns_memory_08 ,columns_language_08, data, result, "08")
			
 
				+    columns_cognitive_11 = ["c11_11", "c12_11", "c13_11", "c14_11", "c15_11", "c16_11"]
			
 
				+    columns_reaction_11 = ["c21a_11", "c21b_11", "c21c_11"]
			
 
				+    columns_attention_11 = ["c31a_11", "c31b_11", "c31c_11", "c31d_11", "c31e_11", "c32_11"]
			
 
				+    columns_memory_11 = ["c41a_11",  "c41b_11", "c41c_11"]
			
 
				+    columns_language_11 = ["c51a_11", "c51b_11", "c52_11", "c53a_11", "c53b_11", "c53c_11"]
			
 
				+    get_mmse(columns_cognitive_11,columns_reaction_11,columns_attention_11, columns_memory_11 ,columns_language_11, data, result, "11")
			
 
				+    columns_cognitive_14 = ["c11_14", "c12_14", "c13_14", "c14_14", "c15_14", "c16_14"]
			
 
				+    columns_reaction_14 = ["c21a_14", "c21b_14", "c21c_14"]
			
 
				+    columns_attention_14 = ["c31a_14", "c31b_14", "c31c_14", "c31d_14", "c31e_14", "c32_14"]
			
 
				+    columns_memory_14 = ["c41a_14",  "c41b_14", "c41c_14"]
			
 
				+    columns_language_14 = ["c51a_14", "c51b_14", "c52_14", "c53a_14", "c53b_14", "c53c_14"]
			
 
				+    get_mmse(columns_cognitive_14,columns_reaction_14,columns_attention_14, columns_memory_14 ,columns_language_14, data, result, "14")
			
 
				+    columns_cognitive_18 = ["c11_18", "c12_18", "c13_18", "c14_18", "c15_18", "c16_18"]
			
 
				+    columns_reaction_18 = ["c21a_18", "c21b_18", "c21c_18"]
			
 
				+    columns_attention_18 = ["c31a_18", "c31b_18", "c31c_18", "c31d_18", "c31e_18", "c32_18"]
			
 
				+    columns_memory_18 = ["c41a_18",  "c41b_18", "c41c_18"]
			
 
				+    columns_language_18 = ["c51a_18", "c51b_18", "c52_18", "c53a_18", "c53b_18", "c53c_18"]
			
 
				+    get_mmse(columns_cognitive_18,columns_reaction_18,columns_attention_18, columns_memory_18 ,columns_language_18, data, result, "18")
			
 
				+
			
 
				+    # #血液指标
			
 
				+    #2008
			
 
				+    columns_bio = ["id"]
			
 
				+    biomarker = pd.read_csv("CLHLS/biomarker_dataset_CLHLS_2008.tab", sep='\t')
			
 
				+    biomarker = biomarker.loc[:,["id", "plt", "lymph", "hdl"]]
			
 
				+    for col in biomarker.columns:
			
 
				+        if not col == "id":
			
 
				+            columns_bio.append(col+"_08")
			
 
				+    biomarker.columns = columns_bio
			
 
				+    result = pd.merge(result, biomarker, on = ["id"], how="left")
			
 
				+
			
 
				+    #2012
			
 
				+    columns_bio = ["id"]
			
 
				+    biomarker = pd.read_csv("CLHLS/biomarker_dataset_CLHLS_2012.tab", sep='\t')
			
 
				+    biomarker = biomarker.loc[:,["id", "plt", "lymph", "hdlc"]]
			
 
				+    for col in biomarker.columns:
			
 
				+        if not col == "id":
			
 
				+            columns_bio.append(col+"_12")
			
 
				+    biomarker.columns = columns_bio
			
 
				+    result = pd.merge(result, biomarker, on = ["id"], how="left")
			
 
				+
			
 
				+    #2014
			
 
				+    columns_bio = ["id"]
			
 
				+    biomarker = pd.read_csv("CLHLS/biomarker_dataset_CLHLS_2014.tab", sep='\t')
			
 
				+    biomarker = biomarker.loc[:,["id", "plt", "lymph", "hdlc"]]
			
 
				+    for col in biomarker.columns:
			
 
				+        if not col == "id":
			
 
				+            columns_bio.append(col+"_14")
			
 
				+    biomarker.columns = columns_bio
			
 
				+    result = pd.merge(result, biomarker, on = ["id"], how="left")
			
 
				+
			
 
				+    print(result.head())
			
 
				+    result.to_csv("CLHLS/clhls_2008_2018_result.csv", index=False)
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    # deal_1998_2018_data()
			
 
				+    deal_2008_2018_data()
			
 
				     print(123)