import pandas as pd
import numpy as np
import pyreadstat

#统一列名
def change_columns(df):
    """Rename all columns of ``df`` in place to the unified 64-name schema.

    Names are assigned strictly by position, so ``df`` must already contain
    exactly 64 columns in the order listed below; pandas raises
    ``ValueError`` when the column count differs.  The frame is mutated and
    nothing is returned.

    NOTE(review): several names contain typos or a leading space
    (e.g. ``' Kidney_Diease'``).  They are kept verbatim because other code
    may look columns up by these exact strings.
    """
    # Respondent identifiers, demographics and place of residence.
    id_and_demographics = [
        "ID", "householdID", "communityID", "sex", "birth_year",
        "marital_status", "province", "city",
    ]
    # Height, weight and blood pressure.
    body_measurements = ["Height", "Weight", "Systolic", "Diastolic"]
    # Blood test results.
    blood_markers = [
        "bl_wbc", "bl_mcv", "bl_plt", "bl_bun", "bl_glu", "bl_crea",
        "bl_cho", "bl_tg", "bl_hdl", "bl_ldl", "bl_crp",
        "bl_hbalc", "bl_ua", "bl_hct", "bl_hgb", "bl_cysc",
    ]
    # Chronic-condition items (14 of them).
    chronic_conditions = [
        "Hypertension", "Dyslipidemia", "Disabetes_or_High_Blood_Sugar",
        "Cancer_or_Malignant_Tumor", "Chronic_Lung_Diseases",
        "Liver_Disease", "Heart_Problems", "Stroke", " Kidney_Diease",
        "Stomach_or_Other_Digestive_Disease",
        "Emotional_Nervous_or_Psychiatric_Problems",
        " Memory_Related_Disease", " Arthritis_or_Rheumatism", "Asthma",
    ]
    # Physical-activity items: three activity kinds for each of five questions.
    physical_activity = [
        "Vigorous_Activities", "Moderate_Physical_Effort", "Walking",
        "Vigorous_Activities_day", "Moderate_Physical_Effort_day", "Walking_day",
        "Vigorous_Activities_2h", "Moderate_Physical_Effort_2h", "Walking_2h",
        "Vigorous_Activities_30m", "Moderate_Physical_Effort_30m", "Walking_30m",
        "Vigorous_Activities_4h", "Moderate_Physical_Effort_4h", "Walking_4h",
    ]
    # Smoking and drinking items.
    smoking_drinking = ["Smoke", "Smoke_still", "Number_Cigarettes", "Drink"]
    # Derived scores and the survey-wave label.
    scores_and_wave = ["Cognition_score", "Psychiatric_score", "wave"]

    unified_names = (
        id_and_demographics
        + body_measurements
        + blood_markers
        + chronic_conditions
        + physical_activity
        + smoking_drinking
        + scores_and_wave
    )
    df.columns = unified_names
# 2020年把帕金森和记忆病症分开，需要和以前对齐   
def process_row(row):
    """Collapse the answers in ``da003_12_`` and ``da003_13_`` into one code.

    Per the note in this file, the 2020 wave records Parkinson's disease and
    memory-related disease as separate items, whereas earlier waves use a
    single item; this merges the two answers back into one.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) holding the keys
            ``'da003_12_'`` and ``'da003_13_'``.

    Returns:
        ``1`` if either answer equals 1; ``2`` if at least one answer equals
        2 and the other is 2 or missing; ``np.nan`` in every other case.
    """
    answers = (row['da003_12_'], row['da003_13_'])

    # A single 1 is enough, whatever the other item says.
    if answers[0] == 1 or answers[1] == 1:
        return 1

    # Result is 2 only when nothing contradicts it: each answer is either
    # 2 or missing, and at least one of them really is 2.
    only_two_or_missing = all(ans == 2 or pd.isna(ans) for ans in answers)
    has_explicit_two = any(ans == 2 for ans in answers)
    if only_two_or_missing and has_explicit_two:
        return 2

    # Both missing, or any other unexpected code: treat as missing.
    return np.nan
    
def update_da051(value):
    """Swap the codes 1 and 3, leaving every other value untouched.

    Args:
        value: A single answer code (may be NaN or any other object).

    Returns:
        ``3`` when ``value == 1``, ``1`` when ``value == 3``, otherwise
        ``value`` itself, unchanged.

    NOTE(review): presumably this aligns a ``da051`` item whose 1/3 coding
    is reversed in one wave — confirm against the caller.
    """
    # Checked in order, using ``==`` so 1.0 / 3.0 are matched as well.
    for old_code, new_code in ((1, 3), (3, 1)):
        if value == old_code:
            return new_code
    return value
    
if __name__ == "__main__":
    # 2011年
    year = "2011"
    demo, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/demographic_background.dta")
    psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/psu.dta", encoding='gbk')
    biomarkers, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/biomarkers.dta")
    blood, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Blood_20140429.dta")
    health_status, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/health_status_and_functioning.dta")
    health_care, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/health_care_and_insurance.dta")
    exp_income, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/exp_income_wealth.dta")

    #性别#年龄#居住地#婚姻状况
    # 1 Married with spouse present
    # 2 Married but not living with spouse temporarily for reasons such as work
    # 3 Separated
    # 4 Divorced
    # 5 Widowed
    # 6 Never married
    data_2011 = demo[['ID','householdID', 'communityID','rgender','ba002_1','be001']]

    #居住地
    data_2011 = pd.merge(data_2011, psu[['communityID', 'province', 'city']], on = "communityID", how="left")

    #身高#体重#收缩压#舒张压
    biomarkers_select = biomarkers[['ID','householdID', 'communityID','qi002','ql002','qa011','qa012']]
    data_2011 = pd.merge(data_2011, biomarkers_select, on = ["ID", "householdID", "communityID"], how="left")

    #白细胞（WBC），平均红血球容积MCV,血小板,血尿素氮bun,葡萄糖glu,血肌酐crea,总胆固醇cho,甘油三酯tg,高密度脂蛋白HDL,低密度脂蛋白胆固醇LDL,C反应蛋白CRP
    #糖化血红蛋白hba1c,尿酸ua,血细胞比容Hematocrit,血红蛋白hgb,胱抑素C
    blood = blood.loc[:, blood.columns.difference(["bloodweight", "qc1_va003"])]
    data_2011 = pd.merge(data_2011, blood, on = ["ID"], how="left")
    # 慢性病：
    # (1)  Hypertension 高血压病    
    # (2)	Dyslipidemia (elevation of low density lipoprotein, triglycerides (TGs),and total cholesterol, or a low high density lipoprotein level)血脂异常（包括低密度脂蛋白、甘油三酯、总胆固醇的升高或（和）高密度脂蛋白的下降）
    # (3)	Diabetes or high blood sugar糖尿病或血糖升高（包括糖耐量异常和空腹血糖升高）
    # (4)	Cancer or malignant tumor (excluding minor skin cancers) 癌症等恶性肿瘤（不包括轻度皮肤癌）
    # (5)	Chronic lung diseases, such as chronic bronchitis , emphysema ( excluding tumors, or cancer) 慢性肺部疾患如慢性支气管炎或肺气肿、肺心病（不包括肿瘤或癌）
    #        (6)  Liver disease (except fatty liver, tumors, and cancer) 肝脏疾病
    # （除脂肪肝、肿瘤或癌外）
    # (7)	Heart attack, coronary heart disease, angina, congestive heart failure, or other heart problems 心脏病（如心肌梗塞、冠心病、心绞痛、充血性心力衰竭和其他心脏疾病）
    # (8)	 Stroke  中风
    # (9)	 Kidney disease (except for tumor or cancer) 肾脏疾病（不包括肿瘤或癌）
    # (10)	 Stomach or other digestive disease (except for tumor or cancer) 胃部疾病或消化系统疾病（不包括肿瘤或癌）
    # (11)	 Emotional, nervous, or psychiatric problems 情感及精神方面问题 
    # (12)	 Memory-related disease 与记忆相关的疾病 （如老年痴呆症、脑萎缩、帕金森症）
    # (13)	 Arthritis or rheumatism 关节炎或风湿病
    # (14)  Asthma  哮喘
    health_status_select = health_status[['ID','householdID', 'communityID', 'da007_1_', 'da007_2_','da007_3_'
                                   ,'da007_4_','da007_5_','da007_6_','da007_7_','da007_8_','da007_9_','da007_10_','da007_11_'
                                   ,'da007_12_','da007_13_','da007_14_','da051_1_','da051_2_', 'da051_3_'
                                   ,'da052_1_','da052_2_','da052_3_','da053_1_','da053_2_','da053_3_','da054_1_','da054_2_','da054_3_'
                                   ,'da055_1_','da055_2_','da055_3_', 'da059','da061','da063'
                                   ,'da069']]
    
    data_2011 = pd.merge(data_2011, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")

    #计算认知功能得分，分成三部分：电话问卷10分，词语回忆10分、画图1分
    health_status["dc001s1_score"] = health_status["dc001s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
    health_status["dc001s2_score"] = health_status["dc001s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
    health_status["dc001s3_score"] = health_status["dc001s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
    health_status["dc002_score"] = health_status["dc002"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
    health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
    health_status["dc019_score"] = health_status["dc019"].apply(lambda x : 1 if x==93 else 0 if pd.isna(x) else 0) 
    health_status["dc020_score"] = health_status["dc020"].apply(lambda x : 1 if x==86 else 0 if pd.isna(x) else 0) 
    health_status["dc021_score"] = health_status["dc021"].apply(lambda x : 1 if x==79 else 0 if pd.isna(x) else 0)
    health_status["dc022_score"] = health_status["dc022"].apply(lambda x : 1 if x==72 else 0 if pd.isna(x) else 0)
    health_status["dc023_score"] = health_status["dc023"].apply(lambda x : 1 if x==65 else 0 if pd.isna(x) else 0)

    #词语记忆
    health_status["dc006s1_score"] = health_status["dc006s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
    health_status["dc006s2_score"] = health_status["dc006s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
    health_status["dc006s3_score"] = health_status["dc006s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
    health_status["dc006s4_score"] = health_status["dc006s4"].apply(lambda x : 1 if x==4 else 0 if pd.isna(x) else 0) 
    health_status["dc006s5_score"] = health_status["dc006s5"].apply(lambda x : 1 if x==5 else 0 if pd.isna(x) else 0) 
    health_status["dc006s6_score"] = health_status["dc006s6"].apply(lambda x : 1 if x==6 else 0 if pd.isna(x) else 0)                                            
    health_status["dc006s7_score"] = health_status["dc006s7"].apply(lambda x : 1 if x==7 else 0 if pd.isna(x) else 0) 
    health_status["dc006s8_score"] = health_status["dc006s8"].apply(lambda x : 1 if x==8 else 0 if pd.isna(x) else 0) 
    health_status["dc006s9_score"] = health_status["dc006s9"].apply(lambda x : 1 if x==9 else 0 if pd.isna(x) else 0)                                            
    health_status["dc006s10_score"] = health_status["dc006s10"].apply(lambda x : 1 if x==10 else 0 if pd.isna(x) else 0)                                           
    health_status["dc006s11_score"] = health_status["dc006s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0) 
    health_status["dc027s1_score"] = health_status["dc027s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0) 
    health_status["dc027s2_score"] = health_status["dc027s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0) 
    health_status["dc027s3_score"] = health_status["dc027s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0) 
    health_status["dc027s4_score"] = health_status["dc027s4"].apply(lambda x : 1 if x==4 else 0 if pd.isna(x) else 0) 
    health_status["dc027s5_score"] = health_status["dc027s5"].apply(lambda x : 1 if x==5 else 0 if pd.isna(x) else 0) 
    health_status["dc027s6_score"] = health_status["dc027s6"].apply(lambda x : 1 if x==6 else 0 if pd.isna(x) else 0)                                            
    health_status["dc027s7_score"] = health_status["dc027s7"].apply(lambda x : 1 if x==7 else 0 if pd.isna(x) else 0) 
    health_status["dc027s8_score"] = health_status["dc027s8"].apply(lambda x : 1 if x==8 else 0 if pd.isna(x) else 0) 
    health_status["dc027s9_score"] = health_status["dc027s9"].apply(lambda x : 1 if x==9 else 0 if pd.isna(x) else 0)                                            
    health_status["dc027s10_score"] = health_status["dc027s10"].apply(lambda x : 1 if x==10 else 0 if pd.isna(x) else 0)                                            
    health_status["dc027s11_score"] = health_status["dc027s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0)
    #画图
    health_status["draw_score"] = health_status["dc025"].apply(lambda x : 1 if x==1 else 0)

    data_2011["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
        health_status["dc001s3_score"] + health_status["dc002_score"]+ health_status["dc003_score"]+ \
        health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
        health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
        health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \
        health_status["dc006s5_score"] + health_status["dc006s6_score"] + health_status["dc006s7_score"] + \
        health_status["dc006s8_score"] + health_status["dc006s9_score"] + health_status["dc006s10_score"] + \
        health_status["dc006s11_score"] + health_status["dc027s1_score"]+ health_status["dc027s2_score"]+ \
        health_status["dc027s3_score"]+ health_status["dc027s4_score"]+ health_status["dc027s5_score"]+ \
        health_status["dc027s6_score"]+ health_status["dc027s7_score"]+ health_status["dc027s8_score"]+ \
        health_status["dc027s9_score"]+health_status["dc027s10_score"]+health_status["dc027s11_score"]+\
        health_status["draw_score"]
    #心理得分
    health_status["dc009_score"] = health_status["dc009"]-1
    health_status["dc010_score"] = health_status["dc010"]-1
    health_status["dc011_score"] = health_status["dc011"]-1
    health_status["dc012_score"] = health_status["dc012"]-1   
    health_status["dc013_score"] = 4 - health_status["dc013"] 
    health_status["dc014_score"] = health_status["dc014"]-1   
    health_status["dc015_score"] = health_status["dc015"]-1   
    health_status["dc016_score"] = 4 - health_status["dc016"]
    health_status["dc017_score"] = health_status["dc017"]-1   
    health_status["dc018_score"] = health_status["dc018"]-1 
    data_2011["psychiatric_score"] = health_status["dc009_score"] + health_status["dc010_score"] + health_status["dc011_score"] + \
        health_status["dc012_score"] + health_status["dc013_score"] + health_status["dc014_score"] + health_status["dc015_score"] + \
        health_status["dc016_score"] + health_status["dc017_score"] + health_status["dc018_score"]
    data_2011["wave"] = year
    change_columns(data_2011)

    # 2013年
    year = "2013"
    demo, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Demographic_Background.dta")
    psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/PSU.dta", encoding='gbk')
    biomarkers, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Biomarker.dta")
    health_status, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Status_and_Functioning.dta")
    health_care, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Care_and_Insurance.dta")
    exp_income, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/exp_income_wealth.dta")

    #性别#年龄#婚姻状况
    # 1 Married with spouse present
    # 2 Married but not living with spouse temporarily for reasons such as work
    # 3 Separated
    # 4 Divorced
    # 5 Widowed
    # 6 Never married
    data_2013 = demo[['ID','householdID', 'communityID','ba000_w2_3','zba002_1','be001']]
    #居住地
    data_2013 = pd.merge(data_2013, psu[['communityID', 'province', 'city']], on = "communityID", how="left")

    #身高#体重#收缩压#舒张压
    biomarkers_select = biomarkers[['ID','householdID', 'communityID','qi002','ql002','qa011','qa012']]
    data_2013 = pd.merge(data_2013, biomarkers_select, on = ["ID", "householdID", "communityID"], how="left")

    #白细胞（WBC），平均红血球容积MCV,血小板,血尿素氮bun,葡萄糖glu,血肌酐crea,总胆固醇cho,甘油三酯tg,高密度脂蛋白HDL,低密度脂蛋白胆固醇LDL,C反应蛋白CRP
    #糖化血红蛋白hba1c,尿酸ua,血细胞比容Hematocrit,血红蛋白hgb,胱抑素C
    data_2013[['bl_wbc','bl_mcv','bl_plt','bl_bun','bl_glu','bl_crea','bl_cho', 'bl_tg', 'bl_hdl', 'bl_ldl','bl_crp','bl_hbalc','bl_ua', 'bl_hct', 'bl_hgb','bl_cysc']]=np.nan
    
    # 慢性病：
    # (1)  Hypertension 高血压病    
    # (2)	Dyslipidemia (elevation of low density lipoprotein, triglycerides (TGs),and total cholesterol, or a low high density lipoprotein level)血脂异常（包括低密度脂蛋白、甘油三酯、总胆固醇的升高或（和）高密度脂蛋白的下降）
    # (3)	Diabetes or high blood sugar糖尿病或血糖升高（包括糖耐量异常和空腹血糖升高）
    # (4)	Cancer or malignant tumor (excluding minor skin cancers) 癌症等恶性肿瘤（不包括轻度皮肤癌）
    # (5)	Chronic lung diseases, such as chronic bronchitis , emphysema ( excluding tumors, or cancer) 慢性肺部疾患如慢性支气管炎或肺气肿、肺心病（不包括肿瘤或癌）
    #        (6)  Liver disease (except fatty liver, tumors, and cancer) 肝脏疾病
    # （除脂肪肝、肿瘤或癌外）
    # (7)	Heart attack, coronary heart disease, angina, congestive heart failure, or other heart problems 心脏病（如心肌梗塞、冠心病、心绞痛、充血性心力衰竭和其他心脏疾病）
    # (8)	 Stroke  中风
    # (9)	 Kidney disease (except for tumor or cancer) 肾脏疾病（不包括肿瘤或癌）
    # (10)	 Stomach or other digestive disease (except for tumor or cancer) 胃部疾病或消化系统疾病（不包括肿瘤或癌）
    # (11)	 Emotional, nervous, or psychiatric problems 情感及精神方面问题 
    # (12)	 Memory-related disease 与记忆相关的疾病 （如老年痴呆症、脑萎缩、帕金森症）
    # (13)	 Arthritis or rheumatism 关节炎或风湿病
    # (14)  Asthma  哮喘
    health_status_select = health_status[['ID','householdID', 'communityID', 'da007_1_', 'da007_2_','da007_3_'
                                   ,'da007_4_','da007_5_','da007_6_','da007_7_','da007_8_','da007_9_','da007_10_','da007_11_'
                                   ,'da007_12_','da007_13_','da007_14_','da051_1_','da051_2_', 'da051_3_'
                                   ,'da052_1_','da052_2_','da052_3_','da053_1_','da053_2_','da053_3_','da054_1_','da054_2_','da054_3_'
                                   ,'da055_1_','da055_2_','da055_3_', 'da059','da061','da063'
                                   ,'da069']]
    
    data_2013 = pd.merge(data_2013, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")

    #计算认知功能得分，分成三部分：电话问卷10分，词语回忆10分、画图1分
    health_status["dc001s1_score"] = health_status["dc001s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
    health_status["dc001s2_score"] = health_status["dc001s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
    health_status["dc001s3_score"] = health_status["dc001s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
    health_status["dc002_score"] = health_status["dc002"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
    health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
    health_status["dc019_score"] = health_status["dc019"].apply(lambda x : 1 if x==93 else 0 if pd.isna(x) else 0) 
    health_status["dc020_score"] = health_status["dc020"].apply(lambda x : 1 if x==86 else 0 if pd.isna(x) else 0) 
    health_status["dc021_score"] = health_status["dc021"].apply(lambda x : 1 if x==79 else 0 if pd.isna(x) else 0)
    health_status["dc022_score"] = health_status["dc022"].apply(lambda x : 1 if x==72 else 0 if pd.isna(x) else 0)
    health_status["dc023_score"] = health_status["dc023"].apply(lambda x : 1 if x==65 else 0 if pd.isna(x) else 0)

    #词语记忆
    health_status["dc006s1_score"] = health_status["dc006_1_s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
    health_status["dc006s2_score"] = health_status["dc006_1_s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
    health_status["dc006s3_score"] = health_status["dc006_1_s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
    health_status["dc006s4_score"] = health_status["dc006_1_s4"].apply(lambda x : 1 if x==4 else 0 if pd.isna(x) else 0) 
    health_status["dc006s5_score"] = health_status["dc006_1_s5"].apply(lambda x : 1 if x==5 else 0 if pd.isna(x) else 0) 
    health_status["dc006s6_score"] = health_status["dc006_1_s6"].apply(lambda x : 1 if x==6 else 0 if pd.isna(x) else 0)                                            
    health_status["dc006s7_score"] = health_status["dc006_1_s7"].apply(lambda x : 1 if x==7 else 0 if pd.isna(x) else 0) 
    health_status["dc006s8_score"] = health_status["dc006_1_s8"].apply(lambda x : 1 if x==8 else 0 if pd.isna(x) else 0) 
    health_status["dc006s9_score"] = health_status["dc006_1_s9"].apply(lambda x : 1 if x==9 else 0 if pd.isna(x) else 0)                                            
    health_status["dc006s10_score"] = health_status["dc006_1_s10"].apply(lambda x : 1 if x==10 else 0 if pd.isna(x) else 0)                                           
    health_status["dc006s11_score"] = health_status["dc006_1_s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0) 
    health_status["dc027s1_score"] = health_status["dc027s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0) 
    health_status["dc027s2_score"] = health_status["dc027s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0) 
    health_status["dc027s3_score"] = health_status["dc027s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0) 
    health_status["dc027s4_score"] = health_status["dc027s4"].apply(lambda x : 1 if x==4 else 0 if pd.isna(x) else 0) 
    health_status["dc027s5_score"] = health_status["dc027s5"].apply(lambda x : 1 if x==5 else 0 if pd.isna(x) else 0) 
    health_status["dc027s6_score"] = health_status["dc027s6"].apply(lambda x : 1 if x==6 else 0 if pd.isna(x) else 0)                                            
    health_status["dc027s7_score"] = health_status["dc027s7"].apply(lambda x : 1 if x==7 else 0 if pd.isna(x) else 0) 
    health_status["dc027s8_score"] = health_status["dc027s8"].apply(lambda x : 1 if x==8 else 0 if pd.isna(x) else 0) 
    health_status["dc027s9_score"] = health_status["dc027s9"].apply(lambda x : 1 if x==9 else 0 if pd.isna(x) else 0)                                            
    health_status["dc027s10_score"] = health_status["dc027s10"].apply(lambda x : 1 if x==10 else 0 if pd.isna(x) else 0)                                            
    health_status["dc027s11_score"] = health_status["dc027s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0)
    #画图
    health_status["draw_score"] = health_status["dc025"].apply(lambda x : 1 if x==1 else 0)

    data_2013["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
        health_status["dc001s3_score"] + health_status["dc002_score"]+ health_status["dc003_score"]+ \
        health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
        health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
        health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \
        health_status["dc006s5_score"] + health_status["dc006s6_score"] + health_status["dc006s7_score"] + \
        health_status["dc006s8_score"] + health_status["dc006s9_score"] + health_status["dc006s10_score"] + \
        health_status["dc006s11_score"] + health_status["dc027s1_score"]+ health_status["dc027s2_score"]+ \
        health_status["dc027s3_score"]+ health_status["dc027s4_score"]+ health_status["dc027s5_score"]+ \
        health_status["dc027s6_score"]+ health_status["dc027s7_score"]+ health_status["dc027s8_score"]+ \
        health_status["dc027s9_score"]+health_status["dc027s10_score"]+health_status["dc027s11_score"]+\
        health_status["draw_score"]
    #心理得分
    health_status["dc009_score"] = health_status["dc009"]-1
    health_status["dc010_score"] = health_status["dc010"]-1
    health_status["dc011_score"] = health_status["dc011"]-1
    health_status["dc012_score"] = health_status["dc012"]-1   
    health_status["dc013_score"] = 4 - health_status["dc013"] 
    health_status["dc014_score"] = health_status["dc014"]-1   
    health_status["dc015_score"] = health_status["dc015"]-1   
    health_status["dc016_score"] = 4 - health_status["dc016"]
    health_status["dc017_score"] = health_status["dc017"]-1   
    health_status["dc018_score"] = health_status["dc018"]-1 
    data_2013["psychiatric_score"] = health_status["dc009_score"] + health_status["dc010_score"] + health_status["dc011_score"] + \
        health_status["dc012_score"] + health_status["dc013_score"] + health_status["dc014_score"] + health_status["dc015_score"] + \
        health_status["dc016_score"] + health_status["dc017_score"] + health_status["dc018_score"]
    data_2013["wave"] = year
    change_columns(data_2013)
    data_2013 = pd.concat([data_2011, data_2013], axis=0)

    # 2015年
    year = "2015"
    demo, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Demographic_Background.dta")
    psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2013/PSU.dta", encoding='gbk')
    blood, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Blood.dta")
    biomarkers, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Biomarker.dta")
    health_status, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Status_and_Functioning.dta")
    health_care, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Care_and_Insurance.dta")

    #性别#年龄#婚姻状况
    # 1 Married with spouse present
    # 2 Married but not living with spouse temporarily for reasons such as work
    # 3 Separated
    # 4 Divorced
    # 5 Widowed
    # 6 Never married
    data_2015 = demo[['ID','householdID', 'communityID','ba000_w2_3', 'ba004_w3_1', 'be001']]
    # 处理出生年的问题
    data_2015['ba004_w3_1'] = demo.apply(lambda row: row['ba002_1'] if row['ba002'] == 2 else row['ba004_w3_1'], axis=1)

    #居住地
    data_2015 = pd.merge(data_2015, psu[['communityID', 'province', 'city']], on = "communityID", how="left")

    #身高#体重#收缩压#舒张压
    biomarkers_select = biomarkers[['ID','householdID', 'communityID','qi002', 'ql002', 'qa011','qa012']]
    data_2015 = pd.merge(data_2015, biomarkers_select, on = ["ID", "householdID", "communityID"], how="left")

    #白细胞（WBC），平均红血球容积MCV,血小板,血尿素氮bun,葡萄糖glu,血肌酐crea,总胆固醇cho,甘油三酯tg,高密度脂蛋白HDL,低密度脂蛋白胆固醇LDL,C反应蛋白CRP
    #糖化血红蛋白hba1c,尿酸ua,血细胞比容Hematocrit,血红蛋白hgb,胱抑素C
    blood = blood[['ID', 'bl_wbc','bl_mcv','bl_plt','bl_bun','bl_glu','bl_crea','bl_cho', 'bl_tg', 'bl_hdl', 'bl_ldl','bl_crp','bl_hbalc','bl_ua', 'bl_hct', 'bl_hgb','bl_cysc']]
    data_2015 = pd.merge(data_2015, blood, on = ["ID"], how="left")
    
    # 慢性病：
    # (1)  Hypertension 高血压病    
    # (2)	Dyslipidemia (elevation of low density lipoprotein, triglycerides (TGs),and total cholesterol, or a low high density lipoprotein level)血脂异常（包括低密度脂蛋白、甘油三酯、总胆固醇的升高或（和）高密度脂蛋白的下降）
    # (3)	Diabetes or high blood sugar糖尿病或血糖升高（包括糖耐量异常和空腹血糖升高）
    # (4)	Cancer or malignant tumor (excluding minor skin cancers) 癌症等恶性肿瘤（不包括轻度皮肤癌）
    # (5)	Chronic lung diseases, such as chronic bronchitis , emphysema ( excluding tumors, or cancer) 慢性肺部疾患如慢性支气管炎或肺气肿、肺心病（不包括肿瘤或癌）
    #        (6)  Liver disease (except fatty liver, tumors, and cancer) 肝脏疾病
    # （除脂肪肝、肿瘤或癌外）
    # (7)	Heart attack, coronary heart disease, angina, congestive heart failure, or other heart problems 心脏病（如心肌梗塞、冠心病、心绞痛、充血性心力衰竭和其他心脏疾病）
    # (8)	 Stroke  中风
    # (9)	 Kidney disease (except for tumor or cancer) 肾脏疾病（不包括肿瘤或癌）
    # (10)	 Stomach or other digestive disease (except for tumor or cancer) 胃部疾病或消化系统疾病（不包括肿瘤或癌）
    # (11)	 Emotional, nervous, or psychiatric problems 情感及精神方面问题 
    # (12)	 Memory-related disease 与记忆相关的疾病 （如老年痴呆症、脑萎缩、帕金森症）
    # (13)	 Arthritis or rheumatism 关节炎或风湿病
    # (14)  Asthma  哮喘
    health_status_select = health_status[['ID','householdID', 'communityID', 'da007_1_', 'da007_2_','da007_3_'
                                   ,'da007_4_','da007_5_','da007_6_','da007_7_','da007_8_','da007_9_','da007_10_','da007_11_'
                                   ,'da007_12_','da007_13_','da007_14_','da051_1_','da051_2_', 'da051_3_'
                                   ,'da052_1_','da052_2_','da052_3_','da053_1_','da053_2_','da053_3_','da054_1_','da054_2_','da054_3_'
                                   ,'da055_1_','da055_2_','da055_3_', 'da059','da061','da063'
                                   ,'da069']]
    
    data_2015 = pd.merge(data_2015, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")

    #计算认知功能得分，分成三部分：电话问卷10分，词语回忆10分、画图1分
    health_status["dc001s1_score"] = health_status["dc001s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
    health_status["dc001s2_score"] = health_status["dc001s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
    health_status["dc001s3_score"] = health_status["dc001s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
    health_status["dc002_score"] = health_status["dc002"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
    health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
    health_status["dc019_score"] = health_status["dc019"].apply(lambda x : 1 if x==93 else 0 if pd.isna(x) else 0) 
    health_status["dc020_score"] = health_status["dc020"].apply(lambda x : 1 if x==86 else 0 if pd.isna(x) else 0) 
    health_status["dc021_score"] = health_status["dc021"].apply(lambda x : 1 if x==79 else 0 if pd.isna(x) else 0)
    health_status["dc022_score"] = health_status["dc022"].apply(lambda x : 1 if x==72 else 0 if pd.isna(x) else 0)
    health_status["dc023_score"] = health_status["dc023"].apply(lambda x : 1 if x==65 else 0 if pd.isna(x) else 0)

    #词语记忆
    health_status["dc006s1_score"] = health_status["dc006s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
    health_status["dc006s2_score"] = health_status["dc006s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
    health_status["dc006s3_score"] = health_status["dc006s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
    health_status["dc006s4_score"] = health_status["dc006s4"].apply(lambda x : 1 if x==4 else 0 if pd.isna(x) else 0) 
    health_status["dc006s5_score"] = health_status["dc006s5"].apply(lambda x : 1 if x==5 else 0 if pd.isna(x) else 0) 
    health_status["dc006s6_score"] = health_status["dc006s6"].apply(lambda x : 1 if x==6 else 0 if pd.isna(x) else 0)                                            
    health_status["dc006s7_score"] = health_status["dc006s7"].apply(lambda x : 1 if x==7 else 0 if pd.isna(x) else 0) 
    health_status["dc006s8_score"] = health_status["dc006s8"].apply(lambda x : 1 if x==8 else 0 if pd.isna(x) else 0) 
    health_status["dc006s9_score"] = health_status["dc006s9"].apply(lambda x : 1 if x==9 else 0 if pd.isna(x) else 0)                                            
    health_status["dc006s10_score"] = health_status["dc006s10"].apply(lambda x : 1 if x==10 else 0 if pd.isna(x) else 0)                                           
    health_status["dc006s11_score"] = health_status["dc006s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0) 
    health_status["dc027s1_score"] = health_status["dc027s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0) 
    health_status["dc027s2_score"] = health_status["dc027s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0) 
    health_status["dc027s3_score"] = health_status["dc027s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0) 
    health_status["dc027s4_score"] = health_status["dc027s4"].apply(lambda x : 1 if x==4 else 0 if pd.isna(x) else 0) 
    health_status["dc027s5_score"] = health_status["dc027s5"].apply(lambda x : 1 if x==5 else 0 if pd.isna(x) else 0) 
    health_status["dc027s6_score"] = health_status["dc027s6"].apply(lambda x : 1 if x==6 else 0 if pd.isna(x) else 0)                                            
    health_status["dc027s7_score"] = health_status["dc027s7"].apply(lambda x : 1 if x==7 else 0 if pd.isna(x) else 0) 
    health_status["dc027s8_score"] = health_status["dc027s8"].apply(lambda x : 1 if x==8 else 0 if pd.isna(x) else 0) 
    health_status["dc027s9_score"] = health_status["dc027s9"].apply(lambda x : 1 if x==9 else 0 if pd.isna(x) else 0)                                            
    health_status["dc027s10_score"] = health_status["dc027s10"].apply(lambda x : 1 if x==10 else 0 if pd.isna(x) else 0)                                            
    health_status["dc027s11_score"] = health_status["dc027s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0)
    #画图
    # Drawing task: one point when dc025 is coded 1; every other value,
    # including a missing answer, scores 0.
    health_status["draw_score"] = health_status["dc025"].eq(1).astype("int64")

    # Total cognition score: the per-item "<item>_score" columns added from
    # left to right in exactly this order.
    cognition_items = ["dc001s1", "dc001s2", "dc001s3", "dc002", "dc003",
                       "dc019", "dc020", "dc021", "dc022", "dc023"]
    cognition_items += ["dc006s" + str(i) for i in range(1, 12)]
    cognition_items += ["dc027s" + str(i) for i in range(1, 12)]
    cognition_items.append("draw")
    cognition_total = health_status[cognition_items[0] + "_score"]
    for item in cognition_items[1:]:
        cognition_total = cognition_total + health_status[item + "_score"]
    data_2015["Cognition_score"] = cognition_total

    # Psychological score: dc009..dc018 are rescored as (answer - 1), except
    # dc013 and dc016 which are rescored as (4 - answer).
    psych_items = ["dc009", "dc010", "dc011", "dc012", "dc013",
                   "dc014", "dc015", "dc016", "dc017", "dc018"]
    reverse_scored = ("dc013", "dc016")
    for item in psych_items:
        answer = health_status[item]
        if item in reverse_scored:
            health_status[item + "_score"] = 4 - answer
        else:
            health_status[item + "_score"] = answer - 1

    # A missing answer on any item propagates to a missing total.
    psych_total = health_status[psych_items[0] + "_score"]
    for item in psych_items[1:]:
        psych_total = psych_total + health_status[item + "_score"]
    data_2015["psychiatric_score"] = psych_total

    # Tag the wave, apply the unified column names and stack under the
    # previously accumulated waves.
    data_2015["wave"] = year
    change_columns(data_2015)
    data_2015 = pd.concat([data_2013, data_2015], axis=0)

    # ---- 2018 wave ----
    year = "2018"
    demo, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Demographic_Background.dta")
    # The PSU file (communityID -> province/city) is read from the CHARLS2013
    # folder, not from the folder of the current wave.
    psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2013/PSU.dta", encoding='gbk')
    health_status, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Status_and_Functioning.dta")
    # NOTE(review): health_care is loaded but not referenced anywhere in the
    # 2018 section; kept in case later code relies on it.
    health_care, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Care_and_Insurance.dta")
    cognition, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Cognition.dta")

    # Sex, birth year, marital status.
    # Marital status codes:
    # 1 Married with spouse present
    # 2 Married but not living with spouse temporarily for reasons such as work
    # 3 Separated
    # 4 Divorced
    # 5 Widowed
    # 6 Never married
    data_2018 = demo[['ID','householdID', 'communityID','ba000_w2_3', 'ba004_w3_1', 'be001']]
    # Place of residence.
    data_2018 = pd.merge(data_2018, psu[['communityID', 'province', 'city']], on = "communityID", how="left")

    # Height, weight, systolic and diastolic pressure: filled with NaN
    # placeholders in this wave so the column layout matches change_columns.
    data_2018[['qi002', 'ql002', 'qa011','qa012']]=np.nan

    # Blood markers (WBC, MCV, platelets, BUN, glucose, creatinine, total
    # cholesterol, triglycerides, HDL, LDL, CRP, HbA1c, uric acid, hematocrit,
    # hemoglobin, cystatin C): NaN placeholders as well.
    data_2018[['bl_wbc','bl_mcv','bl_plt','bl_bun','bl_glu','bl_crea','bl_cho', 'bl_tg', 'bl_hdl', 'bl_ldl','bl_crp','bl_hbalc','bl_ua', 'bl_hct', 'bl_hgb','bl_cysc']]=np.nan

    # Chronic diseases (da007_1_ .. da007_14_):
    # (1)  Hypertension
    # (2)  Dyslipidemia (elevation of low density lipoprotein, triglycerides (TGs),
    #      and total cholesterol, or a low high density lipoprotein level)
    # (3)  Diabetes or high blood sugar
    # (4)  Cancer or malignant tumor (excluding minor skin cancers)
    # (5)  Chronic lung diseases, such as chronic bronchitis, emphysema
    #      (excluding tumors, or cancer)
    # (6)  Liver disease (except fatty liver, tumors, and cancer)
    # (7)  Heart attack, coronary heart disease, angina, congestive heart failure,
    #      or other heart problems
    # (8)  Stroke
    # (9)  Kidney disease (except for tumor or cancer)
    # (10) Stomach or other digestive disease (except for tumor or cancer)
    # (11) Emotional, nervous, or psychiatric problems
    # (12) Memory-related disease (e.g. dementia, brain atrophy, Parkinson's disease)
    # (13) Arthritis or rheumatism
    # (14) Asthma
    # The remaining da05x/da059/da061/da063/da069 columns fill the activity,
    # smoking and drinking positions named in change_columns.
    health_status_select = health_status[['ID','householdID', 'communityID', 'da007_1_', 'da007_2_','da007_3_'
                                   ,'da007_4_','da007_5_','da007_6_','da007_7_','da007_8_','da007_9_','da007_10_','da007_11_'
                                   ,'da007_12_','da007_13_','da007_14_','da051_1_','da051_2_', 'da051_3_'
                                   ,'da052_1_','da052_2_','da052_3_','da053_1_','da053_2_','da053_3_','da054_1_','da054_2_','da054_3_'
                                   ,'da055_1_','da055_2_','da055_3_', 'da059','da061','da063'
                                   ,'da069']]

    data_2018 = pd.merge(data_2018, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")

    # Cognition score: one point per item whose answer equals the expected
    # code; anything else (including a missing answer) scores 0.
    # NOTE(review): the expected codes 2 and 3 for dc006_w4 / dc003_w4 are kept
    # exactly as they were - confirm against the 2018 codebook.
    cognition_items = [
        ("dc001_w4", 1), ("dc006_w4", 2), ("dc003_w4", 3), ("dc005_w4", 1), ("dc002_w4", 1),
        # Serial subtraction answers.
        ("dc014_w4_1_1", 93), ("dc014_w4_2_1", 86), ("dc014_w4_3_1", 79),
        ("dc014_w4_4_1", 72), ("dc014_w4_5_1", 65),
    ]
    # Word recall: two rounds (dc028_w4_s1..s11 and dc047_w4_s1..s11), where
    # item k scores when it is coded k.
    cognition_items += [("dc028_w4_s" + str(k), k) for k in range(1, 12)]
    cognition_items += [("dc047_w4_s" + str(k), k) for k in range(1, 12)]
    # Drawing task.
    cognition_items.append(("dc024_w4", 1))

    cognition_total = 0
    for column, expected in cognition_items:
        cognition_total = cognition_total + cognition[column].eq(expected).astype("int64")

    # Psychological score: dc009..dc018 rescored as (answer - 1), except dc013
    # and dc016 which are rescored as (4 - answer). A missing answer on any
    # item makes the total missing.
    psych_total = 0
    for column in ["dc009", "dc010", "dc011", "dc012", "dc013",
                   "dc014", "dc015", "dc016", "dc017", "dc018"]:
        if column in ("dc013", "dc016"):
            psych_total = psych_total + (4 - cognition[column])
        else:
            psych_total = psych_total + (cognition[column] - 1)

    # Attach the scores by respondent ID. Assigning the Series directly to
    # data_2018 would align on the row index, which is only correct when
    # Cognition.dta has exactly the same rows in the same order as the merged
    # demographic frame.
    scores_2018 = pd.DataFrame({
        "ID": cognition["ID"],
        "Cognition_score": cognition_total,
        "psychiatric_score": psych_total,
    })
    data_2018 = pd.merge(data_2018, scores_2018, on="ID", how="left")

    data_2018["wave"] = year
    change_columns(data_2018)
    data_2018 = pd.concat([data_2015, data_2018], axis=0)

    # ---- 2020 wave ----
    year = "2020"
    demo, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Demographic_Background.dta")
    # The PSU file (communityID -> province/city) is read from the CHARLS2013
    # folder, not from the folder of the current wave.
    psu, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2013/PSU.dta", encoding='gbk')
    health_status, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Health_Status_and_Functioning.dta")

    # Sex, birth year, marital status.
    # Marital status codes:
    # 1 Married with spouse present
    # 2 Married but not living with spouse temporarily for reasons such as work
    # 3 Separated
    # 4 Divorced
    # 5 Widowed
    # 6 Never married
    data_2020 = demo[['ID','householdID', 'communityID','ba001', 'ba003_1','ba011']]
    # Place of residence.
    data_2020 = pd.merge(data_2020, psu[['communityID', 'province', 'city']], on = "communityID", how="left")

    # Height, weight, systolic and diastolic pressure: NaN placeholders.
    # Exactly four columns are needed here; a fifth placeholder would shift
    # every later column by one position when change_columns assigns names.
    data_2020[['qi002', 'ql002', 'qa011','qa012']]=np.nan

    # Blood markers (WBC, MCV, platelets, BUN, glucose, creatinine, total
    # cholesterol, triglycerides, HDL, LDL, CRP, HbA1c, uric acid, hematocrit,
    # hemoglobin, cystatin C): NaN placeholders as well.
    data_2020[['bl_wbc','bl_mcv','bl_plt','bl_bun','bl_glu','bl_crea','bl_cho', 'bl_tg', 'bl_hdl', 'bl_ldl','bl_crp','bl_hbalc','bl_ua', 'bl_hct', 'bl_hgb','bl_cysc']]=np.nan

    # Chronic diseases, in the order expected by change_columns:
    # (1)  Hypertension
    # (2)  Dyslipidemia (elevation of low density lipoprotein, triglycerides (TGs),
    #      and total cholesterol, or a low high density lipoprotein level)
    # (3)  Diabetes or high blood sugar
    # (4)  Cancer or malignant tumor (excluding minor skin cancers)
    # (5)  Chronic lung diseases, such as chronic bronchitis, emphysema
    #      (excluding tumors, or cancer)
    # (6)  Liver disease (except fatty liver, tumors, and cancer)
    # (7)  Heart attack, coronary heart disease, angina, congestive heart failure,
    #      or other heart problems
    # (8)  Stroke
    # (9)  Kidney disease (except for tumor or cancer)
    # (10) Stomach or other digestive disease (except for tumor or cancer)
    # (11) Emotional, nervous, or psychiatric problems
    # (12) Memory-related disease (e.g. dementia, brain atrophy, Parkinson's disease)
    # (13) Arthritis or rheumatism
    # (14) Asthma
    # The 2020 questionnaire asks about Parkinson's disease and memory-related
    # disease separately (da003_12_ and da003_13_); process_row folds the two
    # answers back into da003_12_ so the layout matches the earlier waves.
    health_status['da003_12_'] = health_status.apply(process_row, axis=1)
    # .copy() makes the selection an independent frame, so the da051 update
    # below is a plain column assignment and not a write into a slice.
    health_status_select = health_status[['ID','householdID', 'communityID', 'da003_1_', 'da003_2_','da003_3_'
                                   ,'da003_4_','da003_5_','da003_6_','da003_7_','da003_8_','da003_9_','da003_10_','da003_11_'
                                   ,'da003_12_','da003_14_','da003_15_','da032_1_','da032_2_', 'da032_3_'
                                   ,'da033_1_','da033_2_','da033_3_','da034_1_','da034_2_','da034_3_','da035_1_','da035_2_','da035_3_'
                                    ,'da036_1_','da036_2_','da036_3_', 'da046','da047','da050_1'
                                   ,'da051']].copy()
    # Recode da051 with update_da051 before it is merged in.
    health_status_select['da051'] = health_status_select['da051'].apply(update_da051)

    data_2020 = pd.merge(data_2020, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")

    # Cognition score: one point per item whose answer equals the expected
    # code; anything else (including a missing answer) scores 0.
    # NOTE(review): the expected codes 2 and 3 for dc005 / dc003 are kept
    # exactly as they were - confirm against the 2020 codebook.
    cognition_items = [
        ("dc001", 1), ("dc005", 2), ("dc003", 3), ("dc004", 1), ("dc002", 1),
        # Serial subtraction answers.
        ("dc007_1", 93), ("dc007_2", 86), ("dc007_3", 79), ("dc007_4", 72), ("dc007_5", 65),
    ]
    # Word recall: two rounds (dc012_s1..s11 and dc028_s1..s11), where item k
    # scores when it is coded k.
    cognition_items += [("dc012_s" + str(k), k) for k in range(1, 12)]
    cognition_items += [("dc028_s" + str(k), k) for k in range(1, 12)]
    # Drawing task.
    cognition_items.append(("dc009", 1))

    cognition_total = 0
    for column, expected in cognition_items:
        cognition_total = cognition_total + health_status[column].eq(expected).astype("int64")

    # Psychological score: in this wave the ten items are dc016..dc025. Each is
    # rescored as (answer - 1), except dc020 and dc023 which are rescored as
    # (4 - answer). A missing answer on any item makes the total missing.
    psych_total = 0
    for column in ["dc016", "dc017", "dc018", "dc019", "dc020",
                   "dc021", "dc022", "dc023", "dc024", "dc025"]:
        if column in ("dc020", "dc023"):
            psych_total = psych_total + (4 - health_status[column])
        else:
            psych_total = psych_total + (health_status[column] - 1)

    # Attach both scores to data_2020 (the cognition score was previously
    # written to data_2011) and match them by respondent ID rather than by
    # row position.
    scores_2020 = pd.DataFrame({
        "ID": health_status["ID"],
        "Cognition_score": cognition_total,
        "psychiatric_score": psych_total,
    })
    data_2020 = pd.merge(data_2020, scores_2020, on="ID", how="left")

    data_2020["wave"] = year
    change_columns(data_2020)
    data_2020 = pd.concat([data_2018, data_2020], axis=0)

    # Normalise region names so that province/city values line up with the
    # naming used by the pollutant data.
    city_renames = {
        '海东地区': '海东市',    # Haidong prefecture -> Haidong city
        '北京': '北京市',        # Beijing
        '哈尔滨': '哈尔滨市',    # Harbin
        '天津': '天津市',        # Tianjin
        '巢湖市': '合肥市',      # Chaohu -> Hefei
        '襄樊市': '襄阳市',      # Xiangfan -> Xiangyang
    }
    province_renames = {
        '北京': '北京市',                # Beijing
        '天津': '天津市',                # Tianjin
        '广西省': '广西壮族自治区',      # Guangxi -> Guangxi Zhuang Autonomous Region
    }
    # No replacement value is itself a key, so one mapping-based replace per
    # column yields the same result as replacing the names one at a time.
    data_2020['city'] = data_2020['city'].replace(city_renames)
    data_2020['province'] = data_2020['province'].replace(province_renames)

    # Write the stacked result of all waves to disk.
    data_2020.to_csv("/root/r_base/CHARLS/result_all_new.csv", index=False)
    print(123)