
2 commits ac61a6b014 ... 3f9500c2be

Author SHA1 Message Date
  JazzZhao 3f9500c2be Optimize the CHARLS database 1 month ago
  JazzZhao 7af394ed45 Adjust the cognition score 1 month ago

+ 26 - 0
CHARLS_P/CHARLS_NDVI.py

@@ -0,0 +1,26 @@
+import pandas as pd
+
+if __name__ == "__main__":
+    years = [2011, 2013, 2015, 2018, 2020]
+
+    # Load the CHARLS data
+    CHARLS_data = pd.read_csv("CHARLS_data_pollutants_mete.csv")
+    CHARLS_data.to_csv("CHARLS_data_pollutants_p_n_m_nd.csv", index=False)
+    CHARLS_data = pd.read_csv("CHARLS_data_pollutants_p_n_m_nd.csv")
+    
+    # Load the NDVI data
+    ndvi_data = pd.read_excel("NDVI/【立方数据学社】地级市等级的逐年NDVI.xlsx")
+
+    for year in years:
+        # Add two columns: the NDVI values for the year before and two years before `year`
+        # CHARLS_data[['last_year_pm2.5', "before_last_pm2.5"]]=''
+        # Select the rows for this wave year
+        CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
+        # Merge the two tables
+        table_merge = pd.merge(CHARLS_data_year, ndvi_data, left_on="city", right_on="CITY", how='left')
+        # table_merge_last.to_csv("123.csv",index=False)
+        # Update the CHARLS table
+        CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_ndvi'] = table_merge[str(year-1)].values
+        CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_ndvi'] = table_merge[str(year-2)].values
+        print(year)
+    CHARLS_data.to_csv("CHARLS_data_pollutants_p_n_m_nd.csv", index=False)
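
Note: `table_merge[str(year-1)]` raises a KeyError whenever the NDVI workbook has no column for that year, which can happen for the earliest waves. A minimal sketch of the same guard that CHARLS_PM.py adopts below, using the names from this script:

    for year in years:
        CHARLS_data_year = CHARLS_data[CHARLS_data['wave'] == year]
        table_merge = pd.merge(CHARLS_data_year, ndvi_data, left_on="city", right_on="CITY", how='left')
        # Assign the lagged NDVI only when the source column exists
        if str(year - 1) in table_merge.columns:
            CHARLS_data.loc[CHARLS_data['wave'] == year, 'last_year_ndvi'] = table_merge[str(year - 1)].values
        if str(year - 2) in table_merge.columns:
            CHARLS_data.loc[CHARLS_data['wave'] == year, 'before_last_ndvi'] = table_merge[str(year - 2)].values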

+ 5 - 3
CHARLS_P/CHARLS_PM.py

@@ -13,9 +13,11 @@ def pollutant_handle(path):
         CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
         # Merge the two tables
         table_merge = pd.merge(CHARLS_data_year, pollutants_data, on=['province', 'city'], how='left')
-        # Update the CHARLS table
-        CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_pm10'] = table_merge[str(year-1)].values
-        CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_pm10'] = table_merge[str(year-2)].values
+        if str(year - 1) in table_merge.columns:
+            # Update the CHARLS table
+            CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_pm10'] = table_merge[str(year-1)].values
+        if str(year - 2) in table_merge.columns:
+            CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_pm10'] = table_merge[str(year-2)].values
         CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
         print(year)
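
The guard matters because the pollutant grids may not extend two years before the earliest wave (the 2011 wave asks for 2009 and 2010 columns). A hypothetical helper, not part of this commit, that applies the same pattern for any pollutant prefix:

    def assign_lagged(CHARLS_data, table_merge, year, prefix):
        # Write last-year and before-last values only when the merged table
        # actually has that year's column; otherwise leave the cells as NaN.
        for lag, col in ((1, f"last_year_{prefix}"), (2, f"before_last_{prefix}")):
            src = str(year - lag)
            if src in table_merge.columns:
                CHARLS_data.loc[CHARLS_data["wave"] == year, col] = table_merge[src].values

Inside the loop above this would be called as assign_lagged(CHARLS_data, table_merge, year, "pm10").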
 

+ 24 - 0
CHARLS_P/CHARLS_exit.py

@@ -0,0 +1,24 @@
+import pandas as pd
+import pyreadstat
+
+if __name__ == "__main__":
+    # Load the CHARLS data
+    CHARLS_data = pd.read_csv("CHARLS_data_pollutants_mete.csv")
+    CHARLS_data.to_csv("CHARLS_data_pollutants_exit.csv", index=False)
+    CHARLS_data = pd.read_csv("CHARLS_data_pollutants_exit.csv")
+    
+    # Add a death-status column
+    # 0: alive
+    # 1: deceased
+    # Load the 2013 exit (death) interview data
+    exit, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2013/Exit_Interview.dta")
+    exit['ID'] = pd.to_numeric(exit['ID'], errors='coerce').astype('Int64')
+    exit["exit_year"] = exit["exb001_1"]
+    CHARLS_data = pd.merge(CHARLS_data, exit[['ID', "exit_year"]], on = "ID", how="left")
+
+    # Load the 2020 exit (death) module data
+    exit, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS2020/Exit_Module.dta")
+    exit['ID'] = pd.to_numeric(exit['ID'], errors='coerce').astype('Int64')
+    exit["exit_year"] = exit["exb001_1"]
+    CHARLS_data = pd.merge(CHARLS_data, exit[['ID', "exit_year"]], on = "ID", how="left")
+    CHARLS_data.to_csv("CHARLS_data_pollutants_exit.csv", index=False)
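
Caveat: the second merge adds a column that is also named exit_year, so pandas renames the pair to exit_year_x and exit_year_y and the saved file ends up without a plain exit_year column. A minimal sketch of one way to coalesce the two waves before the final to_csv (column names as produced by the default merge suffixes):

    # Prefer the 2013 exit year, fall back to the 2020 one, then drop the suffixed pair
    CHARLS_data["exit_year"] = CHARLS_data["exit_year_x"].combine_first(CHARLS_data["exit_year_y"])
    CHARLS_data = CHARLS_data.drop(columns=["exit_year_x", "exit_year_y"])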

+ 187 - 0
CHARLS_P/CHARLS_harmonized.py

@@ -0,0 +1,187 @@
+import pandas as pd
+import numpy as np
+import pyreadstat
+
+
+# Helper that recodes the given mstat column in the harmonized data
+def update_mstat(harmonized, col_name):
+    harmonized[col_name] = harmonized[col_name].apply(
+        lambda x: 1 if x in [1, 3] else 0 if x in [4, 5, 7, 8] else np.nan
+    )
+
+def update_physical(harmonized):
+    harmonized["r1phys"] = harmonized.apply(lambda x : 2 if x["r1vgact_c"]==1 else 
+                                                             1 if x["r1mdact_c"]==1 else 
+                                                             0 if x["r1ltact_c"] == 1 or (x["r1vgact_c"]==0 and x["r1mdact_c"]==0 and x["r1ltact_c"] == 0) 
+                                                             else np.nan ,axis=1)
+    harmonized["r2phys"] = harmonized.apply(lambda x : 2 if x["r2vgact_c"]==1 else 
+                                                             1 if x["r2mdact_c"]==1 else 
+                                                             0 if x["r2ltact_c"] == 1 or (x["r2vgact_c"]==0 and x["r2mdact_c"]==0 and x["r2ltact_c"] == 0) 
+                                                             else np.nan ,axis=1)
+    harmonized["r3phys"] = harmonized.apply(lambda x : 2 if x["r3vgact_c"]==1 else 
+                                                             1 if x["r3mdact_c"]==1 else 
+                                                             0 if x["r3ltact_c"] == 1 or (x["r3vgact_c"]==0 and x["r3mdact_c"]==0 and x["r3ltact_c"] == 0) 
+                                                             else np.nan ,axis=1)
+    harmonized["r4phys"] = harmonized.apply(lambda x : 2 if x["r4vgact_c"]==1 else 
+                                                             1 if x["r4mdact_c"]==1 else 
+                                                             0 if x["r4ltact_c"] == 1 or (x["r4vgact_c"]==0 and x["r4mdact_c"]==0 and x["r4ltact_c"] == 0) 
+                                                             else np.nan ,axis=1)
+def merge_data(harmonized, waves, flag="other"):
+    merged_data = []
+    # Iterate over (wave, column) pairs and merge each wave's data
+    for wave, col_name in waves:
+        if flag=="mstat":
+            update_mstat(harmonized, col_name)
+        elif flag == "phys":
+            update_physical(harmonized)
+        # Pull the matching wave's column and append the result to the list
+        merged_data.append(pd.merge(
+            CHARLS_data[CHARLS_data["wave"] == wave],
+            harmonized[["ID", col_name]],
+            on="ID",
+            how="left"
+        )[col_name])
+    return merged_data
+
+if __name__ == "__main__":
+    harmonized, meta = pyreadstat.read_dta("/root/r_base/CHARLS/Harmonized_CHARLS/H_CHARLS_D_Data.dta")
+    CHARLS_data = pd.read_csv("CHARLS_data_pollutants_p_n_m_nd.csv")
+    harmonized['ID'] = harmonized['ID'].astype(str)  # cast to string
+    CHARLS_data['ID'] = CHARLS_data['ID'].astype(str)  # cast to string
+    # Marital status
+    # 1 married or partnered
+    # 0 other marital status (separated, divorced, unmarried, or widowed)
+    # Waves and their corresponding column names
+    waves = [(2011, "r1mstat"), (2013, "r2mstat"), (2015, "r3mstat"), (2018, "r4mstat")]
+    # Stack the four waves into one column and assign it to CHARLS_data["marital_status_m"]
+    CHARLS_data["marital_status_m"] = pd.concat(merge_data(harmonized, waves, "mstat"), ignore_index=True)
+
+    # Height
+    waves = [(2011, "r1mheight"), (2013, "r2mheight"), (2015, "r3mheight")]
+    CHARLS_data["Height_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+
+    # Weight
+    waves = [(2011, "r1mweight"), (2013, "r2mweight"), (2015, "r3mweight")]
+    CHARLS_data["Weight_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+
+    # Waist circumference
+    waves = [(2011, "r1mwaist"), (2013, "r2mwaist"), (2015, "r3mwaist")]
+    CHARLS_data["waist_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+
+    # BMI
+    waves = [(2011, "r1mbmi"), (2013, "r2mbmi"), (2015, "r3mbmi")]
+    CHARLS_data["BMI"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+
+    # Systolic and diastolic blood pressure
+    waves = [(2011, "r1systo"), (2013, "r2systo"), (2015, "r3systo")]
+    CHARLS_data["Systolic_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1diasto"), (2013, "r2diasto"), (2015, "r3diasto")]
+    CHARLS_data["Diastolic_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+
+    # Physical activity
+    # 2 vigorous (vigorous activity more than once a week)
+    # 1 moderate (moderate activity more than once a week)
+    # 0 inactive (the rest)
+    waves = [(2011, "r1phys"), (2013, "r2phys"), (2015, "r3phys"), (2018, "r4phys")]
+    CHARLS_data["Physical_activity_m"] = pd.concat(merge_data(harmonized, waves, "phys"), ignore_index=True)
+
+    # Smoking
+    # 1 ever smoked
+    # 0 never smoked
+    waves = [(2011, "r1smokev"), (2013, "r2smokev"), (2015, "r3smokev"), (2018, "r4smokev")]
+    CHARLS_data["Smoke_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+
+    # Drinking
+    # 1 ever drank alcohol
+    # 0 never drank alcohol
+    waves = [(2011, "r1drinkev"), (2013, "r2drinkev"), (2015, "r3drinkev"), (2018, "r4drinkev")]
+    CHARLS_data["Drink_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+
+    # Chronic diseases
+    waves = [(2011, "r1hibpe"), (2013, "r2hibpe"), (2015, "r3hibpe"), (2018, "r4hibpe")]
+    CHARLS_data["Hypertension_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1diabe"), (2013, "r2diabe"), (2015, "r3diabe"), (2018, "r4diabe")]
+    CHARLS_data["Disabetes_or_High_Blood_Sugar_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1cancre"), (2013, "r2cancre"), (2015, "r3cancre"), (2018, "r4cancre")]
+    CHARLS_data["Cancer_or_Malignant_Tumor_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1lunge"), (2013, "r2lunge"), (2015, "r3lunge"), (2018, "r4lunge")]
+    CHARLS_data["Chronic_Lung_Diseases_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1hearte"), (2013, "r2hearte"), (2015, "r3hearte"), (2018, "r4hearte")]
+    CHARLS_data["Heart_Problems_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1psyche"), (2013, "r2psyche"), (2015, "r3psyche"), (2018, "r4psyche")]
+    CHARLS_data["Emotional_Nervous_or_Psychiatric_Problems_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1stroke"), (2013, "r2stroke"), (2015, "r3stroke"), (2018, "r4stroke")]
+    CHARLS_data["Stroke_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1arthre"), (2013, "r2arthre"), (2015, "r3arthre"), (2018, "r4arthre")]
+    CHARLS_data["Arthritis_or_Rheumatism_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1dyslipe"), (2013, "r2dyslipe"), (2015, "r3dyslipe"), (2018, "r4dyslipe")]
+    CHARLS_data["Dyslipidemia_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1livere"), (2013, "r2livere"), (2015, "r3livere"), (2018, "r4livere")]
+    CHARLS_data["Liver_Disease_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1kidneye"), (2013, "r2kidneye"), (2015, "r3kidneye"), (2018, "r4kidneye")]
+    CHARLS_data["Kidney_Diease_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1digeste"), (2013, "r2digeste"), (2015, "r3digeste"), (2018, "r4digeste")]
+    CHARLS_data["Stomach_or_Other_Digestive_Disease_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1asthmae"), (2013, "r2asthmae"), (2015, "r3asthmae"), (2018, "r4asthmae")]
+    CHARLS_data["Asthma_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1memrye"), (2013, "r2memrye"), (2015, "r3memrye"), (2018, "r4memrye")]
+    CHARLS_data["Memory_Related_Disease_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+
+    # Psychiatric score (CESD-10)
+    waves = [(2011, "s1cesd10"), (2013, "s2cesd10"), (2015, "s3cesd10"), (2018, "s4cesd10")]
+    CHARLS_data["Psychiatric_score_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+
+    # Sleep state
+    waves = [(2011, "r1sleeprl"), (2013, "r2sleeprl"), (2015, "r3sleeprl"), (2018, "r4sleeprl")]
+    CHARLS_data["sleep_state_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+
+    # Compute the cognition score in three parts: telephone interview (9 pts), word recall (10 pts), drawing (1 pt)
+    waves = [(2011, "r1orient"), (2013, "r2orient"), (2015, "r3orient"), (2018, "r4orient")]
+    CHARLS_data["Date_Naming"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1imrc"), (2013, "r2imrc"), (2015, "r3imrc"), (2018, "r4imrc")]
+    CHARLS_data["Immediate_Word_Recall"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1dlrc"), (2013, "r2dlrc"), (2015, "r3dlrc"), (2018, "r4dlrc")]
+    CHARLS_data["Delayed_Word_Recall"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1ser7"), (2013, "r2ser7"), (2015, "r3ser7"), (2018, "r4ser7")]
+    CHARLS_data["Serial_7"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+    waves = [(2011, "r1draw"), (2013, "r2draw"), (2015, "r3draw"), (2018, "r4draw")]
+    CHARLS_data["Drawing_Picture"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
+
+    CHARLS_data["Cognition_score_m"] = CHARLS_data["Date_Naming"] + CHARLS_data["Immediate_Word_Recall"] + CHARLS_data["Delayed_Word_Recall"] + CHARLS_data["Serial_7"] + CHARLS_data["Drawing_Picture"]
+
+    # Merged once across all waves: gender, birth year, education
+    # Education
+    # 0 below high school
+    # 1 high school
+    # 2 college or above
+    harmonized["raeduc_c"] = harmonized["raeduc_c"].apply(lambda x : 1 if x == 6 or x == 7 else 2 if x in [8, 9, 10] else 0 if x in [1,2,3,4,5] else np.nan)
+    CHARLS_data = pd.merge(CHARLS_data, harmonized[["ID", "ragender", "rabyear", "raeduc_c"]], on='ID', how='left')
+
+    # Merge the *_m columns into their base columns
+    merge_list = ["marital_status_m", "Height_m", "Weight_m", "waist_m", "Systolic_m", "Diastolic_m",
+                  "Physical_activity_m", "Smoke_m", "Drink_m", "Hypertension_m", "Disabetes_or_High_Blood_Sugar_m",
+                  "Cancer_or_Malignant_Tumor_m", "Chronic_Lung_Diseases_m", "Heart_Problems_m", "Emotional_Nervous_or_Psychiatric_Problems_m",
+                  "Stroke_m", "Arthritis_or_Rheumatism_m", "Dyslipidemia_m", "Liver_Disease_m", "Kidney_Diease_m", "Stomach_or_Other_Digestive_Disease_m",
+                  "Asthma_m", "Memory_Related_Disease_m", "Psychiatric_score_m", "sleep_state_m", "Cognition_score_m"]
+    
+    # Walk merge_list, preferring the harmonized value and falling back to the existing one
+    for col_m in merge_list:
+        col = col_m.replace('_m', '')  # strip the trailing '_m' to get the base column name
+        if col in CHARLS_data.columns and col_m in CHARLS_data.columns:
+            CHARLS_data[col] = CHARLS_data[col_m].fillna(CHARLS_data[col])
+
+    # The chronic-disease coding differs between sources; recode 2 (no) as 0
+    chronic_disease = ['Hypertension','Dyslipidemia','Disabetes_or_High_Blood_Sugar','Cancer_or_Malignant_Tumor','Chronic_Lung_Diseases', 
+                  'Liver_Disease', 'Heart_Problems', 'Stroke', 'Kidney_Diease','Stomach_or_Other_Digestive_Disease', 
+                  'Emotional_Nervous_or_Psychiatric_Problems', 'Memory_Related_Disease','Arthritis_or_Rheumatism','Asthma']
+    CHARLS_data[chronic_disease] = CHARLS_data[chronic_disease].replace(2, 0)
+
+    # Merge "ragender", "rabyear", "raeduc_c" into "rgender", "birth_year", "education"
+    common_new_list = ["ragender", "rabyear", "raeduc_c"]
+    common_list = ["rgender", "birth_year", "education"]
+    for col_m, col in zip(common_new_list, common_list):
+        if col in CHARLS_data.columns and col_m in CHARLS_data.columns:
+            CHARLS_data[col] = CHARLS_data[col_m].fillna(CHARLS_data[col])
+
+    CHARLS_data = CHARLS_data.drop(columns=["Date_Naming", "Immediate_Word_Recall", "Delayed_Word_Recall", "Serial_7", "Drawing_Picture"] + merge_list+ common_new_list)
+    CHARLS_data.to_csv("CHARLS_data_pollutants_p_n_m_nd_h.csv", index=False)
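
One caution on merge_data: pd.concat(..., ignore_index=True) only lines the stacked waves up with CHARLS_data if the frame's rows are grouped by wave in the same order as `waves`; otherwise values land on the wrong rows. A keyed sketch that avoids the ordering assumption (hypothetical helper; assumes ID is unique within harmonized):

    def merge_data_keyed(CHARLS_data, harmonized, waves, out_col):
        # Assign through a boolean mask so correctness does not depend on row order
        for wave, col_name in waves:
            mask = CHARLS_data["wave"] == wave
            merged = pd.merge(CHARLS_data.loc[mask, ["ID"]], harmonized[["ID", col_name]],
                              on="ID", how="left")
            CHARLS_data.loc[mask, out_col] = merged[col_name].values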

+ 14 - 14
CHARLS_P/CHARLS_preprocess_main.py

@@ -134,12 +134,12 @@ if __name__ == "__main__":
     data_2011 = pd.merge(data_2011, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
 
     
-    # Compute the cognition score in three parts: telephone interview (10 pts), word recall (20 pts), drawing (1 pt)
+    # Compute the cognition score in three parts: telephone interview (9 pts), word recall (20 pts), drawing (1 pt)
     health_status["dc001s1_score"] = health_status["dc001s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
     health_status["dc001s2_score"] = health_status["dc001s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
     health_status["dc001s3_score"] = health_status["dc001s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
     health_status["dc002_score"] = health_status["dc002"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
-    health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
+    # health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
     health_status["dc019_score"] = health_status["dc019"].apply(lambda x : 1 if x==93 else 0 if pd.isna(x) else 0) 
     health_status["dc020_score"] = health_status["dc020"].apply(lambda x : 1 if x==86 else 0 if pd.isna(x) else 0) 
     health_status["dc021_score"] = health_status["dc021"].apply(lambda x : 1 if x==79 else 0 if pd.isna(x) else 0)
@@ -173,7 +173,7 @@ if __name__ == "__main__":
     health_status["draw_score"] = health_status["dc025"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
 
     data_2011["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
-        health_status["dc001s3_score"] + health_status["dc002_score"]+ health_status["dc003_score"]+ \
+        health_status["dc001s3_score"] + health_status["dc002_score"]+ \
         health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
         health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
         health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \
@@ -354,12 +354,12 @@ if __name__ == "__main__":
     
     data_2013 = pd.merge(data_2013, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
 
-    # Compute the cognition score in three parts: telephone interview (10 pts), word recall (10 pts), drawing (1 pt)
+    # Compute the cognition score in three parts: telephone interview (9 pts), word recall (10 pts), drawing (1 pt)
     health_status["dc001s1_score"] = health_status["dc001s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
     health_status["dc001s2_score"] = health_status["dc001s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
     health_status["dc001s3_score"] = health_status["dc001s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
     health_status["dc002_score"] = health_status["dc002"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
-    health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
+    # health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
     health_status["dc019_score"] = health_status["dc019"].apply(lambda x : 1 if x==93 else 0 if pd.isna(x) else 0) 
     health_status["dc020_score"] = health_status["dc020"].apply(lambda x : 1 if x==86 else 0 if pd.isna(x) else 0) 
     health_status["dc021_score"] = health_status["dc021"].apply(lambda x : 1 if x==79 else 0 if pd.isna(x) else 0)
@@ -393,7 +393,7 @@ if __name__ == "__main__":
     health_status["draw_score"] = health_status["dc025"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
 
     data_2013["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
-        health_status["dc001s3_score"] + health_status["dc002_score"]+ health_status["dc003_score"]+ \
+        health_status["dc001s3_score"] + health_status["dc002_score"]+ \
         health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
         health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
         health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \
@@ -550,12 +550,12 @@ if __name__ == "__main__":
     
     data_2015 = pd.merge(data_2015, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
 
-    # Compute the cognition score in three parts: telephone interview (10 pts), word recall (10 pts), drawing (1 pt)
+    # Compute the cognition score in three parts: telephone interview (9 pts), word recall (10 pts), drawing (1 pt)
     health_status["dc001s1_score"] = health_status["dc001s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
     health_status["dc001s2_score"] = health_status["dc001s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
     health_status["dc001s3_score"] = health_status["dc001s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
     health_status["dc002_score"] = health_status["dc002"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
-    health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
+    # health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
     health_status["dc019_score"] = health_status["dc019"].apply(lambda x : 1 if x==93 else 0 if pd.isna(x) else 0) 
     health_status["dc020_score"] = health_status["dc020"].apply(lambda x : 1 if x==86 else 0 if pd.isna(x) else 0) 
     health_status["dc021_score"] = health_status["dc021"].apply(lambda x : 1 if x==79 else 0 if pd.isna(x) else 0)
@@ -589,7 +589,7 @@ if __name__ == "__main__":
     health_status["draw_score"] = health_status["dc025"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
 
     data_2015["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
-        health_status["dc001s3_score"] + health_status["dc002_score"]+ health_status["dc003_score"]+ \
+        health_status["dc001s3_score"] + health_status["dc002_score"]+ \
         health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
         health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
         health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \
@@ -723,12 +723,12 @@ if __name__ == "__main__":
 
     data_2018 = pd.merge(data_2018, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
 
-    # Compute the cognition score in three parts: telephone interview (10 pts), word recall (10 pts), drawing (1 pt)
+    # Compute the cognition score in three parts: telephone interview (9 pts), word recall (10 pts), drawing (1 pt)
     cognition["dc001s1_score"] = cognition["dc001_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
     cognition["dc001s2_score"] = cognition["dc006_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
     cognition["dc001s3_score"] = cognition["dc003_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
     cognition["dc002_score"] = cognition["dc005_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
-    cognition["dc003_score"] = cognition["dc002_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
+    # cognition["dc003_score"] = cognition["dc002_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
     cognition["dc019_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_1"]==97 else 1 if pd.isna(x["dc014_w4_1"]) and x["dc014_w4_1_1"]==93 else 0 if pd.isna(x["dc014_w4_1"]) and (not x["dc014_w4_1_1"]==93) else np.nan, axis=1) 
     cognition["dc020_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_2"]==97 else 1 if pd.isna(x["dc014_w4_2"]) and x["dc014_w4_2_1"]==86 else 0 if pd.isna(x["dc014_w4_2"]) and (not x["dc014_w4_2_1"]==86) else np.nan, axis=1) 
     cognition["dc021_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_3"]==97 else 1 if pd.isna(x["dc014_w4_3"]) and x["dc014_w4_3_1"]==79 else 0 if pd.isna(x["dc014_w4_3"]) and (not x["dc014_w4_3_1"]==79) else np.nan, axis=1)
@@ -762,7 +762,7 @@ if __name__ == "__main__":
     cognition["draw_score"] = cognition["dc024_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
 
     data_2018["Cognition_score"] = cognition["dc001s1_score"] + cognition["dc001s2_score"] + \
-        cognition["dc001s3_score"] + cognition["dc002_score"]+ cognition["dc003_score"]+ \
+        cognition["dc001s3_score"] + cognition["dc002_score"]+ \
         cognition["dc019_score"]+ cognition["dc020_score"] + cognition["dc021_score"]+ \
         cognition["dc022_score"]+ cognition["dc023_score"] + cognition["dc006s1_score"] + \
         cognition["dc006s2_score"] + cognition["dc006s3_score"] + cognition["dc006s4_score"] + \
@@ -906,7 +906,7 @@ if __name__ == "__main__":
     
     data_2020 = pd.merge(data_2020, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
 
-    # Compute the cognition score in three parts: telephone interview (10 pts), word recall (10 pts), drawing (1 pt)
+    # Compute the cognition score in three parts: telephone interview (9 pts), word recall (10 pts), drawing (1 pt)
     health_status["dc001s1_score"] = health_status["dc001"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
     health_status["dc001s2_score"] = health_status["dc005"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
     health_status["dc001s3_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
@@ -943,7 +943,7 @@ if __name__ == "__main__":
     health_status["draw_score"] = health_status["dc009"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
 
     data_2020["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
-        health_status["dc001s3_score"] + health_status["dc002_score"]+ health_status["dc003_score"]+ \
+        health_status["dc001s3_score"] + health_status["dc002_score"]+ \
         health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
         health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
         health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \

+ 9 - 2
CHARLS_P/CHARLS_split.py

@@ -20,6 +20,11 @@ if __name__ == "__main__":
     disease_features = ['Hypertension','Dyslipidemia','Disabetes_or_High_Blood_Sugar','Cancer_or_Malignant_Tumor','Chronic_Lung_Diseases', 
                   'Liver_Disease', 'Heart_Problems', 'Stroke', 'Kidney_Diease','Stomach_or_Other_Digestive_Disease', 
                   'Emotional_Nervous_or_Psychiatric_Problems', 'Memory_Related_Disease','Arthritis_or_Rheumatism','Asthma']
+    
+    # Interaction of night-time light exposure and air pollution on chronic non-communicable diseases
+    # (hypertension, heart disease, diabetes, obesity, stroke, arthritis, cancer, and memory-related diseases)
+    one_data = data.loc[:, base_feature + pollutant_feature + nl_feature + disease_features]
+    one_data.to_csv("1.csv", index=False)
+    
     # Interaction of night-time light exposure and air pollution on cognitive function in older adults,
     # and the mediating effects of inflammation and oxidative stress
     one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + blood_features+["Cognition_score", "Memory_Related_Disease"]]
     one_data.to_csv("2.csv", index=False)
@@ -65,10 +70,12 @@ if __name__ == "__main__":
     one_data.to_csv("12.csv", index=False)   
 
     # 13. Interaction of night-time light exposure and air pollution on sleep disorders, and the mediating effects of inflammation and oxidative stress
-    
+    one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + blood_features + ["sleep_state"]]
+    one_data.to_csv("13.csv", index=False)
 
     # 14. Bayesian-network prediction of disease onset risk from night-time light exposure, air pollution, and obesity-related indicators (BMI, conicity index, relative fat mass index, etc.)
-
+    one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2013) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + ["Height", "Weight", "waist"]]
+    one_data.to_csv("14.csv", index=False)
 
     # 15. Association between trajectories of night-time light and air-pollution exposure (latent growth models, etc.) and disease
     one_data = data.loc[:, base_feature + pollutant_feature + nl_feature + disease_features]
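
The chained `(data['wave'] == ...) | ...` selections can be written with .isin, which stays readable as the wave lists grow; a sketch matching the 2011/2013/2015 subset written to 14.csv above:

    one_data = data.loc[data['wave'].isin([2011, 2013, 2015]),
                        base_feature + pollutant_feature + nl_feature + ["Height", "Weight", "waist"]]
    one_data.to_csv("14.csv", index=False)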

+ 3 - 2
CHARLS_P/chongqing_pm.py

@@ -3,7 +3,7 @@ from glob import glob
 import os
 
 def pollutant_chongqing_handle():
-    path = "result_O3"
+    path = "pollution/result_SO2"
     data = pd.read_csv(path+".csv")
     # Find the rows where the province column equals '重庆市' (Chongqing)
     chongqing_rows = data[data['province'] == '重庆市']
@@ -35,4 +35,5 @@ def aba_chongqing_handle():
         df.to_csv(path+file_name+"_p"+extension, index=False)
 
 if __name__ == "__main__":
-    aba_chongqing_handle()
+    pollutant_chongqing_handle()
+    # aba_chongqing_handle()

+ 2 - 2
CHARLS_P/nc2geotiff.py

@@ -14,12 +14,12 @@ import concurrent.futures
 # from osgeo import osr
 
 #Define work and output paths
-WorkPath = r'/root/r_base/O3'
+WorkPath = r'/root/r_base/pollution/SO2'
 OutPath  = WorkPath
 
 #Define air pollutant type 
 #e.g., PM1, PM2.5, PM10, O3, NO2, SO2, and CO, et al.
-AP = 'O3'
+AP = 'SO2'
 
 #Define spatial resolution 
 #e.g., 1 km ≈ 0.01 Degree

+ 4 - 1
README.md

@@ -7,4 +7,7 @@ AreaCity-Query-Geometry contains the program that processes the coordinate data
 
 1. What is comorbidity, and how is it defined? Does having two or more diseases at the same time already count as comorbidity?
 2. The outcome variable is categorical and states can transition into one another; how should the comorbidity states be defined?
-3. Define the states: healthy state: people with no comorbid conditions. Single-disease state: people with one specific disease; can be split into categories such as mental disorders (e.g., anxiety, depression) or physical diseases (e.g., hypertension, diabetes). Multimorbidity state: two or more diseases present at the same time, e.g., anxiety with insomnia, or diabetes with cardiovascular disease. Worsening state: comorbid symptoms gradually worsen and the condition deteriorates, usually tied to disease progression or the degree of night-light exposure. Remission state: symptoms ease and the patient recovers partially or fully. Death state:
+3. Define the states:
+4. Healthy state: people with no comorbid conditions.
+5. Single-disease state: people with one specific disease; can be split into categories such as mental disorders (e.g., anxiety, depression) or physical diseases (e.g., hypertension, diabetes).
+6. Multimorbidity state: two or more diseases present at the same time, e.g., anxiety with insomnia, or diabetes with cardiovascular disease.

+ 60 - 0
paper_code/code.R

@@ -0,0 +1,60 @@
+# install.packages("msm", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")
+
+library(msm)
+library(survival)
+
+# data <- data.frame(
+#   ID = c(1, 1, 1, 2, 2, 2),        # subject ID
+#   time = c(0, 1, 2, 0, 1, 2),      # follow-up time
+#   state = c(1, 2, 3, 1, 1, 2),     # disease state
+#   birth_year = c(1970, 1970, 1970, 1980, 1980, 1980),
+#   gender = c(1, 1, 1, 2, 2, 2),    # gender
+#   education = c(3, 3, 3, 2, 2, 2)  # education level
+# )
+# statetable.msm(state, ID, data = data)
+
+# qmatrix_init <- matrix(c(-0.5, 0.25, 0.25,
+#                          0.1, -0.3, 0.2,
+#                          0, 0, 0), 
+#                        nrow = 3, byrow = TRUE)
+
+# msm_model <- msm(state ~ time, subject = ID, data = data,
+#                  qmatrix = qmatrix_init, 
+#                  covariates = ~ gender + education)
+# pmatrix.msm(msm_model, t = 1)  # t = 1 is the interval between follow-ups
+# summary(msm_model)
+
+# Build the data frame
+data <- data.frame(
+  ID = c(1, 1, 1, 2, 2, 2),        # subject ID
+  time = c(0, 1, 2, 0, 1, 2),      # follow-up time
+  state = c(1, 2, 3, 1, 1, 2),     # disease state
+  birth_year = c(1970, 1970, 1970, 1980, 1980, 1980), # birth year
+  gender = c(1, 1, 1, 2, 2, 2),    # gender
+  education = c(3, 3, 3, 2, 2, 2)  # education level
+)
+
+# Tabulate the state-transition frequencies
+freq_table <- statetable.msm(state, ID, data = data)
+print(freq_table)
+
+# Initialize the transition-intensity matrix
+qmatrix_init <- matrix(c(-0.5, 0.25, 0.25,
+                          0.1, -0.3, 0.2,
+                          0.3, 0.2, -0.5), 
+                        nrow = 3, byrow = TRUE)
+
+# Crude initial estimates (crudeinits.msm expects only the state ~ time formula; covariates go to msm)
+crude_init <- crudeinits.msm(state ~ time, subject = ID, data = data, qmatrix = qmatrix_init)
+
+# Fit the multi-state model
+msm_model <- msm(state ~ time, subject = ID, data = data,
+                 qmatrix = crude_init,
+                 covariates = ~ gender + education)
+
+# Transition-probability matrix
+prob_matrix <- pmatrix.msm(msm_model, t = 1)  # t = 1 is the interval between follow-ups
+print(prob_matrix)
+
+# Inspect the detailed model results
+summary(msm_model)

+ 73 - 56
test.py

@@ -75,59 +75,76 @@ import pandas as pd
 #     except Exception as e:
 #         print(f"Error reading {file_path}: {e}")
 # print(num)
-import pyreadstat
-import numpy as np
-
-year = "2018"
-cognition, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Cognition.dta")
-# Compute the cognition score in three parts: telephone interview (10 pts), word recall (10 pts), drawing (1 pt)
-cognition["dc001s1_score"] = cognition["dc001_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
-cognition["dc001s2_score"] = cognition["dc006_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
-cognition["dc001s3_score"] = cognition["dc003_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
-cognition["dc002_score"] = cognition["dc005_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
-cognition["dc003_score"] = cognition["dc002_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
-cognition["dc019_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_1"]==97 else 1 if pd.isna(x["dc014_w4_1"]) and x["dc014_w4_1_1"]==93 else 0 if pd.isna(x["dc014_w4_1"]) and (not x["dc014_w4_1_1"]==93) else np.nan, axis=1) 
-cognition["dc020_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_2"]==97 else 1 if pd.isna(x["dc014_w4_2"]) and x["dc014_w4_2_1"]==86 else 0 if pd.isna(x["dc014_w4_2"]) and (not x["dc014_w4_2_1"]==86) else np.nan, axis=1) 
-cognition["dc021_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_3"]==97 else 1 if pd.isna(x["dc014_w4_3"]) and x["dc014_w4_3_1"]==79 else 0 if pd.isna(x["dc014_w4_3"]) and (not x["dc014_w4_3_1"]==79) else np.nan, axis=1)
-cognition["dc022_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_4"]==97 else 1 if pd.isna(x["dc014_w4_4"]) and x["dc014_w4_4_1"]==72 else 0 if pd.isna(x["dc014_w4_4"]) and (not x["dc014_w4_4_1"]==72) else np.nan, axis=1)
-cognition["dc023_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_5"]==97 else 1 if pd.isna(x["dc014_w4_5"]) and x["dc014_w4_5_1"]==65 else 0 if pd.isna(x["dc014_w4_5"]) and (not x["dc014_w4_5_1"]==65) else np.nan, axis=1)
-
-# Word recall
-cognition["dc006s1_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s1"]==1 else 0, axis=1)
-cognition["dc006s2_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s2"]==2 else 0, axis=1)
-cognition["dc006s3_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s3"]==3 else 0, axis=1)
-cognition["dc006s4_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s4"]==4 else 0, axis=1) 
-cognition["dc006s5_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s5"]==5 else 0, axis=1) 
-cognition["dc006s6_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s6"]==6 else 0, axis=1)                                            
-cognition["dc006s7_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s7"]==7 else 0, axis=1) 
-cognition["dc006s8_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s8"]==8 else 0, axis=1) 
-cognition["dc006s9_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s9"]==9 else 0, axis=1)                                            
-cognition["dc006s10_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s10"]==10 else 0, axis=1)                                           
-# cognition["dc006s11_score"] = cognition["dc028_w4_s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0) 
-cognition["dc027s1_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s1"]==1 else 0, axis=1) 
-cognition["dc027s2_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s2"]==2 else 0, axis=1) 
-cognition["dc027s3_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s3"]==3 else 0, axis=1) 
-cognition["dc027s4_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s4"]==4 else 0, axis=1) 
-cognition["dc027s5_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s5"]==5 else 0, axis=1) 
-cognition["dc027s6_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s6"]==6 else 0, axis=1)                                            
-cognition["dc027s7_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s7"]==7 else 0, axis=1) 
-cognition["dc027s8_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s8"]==8 else 0, axis=1) 
-cognition["dc027s9_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s9"]==9 else 0, axis=1)                                            
-cognition["dc027s10_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s10"]==10 else 0, axis=1)                                            
-# cognition["dc027s11_score"] = cognition["dc047_w4_s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0)
-# Drawing
-cognition["draw_score"] = cognition["dc024_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
-
-cognition["Cognition_score"] = cognition["dc001s1_score"] + cognition["dc001s2_score"] + \
-    cognition["dc001s3_score"] + cognition["dc002_score"]+ cognition["dc003_score"]+ \
-    cognition["dc019_score"]+ cognition["dc020_score"] + cognition["dc021_score"]+ \
-    cognition["dc022_score"]+ cognition["dc023_score"] + cognition["dc006s1_score"] + \
-    cognition["dc006s2_score"] + cognition["dc006s3_score"] + cognition["dc006s4_score"] + \
-    cognition["dc006s5_score"] + cognition["dc006s6_score"] + cognition["dc006s7_score"] + \
-    cognition["dc006s8_score"] + cognition["dc006s9_score"] + cognition["dc006s10_score"] + \
-    cognition["dc027s1_score"]+ cognition["dc027s2_score"]+ \
-    cognition["dc027s3_score"]+ cognition["dc027s4_score"]+ cognition["dc027s5_score"]+ \
-    cognition["dc027s6_score"]+ cognition["dc027s7_score"]+ cognition["dc027s8_score"]+ \
-    cognition["dc027s9_score"]+cognition["dc027s10_score"]+\
-    cognition["draw_score"]
-cognition.to_csv("/root/r_base/CHARLS/test.csv")
+# import pyreadstat
+# import numpy as np
+
+# year = "2018"
+# cognition, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Cognition.dta")
+# # Compute the cognition score in three parts: telephone interview (10 pts), word recall (10 pts), drawing (1 pt)
+# cognition["dc001s1_score"] = cognition["dc001_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
+# cognition["dc001s2_score"] = cognition["dc006_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
+# cognition["dc001s3_score"] = cognition["dc003_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
+# cognition["dc002_score"] = cognition["dc005_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
+# cognition["dc003_score"] = cognition["dc002_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
+# cognition["dc019_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_1"]==97 else 1 if pd.isna(x["dc014_w4_1"]) and x["dc014_w4_1_1"]==93 else 0 if pd.isna(x["dc014_w4_1"]) and (not x["dc014_w4_1_1"]==93) else np.nan, axis=1) 
+# cognition["dc020_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_2"]==97 else 1 if pd.isna(x["dc014_w4_2"]) and x["dc014_w4_2_1"]==86 else 0 if pd.isna(x["dc014_w4_2"]) and (not x["dc014_w4_2_1"]==86) else np.nan, axis=1) 
+# cognition["dc021_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_3"]==97 else 1 if pd.isna(x["dc014_w4_3"]) and x["dc014_w4_3_1"]==79 else 0 if pd.isna(x["dc014_w4_3"]) and (not x["dc014_w4_3_1"]==79) else np.nan, axis=1)
+# cognition["dc022_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_4"]==97 else 1 if pd.isna(x["dc014_w4_4"]) and x["dc014_w4_4_1"]==72 else 0 if pd.isna(x["dc014_w4_4"]) and (not x["dc014_w4_4_1"]==72) else np.nan, axis=1)
+# cognition["dc023_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_5"]==97 else 1 if pd.isna(x["dc014_w4_5"]) and x["dc014_w4_5_1"]==65 else 0 if pd.isna(x["dc014_w4_5"]) and (not x["dc014_w4_5_1"]==65) else np.nan, axis=1)
+
+# # Word recall
+# cognition["dc006s1_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s1"]==1 else 0, axis=1)
+# cognition["dc006s2_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s2"]==2 else 0, axis=1)
+# cognition["dc006s3_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s3"]==3 else 0, axis=1)
+# cognition["dc006s4_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s4"]==4 else 0, axis=1) 
+# cognition["dc006s5_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s5"]==5 else 0, axis=1) 
+# cognition["dc006s6_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s6"]==6 else 0, axis=1)                                            
+# cognition["dc006s7_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s7"]==7 else 0, axis=1) 
+# cognition["dc006s8_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s8"]==8 else 0, axis=1) 
+# cognition["dc006s9_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s9"]==9 else 0, axis=1)                                            
+# cognition["dc006s10_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s10"]==10 else 0, axis=1)                                           
+# # cognition["dc006s11_score"] = cognition["dc028_w4_s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0) 
+# cognition["dc027s1_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s1"]==1 else 0, axis=1) 
+# cognition["dc027s2_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s2"]==2 else 0, axis=1) 
+# cognition["dc027s3_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s3"]==3 else 0, axis=1) 
+# cognition["dc027s4_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s4"]==4 else 0, axis=1) 
+# cognition["dc027s5_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s5"]==5 else 0, axis=1) 
+# cognition["dc027s6_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s6"]==6 else 0, axis=1)                                            
+# cognition["dc027s7_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s7"]==7 else 0, axis=1) 
+# cognition["dc027s8_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s8"]==8 else 0, axis=1) 
+# cognition["dc027s9_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s9"]==9 else 0, axis=1)                                            
+# cognition["dc027s10_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s10"]==10 else 0, axis=1)                                            
+# # cognition["dc027s11_score"] = cognition["dc047_w4_s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0)
+# # Drawing
+# cognition["draw_score"] = cognition["dc024_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
+
+# cognition["Cognition_score"] = cognition["dc001s1_score"] + cognition["dc001s2_score"] + \
+#     cognition["dc001s3_score"] + cognition["dc002_score"]+ cognition["dc003_score"]+ \
+#     cognition["dc019_score"]+ cognition["dc020_score"] + cognition["dc021_score"]+ \
+#     cognition["dc022_score"]+ cognition["dc023_score"] + cognition["dc006s1_score"] + \
+#     cognition["dc006s2_score"] + cognition["dc006s3_score"] + cognition["dc006s4_score"] + \
+#     cognition["dc006s5_score"] + cognition["dc006s6_score"] + cognition["dc006s7_score"] + \
+#     cognition["dc006s8_score"] + cognition["dc006s9_score"] + cognition["dc006s10_score"] + \
+#     cognition["dc027s1_score"]+ cognition["dc027s2_score"]+ \
+#     cognition["dc027s3_score"]+ cognition["dc027s4_score"]+ cognition["dc027s5_score"]+ \
+#     cognition["dc027s6_score"]+ cognition["dc027s7_score"]+ cognition["dc027s8_score"]+ \
+#     cognition["dc027s9_score"]+cognition["dc027s10_score"]+\
+#     cognition["draw_score"]
+# cognition.to_csv("/root/r_base/CHARLS/test.csv")
+
+import pandas as pd
+CHARLS_data = pd.read_csv("CHARLS_data_pollutants_p_n_m_nd_h.csv")
+# Merge the *_m columns into their base columns
+merge_list = ["marital_status_m", "Height_m", "Weight_m", "waist_m", "Systolic_m", "Diastolic_m",
+              "Physical_activity_m", "Smoke_m", "Drink_m", "Hypertension_m", "Disabetes_or_High_Blood_Sugar_m",
+              "Cancer_or_Malignant_Tumor_m", "Chronic_Lung_Diseases_m", "Heart_Problems_m", "Emotional_Nervous_or_Psychiatric_Problems_m",
+              "Stroke_m", "Arthritis_or_Rheumatism_m", "Dyslipidemia_m", "Liver_Disease_m", "Kidney_Diease_m", "Stomach_or_Other_Digestive_Disease_m",
+              "Asthma_m", "Memory_Related_Disease_m", "Psychiatric_score_m", "sleep_state_m", "Cognition_score_m"]
+
+# Walk merge_list, filling gaps in each base column from its *_m counterpart
+for col_m in merge_list:
+    col = col_m.replace('_m', '')  # strip the trailing '_m' to get the base column name
+    if col in CHARLS_data.columns and col_m in CHARLS_data.columns:
+        CHARLS_data[col] = CHARLS_data[col_m].fillna(CHARLS_data[col])
+
+CHARLS_data.to_csv("CHARLS_data_pollutants_p_n_m_nd_h_test.csv")
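
As a quick sanity check on the merge above, re-reading the untouched input and counting how many gaps each base column lost shows whether the *_m columns contributed anything; a minimal sketch (hypothetical addition, same names as above):

    before = pd.read_csv("CHARLS_data_pollutants_p_n_m_nd_h.csv")
    for col_m in merge_list:
        col = col_m.replace('_m', '')
        if col in before.columns and col_m in before.columns:
            filled = before[col].isna().sum() - CHARLS_data[col].isna().sum()
            print(f"{col}: filled {filled} missing values")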