SHA1
--- a/CHARLS_P/CHARLS_NDVI.py
+++ b/CHARLS_P/CHARLS_NDVI.py
@@ -0,0 +1,26 @@
 
															+import pandas as pd
														
 
															+
														
 
															def add_lagged_ndvi(charls, ndvi, years):
															    """Attach prior-year NDVI values to each survey row, in place.

															    For every wave year in ``years``, the rows of ``charls`` whose ``wave``
															    equals that year are matched to ``ndvi`` (``city`` against ``CITY``) and
															    receive the NDVI column labelled ``year - 1`` as ``last_year_ndvi`` and
															    the one labelled ``year - 2`` as ``before_last_ndvi``. A year column
															    that is absent from ``ndvi`` leaves the target as NaN instead of
															    raising ``KeyError``.

															    Args:
															        charls: Survey frame with ``wave`` and ``city`` columns.
															        ndvi: Frame with a ``CITY`` column and one column per calendar year.
															        years: Wave years to process.

															    Returns:
															        ``charls`` itself, with the two NDVI columns added or updated.

															    Raises:
															        pandas.errors.MergeError: If ``ndvi`` lists a city more than once.
															    """
															    # Year headers read from a spreadsheet may arrive as numbers rather than
															    # text; normalise them so the str(year - lag) lookup below can match.
															    ndvi = ndvi.rename(columns=str)

															    targets = ((1, "last_year_ndvi"), (2, "before_last_ndvi"))
															    for _, target in targets:
															        if target not in charls.columns:
															            # Keep the output schema stable even when no year column matches.
															            charls[target] = float("nan")

															    for year in years:
															        in_wave = charls["wave"] == year
															        if in_wave.any():
															            # Only the join key is taken from the survey side so that no
															            # survey column can collide with a year column. many_to_one makes
															            # a duplicated city fail loudly instead of changing the row count.
															            merged = pd.merge(
															                charls.loc[in_wave, ["city"]],
															                ndvi,
															                left_on="city",
															                right_on="CITY",
															                how="left",
															                validate="many_to_one",
															            )
															            for lag, target in targets:
															                source = str(year - lag)
															                if source in merged.columns:
															                    # A left merge keeps the left row order, so a positional
															                    # write back into the masked rows is aligned.
															                    charls.loc[in_wave, target] = merged[source].to_numpy()
															        print(year)
															    return charls


															if __name__ == "__main__":
															    years = [2011, 2013, 2015, 2018, 2020]

															    # Load the CHARLS survey data.
															    CHARLS_data = pd.read_csv("CHARLS_data_pollutants_mete.csv")

															    # Load the city-level yearly NDVI table.
															    ndvi_data = pd.read_excel("NDVI/【立方数据学社】地级市等级的逐年NDVI.xlsx")

															    add_lagged_ndvi(CHARLS_data, ndvi_data, years)
															    CHARLS_data.to_csv("CHARLS_data_pollutants_p_n_m_nd.csv", index=False)
														
--- a/CHARLS_P/CHARLS_PM.py
+++ b/CHARLS_P/CHARLS_PM.py
@@ -13,9 +13,11 @@ def pollutant_handle(path):
 
															         CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
														
 
															         #两个表合并
														
 
															         table_merge = pd.merge(CHARLS_data_year, pollutants_data, on=['province', 'city'], how='left')
														
 
															-        #更新CHARLS表
														
 
															-        CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_pm10'] = table_merge[str(year-1)].values
														
 
															-        CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_pm10'] = table_merge[str(year-2)].values
														
 
															+        if str(year - 1) in table_merge.columns:
														
 
															+            #更新CHARLS表
														
 
															+            CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_pm10'] = table_merge[str(year-1)].values
														
 
															+        if str(year - 2) in table_merge.columns:
														
 
															+            CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_pm10'] = table_merge[str(year-2)].values
														
 
															         CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
														
 
															         print(year)
														
--- a/CHARLS_P/CHARLS_exit.py
+++ b/CHARLS_P/CHARLS_exit.py
@@ -0,0 +1,24 @@
 
															+import pandas as pd
														
 
															+import pyreadstat
														
 
															+
														
 
															def merge_exit_year(charls, exit_interview):
															    """Left-join one exit-interview table onto the survey rows.

															    The ``exb001_1`` value of each exit record is stored in a single
															    ``exit_year`` column. If ``charls`` already carries ``exit_year`` from an
															    earlier call, existing values are kept and only missing ones are filled,
															    so repeated calls never produce ``exit_year_x`` / ``exit_year_y``.

															    Args:
															        charls: Survey frame with a numeric ``ID`` column.
															        exit_interview: Exit-interview frame with ``ID`` and ``exb001_1``.

															    Returns:
															        A new frame: ``charls`` plus one ``exit_year`` column.
															    """
															    records = pd.DataFrame(
															        {
															            # IDs that cannot be parsed become <NA> and simply never match.
															            "ID": pd.to_numeric(exit_interview["ID"], errors="coerce").astype("Int64"),
															            # NOTE(review): exb001_1 is presumably the year of death - confirm
															            # against the CHARLS exit-interview codebook.
															            "exit_year": exit_interview["exb001_1"],
															        }
															    )
															    merged = pd.merge(charls, records, on="ID", how="left", suffixes=("", "_new"))
															    if "exit_year_new" in merged.columns:
															        # An earlier wave already supplied exit_year: keep it, fill the gaps.
															        merged["exit_year"] = merged["exit_year"].fillna(merged["exit_year_new"])
															        merged = merged.drop(columns="exit_year_new")
															    return merged


															if __name__ == "__main__":
															    # Load the CHARLS survey data.
															    CHARLS_data = pd.read_csv("CHARLS_data_pollutants_mete.csv")

															    # Exit (death) records come from the 2013 and the 2020 waves; both are
															    # folded into the same exit_year column, earliest wave first.
															    exit_files = (
															        "/root/r_base/CHARLS/CHARLS2013/Exit_Interview.dta",
															        "/root/r_base/CHARLS/CHARLS2020/Exit_Module.dta",
															    )
															    for dta_path in exit_files:
															        exit_interview, _meta = pyreadstat.read_dta(dta_path)
															        CHARLS_data = merge_exit_year(CHARLS_data, exit_interview)

															    CHARLS_data.to_csv("CHARLS_data_pollutants_exit.csv", index=False)
														
--- a/CHARLS_P/CHARLS_harmonized.py
+++ b/CHARLS_P/CHARLS_harmonized.py
@@ -0,0 +1,187 @@
 
															+import pandas as pd
														
 
															+import numpy as np
														
 
															+import pyreadstat
														
 
															+
														
 
															+
														
 
															def update_mstat(harmonized, col_name):
															    """Recode one marital-status column of ``harmonized`` to 0/1, in place.

															    Codes 1 and 3 become 1, codes 4, 5, 7 and 8 become 0, and every other
															    value (including missing) becomes NaN.

															    The recoding is not idempotent: running it a second time on the same
															    column turns the 0s into NaN, because 0 is not one of the source codes.

															    Args:
															        harmonized: Frame holding the column; it is modified in place.
															        col_name: Name of the marital-status column to recode.
															    """
															    recode = {1: 1, 3: 1, 4: 0, 5: 0, 7: 0, 8: 0}
															    # Series.map yields NaN for any value that is not a key of the mapping.
															    harmonized[col_name] = harmonized[col_name].map(recode)
														
 
															+
														
 
															def update_physical(harmonized):
															    """Derive ``r1phys`` .. ``r4phys`` activity levels in ``harmonized``, in place.

															    For each wave ``w`` in 1..4 the level is taken from ``r{w}vgact_c``,
															    ``r{w}mdact_c`` and ``r{w}ltact_c``, first match wins:

															    * 2 when the vigorous flag equals 1;
															    * 1 when the moderate flag equals 1;
															    * 0 when the light flag equals 1, or all three flags equal 0;
															    * NaN otherwise (for example when the flags are missing).

															    Only the ``r{w}phys`` columns are written, so calling this repeatedly
															    yields the same result.

															    Args:
															        harmonized: Frame holding the twelve ``*_c`` flag columns.
															    """
															    for wave in range(1, 5):
															        vigorous = harmonized[f"r{wave}vgact_c"]
															        moderate = harmonized[f"r{wave}mdact_c"]
															        light = harmonized[f"r{wave}ltact_c"]
															        all_zero = (vigorous == 0) & (moderate == 0) & (light == 0)
															        # np.select evaluates the conditions in order, mirroring an
															        # if / elif / else chain; comparisons against NaN are False.
															        harmonized[f"r{wave}phys"] = np.select(
															            [
															                np.asarray(vigorous == 1),
															                np.asarray(moderate == 1),
															                np.asarray((light == 1) | all_zero),
															            ],
															            [2, 1, 0],
															            default=np.nan,
															        )
														
 
															def merge_data(harmonized, waves, flag="other", charls_data=None):
															    """Look up one harmonized column per wave for the matching survey rows.

															    For every ``(wave, col_name)`` pair, the survey rows whose ``wave``
															    equals that year are left-joined to ``harmonized`` on ``ID`` and the
															    joined ``col_name`` values are collected.

															    Args:
															        harmonized: Harmonized frame with ``ID`` and the requested columns.
															            It is modified in place when ``flag`` is "mstat" or "phys".
															        waves: Iterable of ``(wave_year, column_name)`` pairs.
															        flag: "mstat" recodes each requested column with ``update_mstat``;
															            "phys" derives the ``r1phys``..``r4phys`` columns with
															            ``update_physical``; any other value does no preprocessing.
															        charls_data: Survey frame with ``ID`` and ``wave`` columns. When
															            omitted, the module-level ``CHARLS_data`` is used.

															    Returns:
															        A list with one Series per pair, in the order given. Each Series is
															        indexed 0..k-1 in the row order of the matching survey rows.
															    """
															    if charls_data is None:
															        # Backward compatible default: the frame the script keeps at module level.
															        charls_data = CHARLS_data

															    if flag == "phys" and waves:
															        # update_physical fills all four wave columns in one go, so a single
															        # call is enough; repeating it per wave would only redo the work.
															        update_physical(harmonized)

															    merged_data = []
															    for wave, col_name in waves:
															        if flag == "mstat":
															            update_mstat(harmonized, col_name)
															        # Only the join key is needed from the survey side; this also keeps
															        # a same-named survey column from colliding with col_name.
															        wave_ids = charls_data.loc[charls_data["wave"] == wave, ["ID"]]
															        joined = pd.merge(wave_ids, harmonized[["ID", col_name]], on="ID", how="left")
															        merged_data.append(joined[col_name])
															    return merged_data
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    harmonized, meta = pyreadstat.read_dta("/root/r_base/CHARLS/Harmonized_CHARLS/H_CHARLS_D_Data.dta")
														
 
															+    CHARLS_data = pd.read_csv("CHARLS_data_pollutants_p_n_m_nd.csv")
														
 
															+    harmonized['ID'] = harmonized['ID'].astype(str)  # 转换为字符串
														
 
															+    CHARLS_data['ID'] = CHARLS_data['ID'].astype(str)  # 转换为字符串
														
 
															+    #婚姻状况
														
 
															+    # 1 married or partnered
														
 
															+    # 0 other marital status (separated, divorced, unmarried, or widowed)
														
 
															+    # 定义年份和对应的列名
														
 
															+    waves = [(2011, "r1mstat"), (2013, "r2mstat"), (2015, "r3mstat"), (2018, "r4mstat")]
														
 
															+    # 将四列数据合并为一列，并赋值给 CHARLS_data["mstat"]
														
 
															+    CHARLS_data["marital_status_m"] = pd.concat(merge_data(harmonized, waves, "mstat"), ignore_index=True)
														
 
															+
														
 
															+    #身高
														
 
															+    waves = [(2011, "r1mheight"), (2013, "r2mheight"), (2015, "r3mheight")]
														
 
															+    CHARLS_data["Height_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+
														
 
															+    #体重
														
 
															+    waves = [(2011, "r1mweight"), (2013, "r2mweight"), (2015, "r3mweight")]
														
 
															+    CHARLS_data["Weight_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+
														
 
															+    #腰围
														
 
															+    waves = [(2011, "r1mwaist"), (2013, "r2mwaist"), (2015, "r3mwaist")]
														
 
															+    CHARLS_data["waist_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+
														
 
															+    #BMI
														
 
															+    waves = [(2011, "r1mbmi"), (2013, "r2mbmi"), (2015, "r3mbmi")]
														
 
															+    CHARLS_data["BMI"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+
														
 
															+    #收缩压#舒张压
														
 
															+    waves = [(2011, "r1systo"), (2013, "r2systo"), (2015, "r3systo")]
														
 
															+    CHARLS_data["Systolic_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1diasto"), (2013, "r2diasto"), (2015, "r3diasto")]
														
 
															+    CHARLS_data["Diastolic_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+
														
 
															+    # 体力活动
														
 
															+    # 2 vigorous (vigorous activity more than once a week)
														
 
															+    # 1 moderate (moderate activity more than once a week)
														
 
															+    # 0 inactive (the rest)
														
 
															+    waves = [(2011, "r1phys"), (2013, "r2phys"), (2015, "r3phys"), (2018, "r4phys")]
														
 
															+    CHARLS_data["Physical_activity_m"] = pd.concat(merge_data(harmonized, waves, "phys"), ignore_index=True)
														
 
															+
														
 
															+    # 抽烟
														
 
															+    # 1 抽过烟
														
 
															+    # 0 没有抽过烟
														
 
															+    waves = [(2011, "r1smokev"), (2013, "r2smokev"), (2015, "r3smokev"), (2018, "r4smokev")]
														
 
															+    CHARLS_data["Smoke_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+
														
 
															+    # 喝酒
														
 
															+    # 1 喝过酒
														
 
															+    # 0 没有喝过酒
														
 
															+    waves = [(2011, "r1drinkev"), (2013, "r2drinkev"), (2015, "r3drinkev"), (2018, "r4drinkev")]
														
 
															+    CHARLS_data["Drink_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+
														
 
															+    #慢性病
														
 
															+    waves = [(2011, "r1hibpe"), (2013, "r2hibpe"), (2015, "r3hibpe"), (2018, "r4hibpe")]
														
 
															+    CHARLS_data["Hypertension_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1diabe"), (2013, "r2diabe"), (2015, "r3diabe"), (2018, "r4diabe")]
														
 
															+    CHARLS_data["Disabetes_or_High_Blood_Sugar_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1cancre"), (2013, "r2cancre"), (2015, "r3cancre"), (2018, "r4cancre")]
														
 
															+    CHARLS_data["Cancer_or_Malignant_Tumor_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1lunge"), (2013, "r2lunge"), (2015, "r3lunge"), (2018, "r4lunge")]
														
 
															+    CHARLS_data["Chronic_Lung_Diseases_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1hearte"), (2013, "r2hearte"), (2015, "r3hearte"), (2018, "r4hearte")]
														
 
															+    CHARLS_data["Heart_Problems_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1psyche"), (2013, "r2psyche"), (2015, "r3psyche"), (2018, "r4psyche")]
														
 
															+    CHARLS_data["Emotional_Nervous_or_Psychiatric_Problems_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1stroke"), (2013, "r2stroke"), (2015, "r3stroke"), (2018, "r4stroke")]
														
 
															+    CHARLS_data["Stroke_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1arthre"), (2013, "r2arthre"), (2015, "r3arthre"), (2018, "r4arthre")]
														
 
															+    CHARLS_data["Arthritis_or_Rheumatism_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1dyslipe"), (2013, "r2dyslipe"), (2015, "r3dyslipe"), (2018, "r4dyslipe")]
														
 
															+    CHARLS_data["Dyslipidemia_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1livere"), (2013, "r2livere"), (2015, "r3livere"), (2018, "r4livere")]
														
 
															+    CHARLS_data["Liver_Disease_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1kidneye"), (2013, "r2kidneye"), (2015, "r3kidneye"), (2018, "r4kidneye")]
														
 
															+    CHARLS_data["Kidney_Diease_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1digeste"), (2013, "r2digeste"), (2015, "r3digeste"), (2018, "r4digeste")]
														
 
															+    CHARLS_data["Stomach_or_Other_Digestive_Disease_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1asthmae"), (2013, "r2asthmae"), (2015, "r3asthmae"), (2018, "r4asthmae")]
														
 
															+    CHARLS_data["Asthma_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1memrye"), (2013, "r2memrye"), (2015, "r3memrye"), (2018, "r4memrye")]
														
 
															+    CHARLS_data["Memory_Related_Disease_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+
														
 
															+    #心理评分
														
 
															+    waves = [(2011, "s1cesd10"), (2013, "s2cesd10"), (2015, "s3cesd10"), (2018, "s4cesd10")]
														
 
															+    CHARLS_data["Psychiatric_score_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+
														
 
															+    #睡眠状态
														
 
															+    waves = [(2011, "r1sleeprl"), (2013, "r2sleeprl"), (2015, "r3sleeprl"), (2018, "r4sleeprl")]
														
 
															+    CHARLS_data["sleep_state_m"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+
														
 
															+    #计算认知功能得分，分成三部分：电话问卷9分，词语回忆10分、画图1分
														
 
															+    waves = [(2011, "r1orient"), (2013, "r2orient"), (2015, "r3orient"), (2018, "r4orient")]
														
 
															+    CHARLS_data["Date_Naming"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1imrc"), (2013, "r2imrc"), (2015, "r3imrc"), (2018, "r4imrc")]
														
 
															+    CHARLS_data["Immediate_Word_Recall"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1dlrc"), (2013, "r2dlrc"), (2015, "r3dlrc"), (2018, "r4dlrc")]
														
 
															+    CHARLS_data["Delayed_Word_Recall"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1ser7"), (2013, "r2ser7"), (2015, "r3ser7"), (2018, "r4ser7")]
														
 
															+    CHARLS_data["Serial_7"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+    waves = [(2011, "r1draw"), (2013, "r2draw"), (2015, "r3draw"), (2018, "r4draw")]
														
 
															+    CHARLS_data["Drawing_Picture"] = pd.concat(merge_data(harmonized, waves), ignore_index=True)
														
 
															+
														
 
															+    CHARLS_data["Cognition_score_m"] = CHARLS_data["Date_Naming"] + CHARLS_data["Immediate_Word_Recall"] + CHARLS_data["Delayed_Word_Recall"] + CHARLS_data["Serial_7"] + CHARLS_data["Drawing_Picture"]
														
 
															+
														
 
															+    # 整体合并的：性别，出生年，教育
														
 
															+    #教育
														
 
															+    # 0 below high school
														
 
															+    # 1 high school
														
 
															+    # 2 college or above
														
 
															+    harmonized["raeduc_c"] = harmonized["raeduc_c"].apply(lambda x : 1 if x == 6 or x == 7 else 2 if x in [8, 9, 10] else 0 if x in [1,2,3,4,5] else np.nan)
														
 
															+    CHARLS_data = pd.merge(CHARLS_data, harmonized[["ID", "ragender", "rabyear", "raeduc_c"]], on='ID', how='left')
														
 
															+
														
 
															+    #合并
														
 
															+    merge_list = ["marital_status_m",	"Height_m",	"Weight_m",	"waist_m",	"Systolic_m",	"Diastolic_m",
														
 
															+                  	"Physical_activity_m",	"Smoke_m",	'Drink_m',	'Hypertension_m',	'Disabetes_or_High_Blood_Sugar_m',
														
 
															+                    'Cancer_or_Malignant_Tumor_m',	'Chronic_Lung_Diseases_m',	'Heart_Problems_m',	'Emotional_Nervous_or_Psychiatric_Problems_m',
														
 
															+                	'Stroke_m',	'Arthritis_or_Rheumatism_m',	'Dyslipidemia_m',	'Liver_Disease_m',	'Kidney_Diease_m',	'Stomach_or_Other_Digestive_Disease_m',
														
 
															+                	'Asthma_m',	'Memory_Related_Disease_m',	'Psychiatric_score_m',	'sleep_state_m', 'Cognition_score_m']
														
 
															+    
														
 
															+    # 遍历 merge_list 列表
														
 
															+    for col_m in merge_list:
														
 
															+        col = col_m.replace('_m', '')  # 去掉 '_m' 得到相应的列名
														
 
															+        if col in CHARLS_data.columns and col_m in CHARLS_data.columns:
														
 
															+            CHARLS_data[col] = CHARLS_data[col_m].fillna(CHARLS_data[col])
														
 
															+
														
 
															+    # 处理慢性病标准不一样，将2变为0
														
 
															+    chronic_disease = ['Hypertension','Dyslipidemia','Disabetes_or_High_Blood_Sugar','Cancer_or_Malignant_Tumor','Chronic_Lung_Diseases', 
														
 
															+                  'Liver_Disease', 'Heart_Problems', 'Stroke', 'Kidney_Diease','Stomach_or_Other_Digestive_Disease', 
														
 
															+                  'Emotional_Nervous_or_Psychiatric_Problems', 'Memory_Related_Disease','Arthritis_or_Rheumatism','Asthma']
														
 
															+    CHARLS_data[chronic_disease] = CHARLS_data[chronic_disease].replace(2, 0)
														
 
															+
														
 
															+    #处理"ragender", "rabyear", "raeduc_c"
														
 
															+    common_new_list = ["ragender", "rabyear", "raeduc_c"]
														
 
															+    common_list = ["rgender", "birth_year", "education"]
														
 
															+    for col_m, col in zip(common_new_list, common_list):
														
 
															+        if col in CHARLS_data.columns and col_m in CHARLS_data.columns:
														
 
															+            CHARLS_data[col] = CHARLS_data[col_m].fillna(CHARLS_data[col])
														
 
															+
														
 
															+    CHARLS_data = CHARLS_data.drop(columns=["Date_Naming", "Immediate_Word_Recall", "Delayed_Word_Recall", "Serial_7", "Drawing_Picture"] + merge_list+ common_new_list)
														
 
															+    CHARLS_data.to_csv("CHARLS_data_pollutants_p_n_m_nd_h.csv", index=False)
														
--- a/CHARLS_P/CHARLS_preprocess_main.py
+++ b/CHARLS_P/CHARLS_preprocess_main.py
@@ -134,12 +134,12 @@ if __name__ == "__main__":
 
															     data_2011 = pd.merge(data_2011, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
														
 
															-    #计算认知功能得分，分成三部分：电话问卷10分，词语回忆20分、画图1分
														
 
															+    #计算认知功能得分，分成三部分：电话问卷9分，词语回忆20分、画图1分
														
 
															     health_status["dc001s1_score"] = health_status["dc001s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
														
 
															     health_status["dc001s2_score"] = health_status["dc001s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
														
 
															     health_status["dc001s3_score"] = health_status["dc001s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
														
 
															     health_status["dc002_score"] = health_status["dc002"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
														
 
															-    health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
														
 
															+    # health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
														
 
															     health_status["dc019_score"] = health_status["dc019"].apply(lambda x : 1 if x==93 else 0 if pd.isna(x) else 0) 
														
 
															     health_status["dc020_score"] = health_status["dc020"].apply(lambda x : 1 if x==86 else 0 if pd.isna(x) else 0) 
														
 
															     health_status["dc021_score"] = health_status["dc021"].apply(lambda x : 1 if x==79 else 0 if pd.isna(x) else 0)
														
@@ -173,7 +173,7 @@ if __name__ == "__main__":
 
															     health_status["draw_score"] = health_status["dc025"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
														
 
															     data_2011["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
														
 
															-        health_status["dc001s3_score"] + health_status["dc002_score"]+ health_status["dc003_score"]+ \
														
 
															+        health_status["dc001s3_score"] + health_status["dc002_score"]+ \
														
 
															         health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
														
 
															         health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
														
 
															         health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \
														
@@ -354,12 +354,12 @@ if __name__ == "__main__":
 
															     data_2013 = pd.merge(data_2013, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
														
 
															-    #计算认知功能得分，分成三部分：电话问卷10分，词语回忆10分、画图1分
														
 
															+    #计算认知功能得分，分成三部分：电话问卷9分，词语回忆10分、画图1分
														
 
															     health_status["dc001s1_score"] = health_status["dc001s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
														
 
															     health_status["dc001s2_score"] = health_status["dc001s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
														
 
															     health_status["dc001s3_score"] = health_status["dc001s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
														
 
															     health_status["dc002_score"] = health_status["dc002"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
														
 
															-    health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
														
 
															+    # health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
														
 
															     health_status["dc019_score"] = health_status["dc019"].apply(lambda x : 1 if x==93 else 0 if pd.isna(x) else 0) 
														
 
															     health_status["dc020_score"] = health_status["dc020"].apply(lambda x : 1 if x==86 else 0 if pd.isna(x) else 0) 
														
 
															     health_status["dc021_score"] = health_status["dc021"].apply(lambda x : 1 if x==79 else 0 if pd.isna(x) else 0)
														
@@ -393,7 +393,7 @@ if __name__ == "__main__":
 
															     health_status["draw_score"] = health_status["dc025"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
														
 
															     data_2013["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
														
 
															-        health_status["dc001s3_score"] + health_status["dc002_score"]+ health_status["dc003_score"]+ \
														
 
															+        health_status["dc001s3_score"] + health_status["dc002_score"]+ \
														
 
															         health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
														
 
															         health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
														
 
															         health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \
														
@@ -550,12 +550,12 @@ if __name__ == "__main__":
 
															     data_2015 = pd.merge(data_2015, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
														
 
															-    #计算认知功能得分，分成三部分：电话问卷10分，词语回忆10分、画图1分
														
 
															+    #计算认知功能得分，分成三部分：电话问卷9分，词语回忆10分、画图1分
														
 
															     health_status["dc001s1_score"] = health_status["dc001s1"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
														
 
															     health_status["dc001s2_score"] = health_status["dc001s2"].apply(lambda x : 1 if x==2 else 0 if pd.isna(x) else 0)
														
 
															     health_status["dc001s3_score"] = health_status["dc001s3"].apply(lambda x : 1 if x==3 else 0 if pd.isna(x) else 0)
														
 
															     health_status["dc002_score"] = health_status["dc002"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
														
 
															-    health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
														
 
															+    # health_status["dc003_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if pd.isna(x) else 0)
														
 
															     health_status["dc019_score"] = health_status["dc019"].apply(lambda x : 1 if x==93 else 0 if pd.isna(x) else 0) 
														
 
															     health_status["dc020_score"] = health_status["dc020"].apply(lambda x : 1 if x==86 else 0 if pd.isna(x) else 0) 
														
 
															     health_status["dc021_score"] = health_status["dc021"].apply(lambda x : 1 if x==79 else 0 if pd.isna(x) else 0)
														
@@ -589,7 +589,7 @@ if __name__ == "__main__":
 
															     health_status["draw_score"] = health_status["dc025"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
														
 
															     data_2015["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
														
 
															-        health_status["dc001s3_score"] + health_status["dc002_score"]+ health_status["dc003_score"]+ \
														
 
															+        health_status["dc001s3_score"] + health_status["dc002_score"]+ \
														
 
															         health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
														
 
															         health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
														
 
															         health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \
														
@@ -723,12 +723,12 @@ if __name__ == "__main__":
 
															     data_2018 = pd.merge(data_2018, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
														
 
															-    #计算认知功能得分，分成三部分：电话问卷10分，词语回忆10分、画图1分
														
 
															+    #计算认知功能得分，分成三部分：电话问卷9分，词语回忆10分、画图1分
														
 
															     cognition["dc001s1_score"] = cognition["dc001_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															     cognition["dc001s2_score"] = cognition["dc006_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															     cognition["dc001s3_score"] = cognition["dc003_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															     cognition["dc002_score"] = cognition["dc005_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															-    cognition["dc003_score"] = cognition["dc002_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															+    # cognition["dc003_score"] = cognition["dc002_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															     cognition["dc019_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_1"]==97 else 1 if pd.isna(x["dc014_w4_1"]) and x["dc014_w4_1_1"]==93 else 0 if pd.isna(x["dc014_w4_1"]) and (not x["dc014_w4_1_1"]==93) else np.nan, axis=1) 
														
 
															     cognition["dc020_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_2"]==97 else 1 if pd.isna(x["dc014_w4_2"]) and x["dc014_w4_2_1"]==86 else 0 if pd.isna(x["dc014_w4_2"]) and (not x["dc014_w4_2_1"]==86) else np.nan, axis=1) 
														
 
															     cognition["dc021_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_3"]==97 else 1 if pd.isna(x["dc014_w4_3"]) and x["dc014_w4_3_1"]==79 else 0 if pd.isna(x["dc014_w4_3"]) and (not x["dc014_w4_3_1"]==79) else np.nan, axis=1)
														
@@ -762,7 +762,7 @@ if __name__ == "__main__":
 
															     cognition["draw_score"] = cognition["dc024_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															     data_2018["Cognition_score"] = cognition["dc001s1_score"] + cognition["dc001s2_score"] + \
														
 
															-        cognition["dc001s3_score"] + cognition["dc002_score"]+ cognition["dc003_score"]+ \
														
 
															+        cognition["dc001s3_score"] + cognition["dc002_score"]+ \
														
 
															         cognition["dc019_score"]+ cognition["dc020_score"] + cognition["dc021_score"]+ \
														
 
															         cognition["dc022_score"]+ cognition["dc023_score"] + cognition["dc006s1_score"] + \
														
 
															         cognition["dc006s2_score"] + cognition["dc006s3_score"] + cognition["dc006s4_score"] + \
														
@@ -906,7 +906,7 @@ if __name__ == "__main__":
 
															     data_2020 = pd.merge(data_2020, health_status_select, on = ["ID", 'householdID', 'communityID'], how="left")
														
 
															-    #计算认知功能得分，分成三部分：电话问卷10分，词语回忆10分、画图1分
														
 
															+    #计算认知功能得分，分成三部分：电话问卷9分，词语回忆10分、画图1分
														
 
															     health_status["dc001s1_score"] = health_status["dc001"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
														
 
															     health_status["dc001s2_score"] = health_status["dc005"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
														
 
															     health_status["dc001s3_score"] = health_status["dc003"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
														
@@ -943,7 +943,7 @@ if __name__ == "__main__":
 
															     health_status["draw_score"] = health_status["dc009"].apply(lambda x : 1 if x==1 else 0 if x==2 else np.nan)
														
 
															     data_2020["Cognition_score"] = health_status["dc001s1_score"] + health_status["dc001s2_score"] + \
														
 
															-        health_status["dc001s3_score"] + health_status["dc002_score"]+ health_status["dc003_score"]+ \
														
 
															+        health_status["dc001s3_score"] + health_status["dc002_score"]+ \
														
 
															         health_status["dc019_score"]+ health_status["dc020_score"] + health_status["dc021_score"]+ \
														
 
															         health_status["dc022_score"]+ health_status["dc023_score"] + health_status["dc006s1_score"] + \
														
 
															         health_status["dc006s2_score"] + health_status["dc006s3_score"] + health_status["dc006s4_score"] + \
														
--- a/CHARLS_P/CHARLS_split.py
+++ b/CHARLS_P/CHARLS_split.py
@@ -20,6 +20,11 @@ if __name__ == "__main__":
 
															     disease_features = ['Hypertension','Dyslipidemia','Disabetes_or_High_Blood_Sugar','Cancer_or_Malignant_Tumor','Chronic_Lung_Diseases', 
														
 
															                   'Liver_Disease', 'Heart_Problems', 'Stroke', 'Kidney_Diease','Stomach_or_Other_Digestive_Disease', 
														
 
															                   'Emotional_Nervous_or_Psychiatric_Problems', 'Memory_Related_Disease','Arthritis_or_Rheumatism','Asthma']
														
 
															+    
														
 
															+    # 夜光暴露与空气污染对慢性非传染性疾病（高血压、心脏病、糖尿病、肥胖、中风、关节炎、癌症和记忆相关疾病）的交互影响
														
 
															+    one_data = data.loc[:, base_feature + pollutant_feature + nl_feature + disease_features]
														
 
															+    one_data.to_csv("1.csv", index=False)
														
 
															+    
														
 
															     #夜光暴露与空气污染对老年人认知功能的交互影响及炎症和氧化应激的中介效应
														
 
															     one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + blood_features+["Cognition_score", "Memory_Related_Disease"]]
														
 
															     one_data.to_csv("2.csv", index=False)
														
@@ -65,10 +70,12 @@ if __name__ == "__main__":
 
															     one_data.to_csv("12.csv", index=False)   
														
 
															     # 13.夜光暴露与空气污染对睡眠障碍的交互影响及炎症和氧化应激的中介效应
														
 
															-    
														
 
															+    one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + blood_features + ["sleep_state"]]
														
 
															+    one_data.to_csv("13.csv", index=False)
														
 
															     # 14.基于贝叶斯网络的夜光暴露、空气污染与肥胖相关指标（BMI、锥度指数、相对脂肪质量指数等）对疾病发病风险的预测作用
														
 
															-
														
 
															+    one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2013) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + ["Height", "Weight", "waist"]]
														
 
															+    one_data.to_csv("14.csv", index=False)
														
 
															     # 15.夜光与空气污染暴露变化轨迹（潜增长模型等）与疾病的关联性研究
														
 
															     one_data = data.loc[:, base_feature + pollutant_feature + nl_feature + disease_features]
														
--- a/CHARLS_P/chongqing_pm.py
+++ b/CHARLS_P/chongqing_pm.py
@@ -3,7 +3,7 @@ from glob import glob
 
															 import os
														
 
															 def pollutant_chongqing_handle():
														
 
															-    path = "result_O3"
														
 
															+    path = "pollution/result_SO2"
														
 
															     data = pd.read_csv(path+".csv")
														
 
															     # 找到province列等于'重庆市'的行
														
 
															     chongqing_rows = data[data['province'] == '重庆市']
														
@@ -35,4 +35,5 @@ def aba_chongqing_handle():
 
															         df.to_csv(path+file_name+"_p"+extension, index=False)
														
 
															 if __name__ == "__main__":
														
 
															-    aba_chongqing_handle()
														
 
															+    pollutant_chongqing_handle()
														
 
															+    # aba_chongqing_handle()
														
--- a/CHARLS_P/nc2geotiff.py
+++ b/CHARLS_P/nc2geotiff.py
@@ -14,12 +14,12 @@ import concurrent.futures
 
															 # from osgeo import osr
														
 
															 #Define work and output paths
														
 
															-WorkPath = r'/root/r_base/O3'
														
 
															+WorkPath = r'/root/r_base/pollution/SO2'
														
 
															 OutPath  = WorkPath
														
 
															 #Define air pollutant type 
														
 
															 #e.g., PM1, PM2.5, PM10, O3, NO2, SO2, and CO, et al.
														
 
															-AP = 'O3'
														
 
															+AP = 'SO2'
														
 
															 #Define spatial resolution 
														
 
															 #e.g., 1 km ≈ 0.01 Degree
														
--- a/README.md
+++ b/README.md
@@ -7,4 +7,7 @@ AreaCity-Query-Geometry中是对坐标数据进行处理的程序
 
															 1. 什么是共病，如何定义共病，同时患有两种疾病以上就叫共病？
														
 
															 2. 结局变量为分类变量，状态之间可以相互转化，共病的状态如何定义？
														
 
															-3. 设置状态：健康状态：无任何共病症状的人群。单一疾病状态：患有某一种特定疾病的人群。可以分为不同的疾病类别，如心理疾病（如焦虑、抑郁）或生理疾病（如高血压、糖尿病）。多种疾病共存状态：同时患有两种或多种疾病的状态。例如，焦虑和失眠同时存在，或者糖尿病和心血管疾病共存。恶化状态：共病症状逐渐恶化，病情加重的状态。这一状态通常会与疾病的进展或夜光暴露程度相关。缓解状态：共病症状缓解，患者恢复部分或完全健康的状态。死亡状态：
														
 
															+3. 设置状态：
														
 
															+4. 健康状态：无任何共病症状的人群。
														
 
															+5. 单一疾病状态：患有某一种特定疾病的人群。可以分为不同的疾病类别，如心理疾病（如焦虑、抑郁）或生理疾病（如高血压、糖尿病）。
														
 
															+6. 多种疾病共存状态：同时患有两种或多种疾病的状态。例如，焦虑和失眠同时存在，或者糖尿病和心血管疾病共存。
														
--- a/paper_code/code.R
+++ b/paper_code/code.R
@@ -0,0 +1,60 @@
 
															+# install.packages("msm", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")
														
 
															+
														
 
															+library(msm)
														
 
															+library(survival)
														
 
															+
														
 
															+# data <- data.frame(
														
 
															+#   ID = c(1, 1, 1, 2, 2, 2),        # 人员ID
														
 
															+#   time = c(0, 1, 2, 0, 1, 2),      # 随访时间
														
 
															+#   state = c(1, 2, 3, 1, 1, 2),     # 疾病状态
														
 
															+#   birth_year = c(1970, 1970, 1970, 1980, 1980, 1980),
														
 
															+#   gender = c(1, 1, 1, 2, 2, 2),    # 性别
														
 
															+#   education = c(3, 3, 3, 2, 2, 2)  # 教育程度
														
 
															+# )
														
 
															+# statetable.msm(state, ID, data = data)
														
 
															+
														
 
															+# qmatrix_init <- matrix(c(-0.5, 0.25, 0.25,
														
 
															+#                          0.1, -0.3, 0.2,
														
 
															+#                          0, 0, 0), 
														
 
															+#                        nrow = 3, byrow = TRUE)
														
 
															+
														
 
															+# msm_model <- msm(state ~ time, subject = ID, data = data,
														
 
															+#                  qmatrix = qmatrix_init, 
														
 
															+#                  covariates = ~ gender + education)
														
 
															+# pmatrix.msm(msm_model, t = 1)  # t = 1 代表随访之间的间隔时间
														
 
															+# summary(msm_model)
														
 
															+
														
 
															+# 创建数据框
														
 
															+data <- data.frame(
														
 
															+  ID = c(1, 1, 1, 2, 2, 2),        # 人员ID
														
 
															+  time = c(0, 1, 2, 0, 1, 2),      # 随访时间
														
 
															+  state = c(1, 2, 3, 1, 1, 2),     # 疾病状态
														
 
															+  birth_year = c(1970, 1970, 1970, 1980, 1980, 1980), # 出生年份
														
 
															+  gender = c(1, 1, 1, 2, 2, 2),    # 性别
														
 
															+  education = c(3, 3, 3, 2, 2, 2)  # 教育程度
														
 
															+)
														
 
															+
														
 
															+# 计算状态转移频数表
														
 
															+freq_table <- statetable.msm(state, ID, data = data)
														
 
															+print(freq_table)
														
 
															+
														
 
															+# 初始化转移速率矩阵
														
 
															+qmatrix_init <- matrix(c(-0.5, 0.25, 0.25,
														
 
															+                          0.1, -0.3, 0.2,
														
 
															+                          0.3, 0.2, -0.5), 
														
 
															+                        nrow = 3, byrow = TRUE)
														
 
															+
														
 
															+# 根据观测数据计算转移速率矩阵的粗略初始值
														
 
															+crude_init <- crudeinits.msm(state ~ time, subject = ID, data = data, qmatrix = qmatrix_init)  # formula is state ~ time only; covariates are supplied to msm() below
														
 
															+
														
 
															+# 进行多状态模型分析
														
 
															+msm_model <- msm(state ~ time, subject = ID, data = data,
														
 
															+                 qmatrix = crude_init,
														
 
															+                 covariates = ~ gender+education)
														
 
															+
														
 
															+# 计算状态转移概率矩阵
														
 
															+prob_matrix <- pmatrix.msm(msm_model, t = 1)  # t = 1 代表随访之间的间隔时间
														
 
															+print(prob_matrix)
														
 
															+
														
 
															+# 查看模型的详细结果
														
 
															+summary(msm_model)
														
--- a/test.py
+++ b/test.py
@@ -75,59 +75,76 @@ import pandas as pd
 
															 #     except Exception as e:
														
 
															 #         print(f"Error reading {file_path}: {e}")
														
 
															 # print(num)
														
 
															-import pyreadstat
														
 
															-import numpy as np
														
 
															-
														
 
															-year = "2018"
														
 
															-cognition, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Cognition.dta")
														
 
															-#计算认知功能得分，分成三部分：电话问卷10分，词语回忆10分、画图1分
														
 
															-cognition["dc001s1_score"] = cognition["dc001_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															-cognition["dc001s2_score"] = cognition["dc006_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															-cognition["dc001s3_score"] = cognition["dc003_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															-cognition["dc002_score"] = cognition["dc005_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															-cognition["dc003_score"] = cognition["dc002_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															-cognition["dc019_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_1"]==97 else 1 if pd.isna(x["dc014_w4_1"]) and x["dc014_w4_1_1"]==93 else 0 if pd.isna(x["dc014_w4_1"]) and (not x["dc014_w4_1_1"]==93) else np.nan, axis=1) 
														
 
															-cognition["dc020_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_2"]==97 else 1 if pd.isna(x["dc014_w4_2"]) and x["dc014_w4_2_1"]==86 else 0 if pd.isna(x["dc014_w4_2"]) and (not x["dc014_w4_2_1"]==86) else np.nan, axis=1) 
														
 
															-cognition["dc021_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_3"]==97 else 1 if pd.isna(x["dc014_w4_3"]) and x["dc014_w4_3_1"]==79 else 0 if pd.isna(x["dc014_w4_3"]) and (not x["dc014_w4_3_1"]==79) else np.nan, axis=1)
														
 
															-cognition["dc022_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_4"]==97 else 1 if pd.isna(x["dc014_w4_4"]) and x["dc014_w4_4_1"]==72 else 0 if pd.isna(x["dc014_w4_4"]) and (not x["dc014_w4_4_1"]==72) else np.nan, axis=1)
														
 
															-cognition["dc023_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_5"]==97 else 1 if pd.isna(x["dc014_w4_5"]) and x["dc014_w4_5_1"]==65 else 0 if pd.isna(x["dc014_w4_5"]) and (not x["dc014_w4_5_1"]==65) else np.nan, axis=1)
														
 
															-
														
 
															-#词语记忆
														
 
															-cognition["dc006s1_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s1"]==1 else 0, axis=1)
														
 
															-cognition["dc006s2_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s2"]==2 else 0, axis=1)
														
 
															-cognition["dc006s3_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s3"]==3 else 0, axis=1)
														
 
															-cognition["dc006s4_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s4"]==4 else 0, axis=1) 
														
 
															-cognition["dc006s5_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s5"]==5 else 0, axis=1) 
														
 
															-cognition["dc006s6_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s6"]==6 else 0, axis=1)                                            
														
 
															-cognition["dc006s7_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s7"]==7 else 0, axis=1) 
														
 
															-cognition["dc006s8_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s8"]==8 else 0, axis=1) 
														
 
															-cognition["dc006s9_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s9"]==9 else 0, axis=1)                                            
														
 
															-cognition["dc006s10_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s10"]==10 else 0, axis=1)                                           
														
 
															-# cognition["dc006s11_score"] = cognition["dc028_w4_s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0) 
														
 
															-cognition["dc027s1_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s1"]==1 else 0, axis=1) 
														
 
															-cognition["dc027s2_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s2"]==2 else 0, axis=1) 
														
 
															-cognition["dc027s3_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s3"]==3 else 0, axis=1) 
														
 
															-cognition["dc027s4_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s4"]==4 else 0, axis=1) 
														
 
															-cognition["dc027s5_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s5"]==5 else 0, axis=1) 
														
 
															-cognition["dc027s6_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s6"]==6 else 0, axis=1)                                            
														
 
															-cognition["dc027s7_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s7"]==7 else 0, axis=1) 
														
 
															-cognition["dc027s8_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s8"]==8 else 0, axis=1) 
														
 
															-cognition["dc027s9_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s9"]==9 else 0, axis=1)                                            
														
 
															-cognition["dc027s10_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s10"]==10 else 0, axis=1)                                            
														
 
															-# cognition["dc027s11_score"] = cognition["dc047_w4_s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0)
														
 
															-#画图
														
 
															-cognition["draw_score"] = cognition["dc024_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															-
														
 
															-cognition["Cognition_score"] = cognition["dc001s1_score"] + cognition["dc001s2_score"] + \
														
 
															-    cognition["dc001s3_score"] + cognition["dc002_score"]+ cognition["dc003_score"]+ \
														
 
															-    cognition["dc019_score"]+ cognition["dc020_score"] + cognition["dc021_score"]+ \
														
 
															-    cognition["dc022_score"]+ cognition["dc023_score"] + cognition["dc006s1_score"] + \
														
 
															-    cognition["dc006s2_score"] + cognition["dc006s3_score"] + cognition["dc006s4_score"] + \
														
 
															-    cognition["dc006s5_score"] + cognition["dc006s6_score"] + cognition["dc006s7_score"] + \
														
 
															-    cognition["dc006s8_score"] + cognition["dc006s9_score"] + cognition["dc006s10_score"] + \
														
 
															-    cognition["dc027s1_score"]+ cognition["dc027s2_score"]+ \
														
 
															-    cognition["dc027s3_score"]+ cognition["dc027s4_score"]+ cognition["dc027s5_score"]+ \
														
 
															-    cognition["dc027s6_score"]+ cognition["dc027s7_score"]+ cognition["dc027s8_score"]+ \
														
 
															-    cognition["dc027s9_score"]+cognition["dc027s10_score"]+\
														
 
															-    cognition["draw_score"]
														
 
															-cognition.to_csv("/root/r_base/CHARLS/test.csv")
														
 
															+# import pyreadstat
														
 
															+# import numpy as np
														
 
															+
														
 
															+# year = "2018"
														
 
															+# cognition, meta = pyreadstat.read_dta("/root/r_base/CHARLS/CHARLS"+year+"/Cognition.dta")
														
 
															+# #计算认知功能得分，分成三部分：电话问卷10分，词语回忆10分、画图1分
														
 
															+# cognition["dc001s1_score"] = cognition["dc001_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															+# cognition["dc001s2_score"] = cognition["dc006_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															+# cognition["dc001s3_score"] = cognition["dc003_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															+# cognition["dc002_score"] = cognition["dc005_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															+# cognition["dc003_score"] = cognition["dc002_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															+# cognition["dc019_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_1"]==97 else 1 if pd.isna(x["dc014_w4_1"]) and x["dc014_w4_1_1"]==93 else 0 if pd.isna(x["dc014_w4_1"]) and (not x["dc014_w4_1_1"]==93) else np.nan, axis=1) 
														
 
															+# cognition["dc020_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_2"]==97 else 1 if pd.isna(x["dc014_w4_2"]) and x["dc014_w4_2_1"]==86 else 0 if pd.isna(x["dc014_w4_2"]) and (not x["dc014_w4_2_1"]==86) else np.nan, axis=1) 
														
 
															+# cognition["dc021_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_3"]==97 else 1 if pd.isna(x["dc014_w4_3"]) and x["dc014_w4_3_1"]==79 else 0 if pd.isna(x["dc014_w4_3"]) and (not x["dc014_w4_3_1"]==79) else np.nan, axis=1)
														
 
															+# cognition["dc022_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_4"]==97 else 1 if pd.isna(x["dc014_w4_4"]) and x["dc014_w4_4_1"]==72 else 0 if pd.isna(x["dc014_w4_4"]) and (not x["dc014_w4_4_1"]==72) else np.nan, axis=1)
														
 
															+# cognition["dc023_score"] = cognition.apply(lambda x : 0 if x["dc014_w4_5"]==97 else 1 if pd.isna(x["dc014_w4_5"]) and x["dc014_w4_5_1"]==65 else 0 if pd.isna(x["dc014_w4_5"]) and (not x["dc014_w4_5_1"]==65) else np.nan, axis=1)
														
 
															+
														
 
															+# #词语记忆
														
 
															+# cognition["dc006s1_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s1"]==1 else 0, axis=1)
														
 
															+# cognition["dc006s2_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s2"]==2 else 0, axis=1)
														
 
															+# cognition["dc006s3_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s3"]==3 else 0, axis=1)
														
 
															+# cognition["dc006s4_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s4"]==4 else 0, axis=1) 
														
 
															+# cognition["dc006s5_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s5"]==5 else 0, axis=1) 
														
 
															+# cognition["dc006s6_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s6"]==6 else 0, axis=1)                                            
														
 
															+# cognition["dc006s7_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s7"]==7 else 0, axis=1) 
														
 
															+# cognition["dc006s8_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s8"]==8 else 0, axis=1) 
														
 
															+# cognition["dc006s9_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s9"]==9 else 0, axis=1)                                            
														
 
															+# cognition["dc006s10_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc028_w4_s10"]==10 else 0, axis=1)                                           
														
 
															+# # cognition["dc006s11_score"] = cognition["dc028_w4_s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0) 
														
 
															+# cognition["dc027s1_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s1"]==1 else 0, axis=1) 
														
 
															+# cognition["dc027s2_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s2"]==2 else 0, axis=1) 
														
 
															+# cognition["dc027s3_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s3"]==3 else 0, axis=1) 
														
 
															+# cognition["dc027s4_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s4"]==4 else 0, axis=1) 
														
 
															+# cognition["dc027s5_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s5"]==5 else 0, axis=1) 
														
 
															+# cognition["dc027s6_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s6"]==6 else 0, axis=1)                                            
														
 
															+# cognition["dc027s7_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s7"]==7 else 0, axis=1) 
														
 
															+# cognition["dc027s8_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s8"]==8 else 0, axis=1) 
														
 
															+# cognition["dc027s9_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s9"]==9 else 0, axis=1)                                            
														
 
															+# cognition["dc027s10_score"] = cognition.apply(lambda x : np.nan if not x["wr101_intro"] ==1 else 1 if x["dc047_w4_s10"]==10 else 0, axis=1)                                            
														
 
															+# # cognition["dc027s11_score"] = cognition["dc047_w4_s11"].apply(lambda x : 1 if x==11 else 0 if pd.isna(x) else 0)
														
 
															+# #画图
														
 
															+# cognition["draw_score"] = cognition["dc024_w4"].apply(lambda x : 1 if x==1 else 0 if x==5 else np.nan)
														
 
															+
														
 
															+# cognition["Cognition_score"] = cognition["dc001s1_score"] + cognition["dc001s2_score"] + \
														
 
															+#     cognition["dc001s3_score"] + cognition["dc002_score"]+ cognition["dc003_score"]+ \
														
 
															+#     cognition["dc019_score"]+ cognition["dc020_score"] + cognition["dc021_score"]+ \
														
 
															+#     cognition["dc022_score"]+ cognition["dc023_score"] + cognition["dc006s1_score"] + \
														
 
															+#     cognition["dc006s2_score"] + cognition["dc006s3_score"] + cognition["dc006s4_score"] + \
														
 
															+#     cognition["dc006s5_score"] + cognition["dc006s6_score"] + cognition["dc006s7_score"] + \
														
 
															+#     cognition["dc006s8_score"] + cognition["dc006s9_score"] + cognition["dc006s10_score"] + \
														
 
															+#     cognition["dc027s1_score"]+ cognition["dc027s2_score"]+ \
														
 
															+#     cognition["dc027s3_score"]+ cognition["dc027s4_score"]+ cognition["dc027s5_score"]+ \
														
 
															+#     cognition["dc027s6_score"]+ cognition["dc027s7_score"]+ cognition["dc027s8_score"]+ \
														
 
															+#     cognition["dc027s9_score"]+cognition["dc027s10_score"]+\
														
 
															+#     cognition["draw_score"]
														
 
															+# cognition.to_csv("/root/r_base/CHARLS/test.csv")
														
 
															+
														
 
															+import pandas as pd
														
 
															+CHARLS_data = pd.read_csv("CHARLS_data_pollutants_p_n_m_nd_h.csv")
														
 
															+#合并
														
 
															+merge_list = ["marital_status_m",	"Height_m",	"Weight_m",	"waist_m",	"Systolic_m",	"Diastolic_m",
														
 
															+                "Physical_activity_m",	"Smoke_m",	'Drink_m',	'Hypertension_m',	'Disabetes_or_High_Blood_Sugar_m',
														
 
															+                'Cancer_or_Malignant_Tumor_m',	'Chronic_Lung_Diseases_m',	'Heart_Problems_m',	'Emotional_Nervous_or_Psychiatric_Problems_m',
														
 
															+                'Stroke_m',	'Arthritis_or_Rheumatism_m',	'Dyslipidemia_m',	'Liver_Disease_m',	'Kidney_Diease_m',	'Stomach_or_Other_Digestive_Disease_m',
														
 
															+                'Asthma_m',	'Memory_Related_Disease_m',	'Psychiatric_score_m',	'sleep_state_m', 'Cognition_score_m']
														
 
															+
														
 
															+# 遍历 merge_list 列表
														
 
															+for col_m in merge_list:
														
 
															+    col = col_m[:-2] if col_m.endswith('_m') else col_m  # strip only the trailing '_m' suffix; an '_m' inside the name is left untouched
														
 
															+    if col in CHARLS_data.columns and col_m in CHARLS_data.columns:
														
 
															+        CHARLS_data[col] = CHARLS_data[col_m].fillna(CHARLS_data[col])
														
 
															+
														
 
															+CHARLS_data.to_csv("CHARLS_data_pollutants_p_n_m_nd_h_test.csv", index=False)  # index=False keeps the row index out of the CSV, as the other exports do
Author	SHA1 Message	Date
JazzZhao	3f9500c2be CHARLS数据库优化	1 month ago
JazzZhao	7af394ed45 调整认知的得分	1 month ago