# CHARLS_split.py
  1. import pandas as pd
  2. if __name__ == "__main__":
  3. path = "CHARLS_data_pollutants_mete.csv"
  4. data = pd.read_csv(path, encoding="utf-8")
  5. print(data.info())
  6. base_feature = ['ID', 'wave','rgender','birth_year','marital_status','education','Smoke', 'Drink']
  7. pollutant_feature = ['last_year_O3', 'before_last_O3', 'last_year_pm1', 'before_last_pm1','last_year_pm2.5','before_last_pm2.5',
  8. 'last_year_pm10', 'before_last_pm10',
  9. 'last_year_SO4', 'last_year_NO3', 'last_year_NH4', 'last_year_OM', 'last_year_BC',
  10. 'before_last_SO4', 'before_last_NO3', 'before_last_NH4', 'before_last_OM', 'before_last_BC']
  11. nl_feature = ['last_year_nl', 'before_last_nl']
  12. meteorology_features = ['last_year_sunlight', 'before_last_sunlight', 'last_year_wind', 'before_last_wind',
  13. 'last_year_rain', 'before_last_rain', 'last_year_temperature', 'before_last_temperature',
  14. 'last_year_humidity', 'before_last_humidity']
  15. blood_features = ['bl_wbc', 'bl_mcv', 'bl_plt', 'bl_bun', 'bl_glu', 'bl_crea', 'bl_cho', 'bl_tg', 'bl_hdl', 'bl_ldl',
  16. 'bl_crp', 'bl_hbalc', 'bl_ua', 'bl_hct', 'bl_hgb', 'bl_cysc']
  17. disease_features = ['Hypertension','Dyslipidemia','Disabetes_or_High_Blood_Sugar','Cancer_or_Malignant_Tumor','Chronic_Lung_Diseases',
  18. 'Liver_Disease', 'Heart_Problems', 'Stroke', 'Kidney_Diease','Stomach_or_Other_Digestive_Disease',
  19. 'Emotional_Nervous_or_Psychiatric_Problems', 'Memory_Related_Disease','Arthritis_or_Rheumatism','Asthma']
  20. #夜光暴露与空气污染对老年人认知功能的交互影响及炎症和氧化应激的中介效应
  21. one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + blood_features+["Cognition_score", "Memory_Related_Disease"]]
  22. one_data.to_csv("2.csv", index=False)
  23. #夜光暴露与空气污染及其交互作用与代谢综合征关联性研究
  24. one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature +["bl_glu",'bl_tg','bl_hdl', "Height", "Weight", "Systolic", "Diastolic", "Dyslipidemia", "Disabetes_or_High_Blood_Sugar"]]
  25. one_data.to_csv("3.csv", index=False)
  26. # 4.夜光暴露与空气污染对心理健康(抑郁症状,生活满意度)的交互影响及炎症和氧化应激的中介效应
  27. one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + blood_features +["Psychiatric_score", "Emotional_Nervous_or_Psychiatric_Problems"]]
  28. one_data.to_csv("4.csv", index=False)
  29. # 5.夜光暴露与空气污染及其交互作用对多病共存的影响及炎症和氧化应激的中介效应
  30. one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + blood_features + disease_features]
  31. one_data.to_csv("5.csv", index=False)
  32. # 6.夜光暴露与空气污染对胰岛素抵抗(甘油三酯-葡萄糖指数)及炎症和氧化应激的中介效应
  33. one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + blood_features]
  34. one_data.to_csv("6.csv", index=False)
  35. # 7.基于机器学习探究夜光暴露、应激性高血糖比和空气污染对泛血管疾病/心血管病的影响与预测作用 AI探索之间的关联性,预测效果优于传统方法
  36. one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + ["bl_glu", "bl_hbalc", "Heart_Problems"]]
  37. one_data.to_csv("7.csv", index=False)
  38. # 8.夜光暴露与慢性非传染性疾病(高血压、心脏病、中风、糖尿病、关节炎、癌症和记忆相关疾病)关联的因果效应
  39. one_data = data.loc[:, base_feature + nl_feature + disease_features]
  40. one_data.to_csv("8.csv", index=False)
  41. # 9.夜光暴露与空气污染对心脏代谢多发病,肾功能异常,血脂异常,痛风,高尿酸血症,代谢综合征等交互影响及炎症和氧化应激的中介效应
  42. one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + blood_features + disease_features]
  43. one_data.to_csv("9.csv", index=False)
  44. # 10.夜光暴露与空气污染累积联合暴露与疾病之间的关联性
  45. one_data = data.loc[:, base_feature + pollutant_feature + nl_feature + disease_features]
  46. one_data.to_csv("10.csv", index=False)
  47. # 11.夜光暴露与空气污染对感染性疾病的交互影响及炎症和氧化应激的中介效应
  48. one_data = data.loc[(data['wave'] == 2011) | (data['wave'] == 2015), base_feature + pollutant_feature + nl_feature + blood_features + disease_features]
  49. one_data.to_csv("11.csv", index=False)
  50. # 12.基于等时替代模型的夜光暴露与空气污染与疾病的关联性及机制研究
  51. one_data = data.loc[:, base_feature + pollutant_feature + nl_feature + disease_features]
  52. one_data.to_csv("12.csv", index=False)
  53. # 13.夜光暴露与空气污染对睡眠障碍的交互影响及炎症和氧化应激的中介效应
  54. # 14.基于贝叶斯网络的夜光暴露、空气污染与肥胖相关指标(BMI、锥度指数、相对脂肪质量指数等)对疾病发病风险的预测作用
  55. # 15.夜光与空气污染暴露变化轨迹(潜增长模型等)与疾病的关联性研究
  56. one_data = data.loc[:, base_feature + pollutant_feature + nl_feature + disease_features]
  57. one_data.to_csv("15.csv", index=False)
  58. # 17.基于全环境暴露组的疾病发生风险预测模型构建及验证
  59. one_data = data.loc[:, base_feature + pollutant_feature + nl_feature + meteorology_features + disease_features]
  60. one_data.to_csv("17.csv", index=False)
  61. # 18.基于倍差法的夜光暴露与空气污染对疾病发病风险的因果效应
  62. one_data = data.loc[:, base_feature + pollutant_feature + nl_feature + disease_features]
  63. one_data.to_csv("18.csv", index=False)
  64. # 19.夜光暴露与空气污染的联合暴露与疾病发生风险的关联性
  65. one_data = data.loc[:, base_feature + pollutant_feature + nl_feature + disease_features]
  66. one_data.to_csv("19.csv", index=False)
  67. # 20.基于深度神经网络的全环境暴露组与疾病的关联性及发生风险预测
  68. one_data = data.loc[:, base_feature + pollutant_feature + nl_feature + meteorology_features + disease_features]
  69. one_data.to_csv("20.csv", index=False)