# CHARLS_PM.py

  1. import pandas as pd
  2. from glob import glob
  3. import os
  4. def pollutant_handle(CHARLS_data):
  5. #读取污染物数据
  6. pollutants_data = pd.read_csv("result_O3_p.csv")
  7. #处理哪一年的数据
  8. year = 2020
  9. #开始筛选出year的数据
  10. CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
  11. #两个表合并
  12. table_merge = pd.merge(CHARLS_data_year, pollutants_data, on=['province', 'city'], how='left')
  13. #更新CHARLS表
  14. CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_O3'] = table_merge[str(year-1)].values
  15. CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_O3'] = table_merge[str(year-2)].values
  16. CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
  17. print(year)
  18. def aba_handle(CHARLS_data):
  19. #处理CHARLS数据的年份
  20. year = 2020
  21. path = "aba627/result/"
  22. #读取污染物组分
  23. last_year_file_name = path+str(year-1)+"_PM25_and_species_p.csv"
  24. before_last_file_name = path+str(year-2)+"_PM25_and_species_p.csv"
  25. last_year_pollutants_data = pd.read_csv(last_year_file_name)
  26. before_last_pollutants_data = pd.read_csv(before_last_file_name)
  27. #开始筛选出year的数据
  28. CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
  29. #和上一年的污染物组分文件合并
  30. last_table_merge = pd.merge(CHARLS_data_year, last_year_pollutants_data, on=['province', 'city'], how='left')
  31. CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_SO4'] = last_table_merge["SO4"].values
  32. CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_NO3'] = last_table_merge["NO3"].values
  33. CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_NH4'] = last_table_merge["NH4"].values
  34. CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_OM'] = last_table_merge["OM"].values
  35. CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_BC'] = last_table_merge["BC"].values
  36. #和上上年的污染物组分文件合并
  37. before_last_table_merge = pd.merge(CHARLS_data_year, before_last_pollutants_data, on=['province', 'city'], how='left')
  38. CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_SO4'] = before_last_table_merge["SO4"].values
  39. CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_NO3'] = before_last_table_merge["NO3"].values
  40. CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_NH4'] = before_last_table_merge["NH4"].values
  41. CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_OM'] = before_last_table_merge["OM"].values
  42. CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_BC'] = before_last_table_merge["BC"].values
  43. #更新CHARLS表
  44. CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
  45. print(year)
  46. if __name__ == "__main__":
  47. #读取CHARLS数据
  48. CHARLS_data = pd.read_csv("CHARLS_data_pollutants.csv")
  49. print(CHARLS_data.info())
  50. # CHARLS_data1 = pd.read_csv("NHANES/result_all.csv")
  51. # print(CHARLS_data1.info())
  52. #处理污染物
  53. # pollutant_handle(CHARLS_data)
  54. #处理PM2.5组分
  55. # aba_handle(CHARLS_data)