CHARLS_PM.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. import pandas as pd
  2. from glob import glob
  3. import os
  4. def pollutant_handle(path):
  5. years = [2011, 2013,2015, 2018, 2020]
  6. #读取污染物数据
  7. pollutants_data = pd.read_csv("pollution/result_pm10_1km_p.csv")
  8. for year in years:
  9. CHARLS_data = pd.read_csv(path)
  10. print(CHARLS_data.info())
  11. #开始筛选出year的数据
  12. CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
  13. #两个表合并
  14. table_merge = pd.merge(CHARLS_data_year, pollutants_data, on=['province', 'city'], how='left')
  15. if str(year) in table_merge.columns:
  16. #更新CHARLS表
  17. CHARLS_data.loc[CHARLS_data['wave']==year, 'cur_year_PM10'] = table_merge[str(year)].values
  18. if str(year - 1) in table_merge.columns:
  19. CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_PM10'] = table_merge[str(year-1)].values
  20. if str(year - 2) in table_merge.columns:
  21. CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_PM10'] = table_merge[str(year-2)].values
  22. CHARLS_data.to_csv(path ,index=False)
  23. print(year)
  24. def aba_handle(path_data):
  25. years = [2011, 2013,2015, 2018, 2020]
  26. for year in years:
  27. CHARLS_data = pd.read_csv(path_data)
  28. path = "aba627/result/"
  29. #读取污染物组分
  30. cur_year_file_name = path+str(year)+"_PM25_and_species_p.csv"
  31. last_year_file_name = path+str(year-1)+"_PM25_and_species_p.csv"
  32. before_last_file_name = path+str(year-2)+"_PM25_and_species_p.csv"
  33. cur_pollutants_data = pd.read_csv(cur_year_file_name)
  34. last_year_pollutants_data = pd.read_csv(last_year_file_name)
  35. before_last_pollutants_data = pd.read_csv(before_last_file_name)
  36. #开始筛选出year的数据
  37. CHARLS_data_year = CHARLS_data[CHARLS_data['wave']==year]
  38. #和当年的污染物组分文件合并
  39. cur_table_merge = pd.merge(CHARLS_data_year, cur_pollutants_data, on=['province', 'city'], how='left')
  40. CHARLS_data.loc[CHARLS_data['wave']==year, 'cur_year_SO4'] = cur_table_merge["SO4"].values
  41. CHARLS_data.loc[CHARLS_data['wave']==year, 'cur_year_NO3'] = cur_table_merge["NO3"].values
  42. CHARLS_data.loc[CHARLS_data['wave']==year, 'cur_year_NH4'] = cur_table_merge["NH4"].values
  43. CHARLS_data.loc[CHARLS_data['wave']==year, 'cur_year_OM'] = cur_table_merge["OM"].values
  44. CHARLS_data.loc[CHARLS_data['wave']==year, 'cur_year_BC'] = cur_table_merge["BC"].values
  45. #和上一年的污染物组分文件合并
  46. last_table_merge = pd.merge(CHARLS_data_year, last_year_pollutants_data, on=['province', 'city'], how='left')
  47. CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_SO4'] = last_table_merge["SO4"].values
  48. CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_NO3'] = last_table_merge["NO3"].values
  49. CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_NH4'] = last_table_merge["NH4"].values
  50. CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_OM'] = last_table_merge["OM"].values
  51. CHARLS_data.loc[CHARLS_data['wave']==year, 'last_year_BC'] = last_table_merge["BC"].values
  52. #和上上年的污染物组分文件合并
  53. before_last_table_merge = pd.merge(CHARLS_data_year, before_last_pollutants_data, on=['province', 'city'], how='left')
  54. CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_SO4'] = before_last_table_merge["SO4"].values
  55. CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_NO3'] = before_last_table_merge["NO3"].values
  56. CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_NH4'] = before_last_table_merge["NH4"].values
  57. CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_OM'] = before_last_table_merge["OM"].values
  58. CHARLS_data.loc[CHARLS_data['wave']==year, 'before_last_BC'] = before_last_table_merge["BC"].values
  59. #更新CHARLS表
  60. CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
  61. print(year)
  62. if __name__ == "__main__":
  63. #读取CHARLS数据
  64. path = "CHARLS_data_pollutants.csv"
  65. # CHARLS_data = pd.read_csv("CHARLS/result_all_new.csv")
  66. # print(CHARLS_data.info())
  67. # CHARLS_data.to_csv("CHARLS_data_pollutants.csv",index=False)
  68. #处理污染物
  69. # pollutant_handle(path)
  70. #处理PM2.5组分
  71. aba_handle(path)