# night_light_data_preprocess.py (1.2 KB)
  1. import pandas as pd
  2. from glob import glob
  3. import os
  4. import re
  5. #文件夹
  6. folderpath = "night_light"
  7. # 读取各个年份的夜光数据
  8. files = os.listdir(folderpath)
  9. #读取地名与ID的对应文件
  10. ok_data_level3 = pd.read_csv("ok_data_level3.csv")
  11. # 新建空的存放最后的结果
  12. result_all = pd.DataFrame()
  13. for file_name in files:
  14. #获取年份
  15. match = re.search(r'(F\d{4}|SNNP\d{4})', file_name)
  16. if match:
  17. year_str = match.group(0)[-4:]
  18. year = int(year_str)
  19. data_one_year = pd.read_csv(os.path.join(folderpath, file_name))
  20. #只需要id、ext_name、MEAN
  21. if result_all.empty:
  22. tmp_result = pd.merge(data_one_year, ok_data_level3, on="id", how="left")
  23. result_all = tmp_result[["id", "ext_name", "MEAN"]]
  24. #改列名
  25. result_all.rename(columns={"MEAN":year}, inplace=True)
  26. else:
  27. result_all = pd.merge(result_all, data_one_year[["id", "MEAN"]], on="id", how="left")
  28. #改列名
  29. result_all.rename(columns={"MEAN":year}, inplace=True)
  30. else:
  31. print(f"no year find in file: {file_name}")
  32. result_all.to_csv("night_light_result.csv", encoding="utf-8", index=False)