aba_data_preprocess.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. import pandas as pd
  2. import requests
  3. from time import sleep
  4. import json
  5. import os
  6. from glob import glob
  7. def get_city(lon, lat):
  8. params = {
  9. "lng": "{:.6f}".format(lon),
  10. "lat": "{:.6f}".format(lat)
  11. }
  12. flag = True
  13. while(flag):
  14. try:
  15. response_work = requests.get(url="http://103.116.120.27:9527/queryPoint",params = params)
  16. flag = False
  17. except Exception as e:
  18. print(f"请求错误一次:{e}")
  19. sleep(10)
  20. pass
  21. res_json_work = json.loads(response_work.text)
  22. #坐标在国内
  23. list_local_work = res_json_work['v']['list']
  24. if len(list_local_work) > 0:
  25. try:
  26. if len(list_local_work) == 1:
  27. province_city_work = [local_work['ext_path'] for local_work in list_local_work if local_work['deep'] == '0']
  28. return province_city_work[0], province_city_work[0]
  29. else:
  30. province_city_work = [local_work['ext_path'] for local_work in list_local_work if local_work['deep'] == '1']
  31. return province_city_work[0].split(" ")[0], province_city_work[0].split(" ")[1]
  32. except Exception as e:
  33. print("发生成异常"+json.dumps(list_local_work))
  34. else:
  35. print(f"这是一个空的坐标:{lon},{lat}\n")
  36. return "", ""
  37. if __name__ == "__main__":
  38. folder_path = "aba627/"
  39. result_path = "aba627/result/"
  40. #拿到文件夹中所有的csv文件
  41. csv_files = glob(folder_path+"*.csv")
  42. for file_path in csv_files:
  43. #对应省份和城市
  44. province_list = []
  45. city_list = []
  46. data = pd.read_csv(file_path, encoding="utf-8")
  47. #获取经纬度
  48. lons = data["X_Lon"]
  49. lats = data["Y_Lat"]
  50. for lon, lat in zip(lons, lats):
  51. province, city = get_city(lon=lon, lat=lat)
  52. province_list.append(province)
  53. city_list.append(city)
  54. data["province"] = province_list
  55. data["city"] = city_list
  56. data = data.iloc[:,4:].groupby(by=["province", "city"]).mean().reset_index()
  57. data.to_csv(result_path+os.path.basename(file_path), encoding="utf-8", index=False)