835397a6ba
[skip ci]
97 lines
3.5 KiB
Python
97 lines
3.5 KiB
Python
import logging
|
|
import os
|
|
import sys
|
|
import csv
|
|
import osmium
|
|
import geopandas as gpd
|
|
import shapely.wkb
|
|
import geojson
|
|
|
|
# Module-level WKB geometry factory; OSMHandler.area() calls its
# create_multipolygon() to serialize each retained area.
wkbfab = osmium.geom.WKBFactory()
|
|
|
|
def get_cities_per_institution(csv_file):
    """Group commune codes by institution from a semicolon-delimited CSV file.

    The file must start with a header row naming at least the
    ``institution`` and ``code_commune`` columns. Rows whose ``institution``
    field is the empty string are filed under the ``"PN"`` institution.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        dict: Maps each institution name to the list of ``code_commune``
        values found for it, in file order (duplicates are kept).

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If a data row is read while the header lacks one of the
            two required columns.
    """
    logging.info("Reading CSV file")
    cities_per_institution = {}
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; otherwise universal-newline translation
    # rewrites line breaks embedded in quoted fields.
    with open(csv_file, "r", newline="") as f:
        reader = csv.DictReader(f, delimiter=";")
        for row in reader:
            institution = row["institution"]
            city = row["code_commune"]
            if institution == "":
                institution = "PN"
            # setdefault creates the list on first sight of an institution.
            cities_per_institution.setdefault(institution, []).append(city)

    logging.info(
        "Found %s institutions: %s",
        len(cities_per_institution),
        cities_per_institution.keys(),
    )
    logging.info(
        "Found %s cities",
        sum(len(cities) for cities in cities_per_institution.values()),
    )
    return cities_per_institution
|
|
|
|
|
|
class OSMHandler(osmium.SimpleHandler):
    """Osmium handler that collects administrative areas carrying an INSEE code.

    Every area whose ``admin_level`` tag is ``'8'`` or ``'9'`` and whose
    ``ref:INSEE`` tag is non-empty is appended to ``osm_data`` as a dict with
    the keys ``id``, ``insee_code`` and ``shape``.
    """

    def __init__(self):
        super().__init__()
        # One dict per retained area, filled in by area().
        self.osm_data = []

    def area(self, a):
        """Record the area *a* when it matches the admin-level/INSEE filter.

        Args:
            a: The area object handed to this callback.
        """
        tags = a.tags
        # Guard clause: skip anything that is not a level 8/9 area with an
        # INSEE reference.
        if tags.get('admin_level') not in ('8', '9') or not tags.get('ref:INSEE'):
            return

        logging.debug("Found area with INSEE code {}".format(tags.get('ref:INSEE')))
        # The factory output is parsed as hex-encoded WKB into a shapely geometry.
        wkb_hex = wkbfab.create_multipolygon(a)
        shape = shapely.wkb.loads(wkb_hex, hex=True)
        record = {
            'id': a.orig_id(),
            'insee_code': tags.get('ref:INSEE'),
            'shape': shape,
        }
        self.osm_data.append(record)
|
|
|
|
|
|
def generate_geojson(cities_per_institutions, output_file, osm_file=None):
    """Write a GeoJSON file with one merged feature per institution.

    The OSM file is scanned with ``OSMHandler``; for each institution, the
    shapes of the collected areas whose INSEE code is in the institution's
    city list are merged with ``shapely.union_all`` and emitted as a single
    feature carrying an ``institution`` property.

    Args:
        cities_per_institutions: Mapping of institution name to an iterable
            of INSEE codes.
        output_file: Path of the GeoJSON file to write.
        osm_file: Path of the OSM file to read. When omitted, the
            module-level ``osm_file`` variable set by the script entry point
            is used, which keeps the historical two-argument call working.

    Raises:
        ValueError: If no OSM file is given and no module-level ``osm_file``
            is defined.
    """
    if osm_file is None:
        # Legacy call style: the path used to be read from a module global
        # instead of being passed in explicitly.
        osm_file = globals().get("osm_file")
        if osm_file is None:
            raise ValueError(
                "No OSM file given: pass osm_file or define a module-level osm_file"
            )

    logging.info("Reading OSM file")
    handler = OSMHandler()
    handler.apply_file(osm_file, locations=True)

    gdf = gpd.GeoDataFrame(handler.osm_data)

    logging.info("===== OSM file informations =====")
    gdf.info(verbose=True)

    gdf.set_index('id', inplace=True)
    features = []
    for institution, cities in cities_per_institutions.items():
        logging.info("Processing institution {}".format(institution))
        # Keep the membership mask local instead of storing it as a column
        # named after the institution: a name such as "shape" or
        # "insee_code" would otherwise overwrite the data being queried.
        in_institution = gdf['insee_code'].isin(cities)
        # Geometries of the areas belonging to the institution
        institution_shapes = gdf.loc[in_institution, 'shape']
        # Merge all the geometries into one
        institution_geometry = shapely.union_all(institution_shapes)
        # Round-trip through geojson.dumps/loads to obtain a geojson geometry
        # object from the shapely one.
        feature = geojson.Feature(
            geometry=geojson.loads(geojson.dumps(institution_geometry)),
            properties={"institution": institution},
        )
        features.append(feature)

    feature_collection = geojson.FeatureCollection(features)
    logging.info("Writing GeoJSON file")
    with open(output_file, "w") as f:
        geojson.dump(feature_collection, f)
|
|
|
|
|
|
if __name__ == "__main__":
    # Debug-level logging when RUNNER_DEBUG is set to a non-empty value,
    # info-level otherwise.
    log_level = logging.DEBUG if os.getenv("RUNNER_DEBUG") else logging.INFO
    logging.basicConfig(level=log_level)

    logging.info("Starting...")
    # Exactly three positional arguments are expected after the script name.
    if len(sys.argv) != 4:
        logging.error("Usage: python generate_geojson.py <csv_file> <osm_file> <output_file>")
        sys.exit(1)

    # osm_file has to remain a module-level name: generate_geojson() reads it
    # from the module globals when it is not passed explicitly.
    csv_file, osm_file, output_file = sys.argv[1:]

    logging.info("Input:")
    logging.info(" CSV file: {}".format(csv_file))
    logging.info(" OSM file: {}".format(osm_file))
    logging.info("Output: {}".format(output_file))

    cities_per_institution = get_cities_per_institution(csv_file)
    generate_geojson(cities_per_institution, output_file)