zone-competences-geojson/generate_geojson.py
Charles P. 0792df9ee9
Some checks failed
Cache OSM data / cache (push) Failing after 1m51s
Initial commit
2024-07-06 17:43:00 +02:00

93 lines
3.4 KiB
Python

import logging
import sys
import csv
import osmium
import geopandas as gpd
import shapely.wkb
import geojson
# Shared geometry factory: OSMHandler.area uses it to serialise OSM areas as
# WKB, which is then decoded with shapely.wkb.loads(..., hex=True).
wkbfab = osmium.geom.WKBFactory()
def get_cities_per_institution(csv_file):
    """Group the city codes listed in a CSV file by institution.

    The file is read as a semicolon-delimited CSV with a header row that
    provides at least the columns ``institution`` and ``code_commune``.
    Rows whose ``institution`` field is empty are filed under ``"PN"``.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A dict mapping each institution name to the list of ``code_commune``
        values found for it, in file order.

    Raises:
        KeyError: If a row lacks the ``institution`` or ``code_commune``
            column.
    """
    logging.info("Reading CSV file")
    cities_per_institution = {}
    # newline="" hands line-ending handling to the csv module (as its
    # documentation requires), so newlines embedded in quoted fields are
    # kept intact instead of being rewritten by universal-newline mode.
    with open(csv_file, "r", newline="") as f:
        reader = csv.DictReader(f, delimiter=";")
        for row in reader:
            institution = row["institution"]
            city = row["code_commune"]
            if institution == "":
                # Rows without an explicit institution default to "PN".
                institution = "PN"
            cities_per_institution.setdefault(institution, []).append(city)
    logging.info("Found {} institutions: {}".format((len(cities_per_institution)), cities_per_institution.keys()))
    logging.info("Found {} cities".format(sum([len(cities) for cities in cities_per_institution.values()])))
    return cities_per_institution
class OSMHandler(osmium.SimpleHandler):
    """Collect administrative areas that carry an INSEE code from an OSM file.

    Every area tagged ``admin_level`` ``8`` or ``9`` that also has a non-empty
    ``ref:INSEE`` tag is appended to ``self.osm_data`` as a dict with the keys
    ``id`` (original OSM object id), ``insee_code`` and ``shape`` (the shapely
    geometry decoded from the area's multipolygon).
    """

    def __init__(self):
        super().__init__()
        # One dict per retained area; consumed after apply_file() returns.
        self.osm_data = []

    def area(self, a):
        """Handle one OSM area; store it when it matches the tag filter.

        Areas whose multipolygon cannot be built are skipped with a warning
        instead of aborting the whole file.
        """
        insee_code = a.tags.get('ref:INSEE')
        if a.tags.get('admin_level') not in ('8', '9') or not insee_code:
            return

        logging.debug("Found area with INSEE code {}".format(insee_code))
        try:
            # The factory output is decoded below with hex=True.
            geometry = shapely.wkb.loads(wkbfab.create_multipolygon(a), hex=True)
        except RuntimeError as err:
            # pyosmium reports areas with invalid geometry as RuntimeError;
            # one broken boundary should not abort the whole import.
            logging.warning(
                "Skipping area %s (INSEE code %s): invalid geometry (%s)",
                a.orig_id(), insee_code, err,
            )
            return

        self.osm_data.append({
            'id': a.orig_id(),
            'insee_code': insee_code,
            'shape': geometry,
        })
def generate_geojson(cities_per_institutions, output_file, osm_file=None):
    """Write one merged GeoJSON feature per institution.

    The OSM file is scanned with ``OSMHandler``; for every institution the
    shapes whose ``insee_code`` appears in its city list are merged with
    ``shapely.union_all`` and emitted as a feature whose ``institution``
    property holds the institution name.

    Args:
        cities_per_institutions: Mapping of institution name to the list of
            INSEE codes it covers.
        output_file: Path of the GeoJSON file to write.
        osm_file: Path of the OSM file to read. When omitted, the
            module-level ``osm_file`` set by the script entry point is used,
            which keeps existing two-argument calls working.

    Raises:
        ValueError: If no OSM file is given and no module-level ``osm_file``
            exists.
    """
    if osm_file is None:
        # Backward compatibility: the script entry point publishes the path
        # as a module global rather than passing it in.
        osm_file = globals().get("osm_file")
        if osm_file is None:
            raise ValueError(
                "No OSM file given: pass osm_file or set the module-level osm_file"
            )

    logging.info("Reading OSM file")
    handler = OSMHandler()
    handler.apply_file(osm_file, locations=True)
    gdf = gpd.GeoDataFrame(handler.osm_data)
    logging.info("===== OSM file informations =====")
    gdf.info(verbose=True)
    gdf.set_index('id', inplace=True)

    features = []
    for institution, cities in cities_per_institutions.items():
        logging.info("Processing institution {}".format(institution))
        # Select with a local mask instead of adding a column named after the
        # institution: a name such as "shape" or "insee_code" would otherwise
        # overwrite the data columns used below.
        mask = gdf['insee_code'].isin(cities)
        institution_shapes = gdf.loc[mask, 'shape']
        if institution_shapes.empty:
            logging.warning(
                "No OSM boundary matched institution %s; its geometry will be empty",
                institution,
            )
        # Merge all the geometries into one
        institution_geometry = shapely.union_all(institution_shapes)
        # Round-trip through geojson to turn the shapely geometry into a
        # plain GeoJSON geometry object.
        feature = geojson.Feature(
            geometry=geojson.loads(geojson.dumps(institution_geometry)),
            properties={"institution": institution},
        )
        features.append(feature)

    feature_collection = geojson.FeatureCollection(features)
    logging.info("Writing GeoJSON file")
    with open(output_file, "w") as f:
        geojson.dump(feature_collection, f)
if __name__ == "__main__":
    # Script entry point: generate_geojson.py <csv_file> <osm_file> <output_file>
    logging.basicConfig(level=logging.DEBUG)
    logging.info("Starting...")

    # argv holds the script name plus exactly three positional arguments.
    if len(sys.argv) != 4:
        logging.error("Usage: python generate_geojson.py <csv_file> <osm_file> <output_file>")
        sys.exit(1)

    # These stay module-level names on purpose: generate_geojson reads
    # ``osm_file`` as a global.
    csv_file, osm_file, output_file = sys.argv[1:]

    logging.info("Input:")
    logging.info(" CSV file: {}".format(csv_file))
    logging.info(" OSM file: {}".format(osm_file))
    logging.info("Output: {}".format(output_file))

    cities_per_institution = get_cities_per_institution(csv_file)
    generate_geojson(cities_per_institution, output_file)