import logging import sys import csv import osmium import geopandas as gpd import shapely.wkb import geojson wkbfab = osmium.geom.WKBFactory() def get_cities_per_institution(csv_file): logging.info("Reading CSV file") cities_per_institution = {} with open(csv_file, "r") as f: reader = csv.DictReader(f, delimiter=";") for row in reader: institution = row["institution"] city = row["code_commune"] if institution == "": institution = "PN" if institution in cities_per_institution: cities_per_institution[institution].append(city) else: cities_per_institution[institution] = [city] logging.info("Found {} institutions: {}".format((len(cities_per_institution)), cities_per_institution.keys())) logging.info("Found {} cities".format(sum([len(cities) for cities in cities_per_institution.values()]))) return cities_per_institution class OSMHandler(osmium.SimpleHandler): def __init__(self): osmium.SimpleHandler.__init__(self) self.osm_data = [] def area(self, a): if (a.tags.get('admin_level') == '8' or a.tags.get('admin_level') == '9') and a.tags.get('ref:INSEE'): logging.debug("Found area with INSEE code {}".format(a.tags.get('ref:INSEE'))) geometry = shapely.wkb.loads(wkbfab.create_multipolygon(a), hex=True) self.osm_data.append({ 'id': a.orig_id(), 'insee_code': a.tags.get('ref:INSEE'), 'shape': geometry }) def generate_geojson(cities_per_institutions, output_file): logging.info("Reading OSM file") handler = OSMHandler() handler.apply_file(osm_file, locations=True) gdf = gpd.GeoDataFrame(handler.osm_data) logging.info("===== OSM file informations =====") gdf.info(verbose=True) gdf.set_index('id', inplace=True) features = [] for institution, cities in cities_per_institutions.items(): logging.info("Processing institution {}".format(institution)) # Create a GeoDataFrame with only the cities of the institution gdf[institution] = gdf['insee_code'].isin(cities) # Extract a list of all the geometries of the cities of the institution institution_gdf = gdf[gdf[institution]]['shape'] # Merge all the geometries into one institution_geometry = shapely.union_all(institution_gdf) # Create a GeoJSON feature feature = geojson.Feature(geometry=geojson.loads(geojson.dumps(institution_geometry)), properties={"institution": institution}) features.append(feature) feature_collection = geojson.FeatureCollection(features) logging.info("Writing GeoJSON file") with open(output_file, "w") as f: geojson.dump(feature_collection, f) if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) logging.info("Starting...") if len(sys.argv) != 4: logging.error("Usage: python generate_geojson.py ") sys.exit(1) csv_file = sys.argv[1] logging.info("Input:") logging.info(" CSV file: {}".format(csv_file)) osm_file = sys.argv[2] logging.info(" OSM file: {}".format(osm_file)) output_file = sys.argv[3] logging.info("Output: {}".format(output_file)) cities_per_institution = get_cities_per_institution(csv_file) generate_geojson(cities_per_institution, output_file)