diff --git a/.dockerignore b/.dockerignore index 0d364d6..9ea4a81 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,4 @@ /build /Dockerfile -gtfs-out +/push-docker-image.sh diff --git a/.forgejo/workflows/test.yml b/.forgejo/workflows/test.yml deleted file mode 100644 index 3344ec5..0000000 --- a/.forgejo/workflows/test.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: GTFS Shapes Generation - -on: [push] - -jobs: - modify_routes: - runs-on: docker - steps: - - name: Download sample feed - run: curl -o sample-feed.zip https://download.data.public.lu/resources/horaires-et-arrets-des-transport-publics-gtfs/20240530-080402/gtfs-20240529-20240621.zip - - - name: Download OSM data - run: curl -o luxembourg-latest.osm.pbf https://download.geofabrik.de/europe/luxembourg-latest.osm.pbf - - - name: Convert OSM data to .osm - run: | - apt update && \ - apt install -y osmctools && \ - osmconvert luxembourg-latest.osm.pbf -o=luxembourg-latest.osm - - - name: Run Shape Generation - uses: gtfs-actions/generate-shapes@main - with: - gtfs_file: sample-feed.zip - osm_file: luxembourg-latest.osm - mot: bus - output_file: modified-feed.zip - - - name: Upload modified feed - uses: actions/upload-artifact@v3 - with: - name: modified-feed - path: modified-feed.zip \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 5790aa1..826a1a6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,12 +1,9 @@ [submodule "src/cppgtfs"] path = src/cppgtfs - url = https://github.com/ad-freiburg/cppgtfs.git + url = https://ad-git.informatik.uni-freiburg.de/ad/cppgtfs.git [submodule "src/xml"] path = src/xml url = https://github.com/patrickbr/pfxml.git [submodule "src/configparser"] path = src/configparser url = https://git.patrickbrosi.de/patrick/configparser -[submodule "src/util"] - path = src/util - url = https://github.com/ad-freiburg/util diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..9087d87 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,37 @@ +language: cpp 
+os: +- linux +- osx +compiler: +- gcc +- clang +addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - cmake +before_install: +- export LD_LIBRARY_PATH=$(if [[ $CXX == "clang++" ]]; then echo -n '/usr/local/clang/lib'; + fi) +before_script: +- mkdir build +- cd build +- cmake .. +script: +- make -j4 +- ctest --verbose +- docker build -t ad-freiburg/pfaedle . +deploy: + on: + branch: master + provider: script + script: bash push-docker-image.sh +notifications: + email: + on_success: never + on_failure: always +env: + global: + - secure: W60bRWv9u28UFjmt1iO5ELtPBvUNkAPqiwwxd+boy7BazAJ0fUUBZoEpnsgv8pqKWJV6VMUvknP4taU5a6NM+3aRHuCZOjVC42Rs8oDGJoXrhmH9ZzOOp2nDnHy9hqrtRKJrYQUww+s7UjBpWcaorqHvo5iWNrt9OulKM+V2u6IQI3xI1bPoaVhK/EnHHFAWe52v0KOkaSjguL5zj7xZqCeaZKmX9PsiQdqQJVtX2zsdF/aDkDvhkAl4SxeVKrFEVDV4gPx7yqGC/uQ6YJrQXigqpWWL6oZ1cxsg2HWqLZyAYN8tIWcnaAW8+PVYLfH1iTDb6fnokD4DPpVfULz4dzqOnTuG+Qd97U2BVDJN+LdxK2d8RZ1KLAWNbFGBlkY/8zpMAtV/xhGk2vHg/pj6ZPUPncNlOzUepASw5yCY7H6SOH1NgzNNSn+Fg3Q+6eUoYWp5jrpejBcwO/tikRCfGOSNyEKTC+1joRNwySeLjTcLDcaO3+EJL64YjIKW1+YwVFopq5DKRhzjSyO+dUryA5+l+nT499BC9dxA7SvQ/tLwMs3uPlVhSDUvkH6DxiWIJQEYmTZIEA4JmLjdBFDB9FaApubvn5wRIlL07Dbq+t3XxMciWeLU3H2IQzlGPQpIbqB93L8yc6eH5Fq2Gu4HN5lmpJC3ZLpZOEqNk1kcFzo= + - secure: b10JKrMacKD+C8yGHPiOYP84ykXqBFiwm2wkcKn9SkGO6bDCtRIM+/eYMN64wQVD54Vwc/rfxqCRN6ckdAgw1zrtBdlRLz1krdwsdb7RuXJvTTTu3bd8CrtlsvcVh40vcItcFAQQKhKrPF/iDajbXm4GaVcoiqV8i8inhhfg063guC22o3D76J3xFmwyhNGv0QZuK7xG0O2h+mflU/LE8FuXrVO4+1QmvwJ9JRgBnz5F8jrEuZWipp3gJBVnpYHv4ZAy5r52zQ3iPEkji7Y6/GjvxYnjc08QM998I3SSlUuW4quiEPJFTGxK9w/UV34c0DJhzluJ3TGTz+RkhejIDUcNiKqFKWZCzCwzcx5f96RmTh7MTHulB7zTkK0hzSSPFjrYrRrkN/FCwMrHaLs1H3SQbXiB2Ga2pnfaOVfCbM5KraZHlk2xHUIqVHkhyetETBW76d4g6vxjNoe++siyx+eUW2VMj7Y+6c1HQPceGi+jpl9pJK6ZXKfVpfWjFj29qqnv7lNjoI3PXGllswDV4KxP/A5A4MBqHAcVOFEdTro5EReUhepyNYM7lUaS//Wa6DzE6US13bVpIr4fC+bKUf8XizfGp+f9TSc+Shl0J6asejxIbgQRCopTJd805xAFoCsoK1yt/sZIplBO/mjaBjLc9Y1+A04VH5FQWl92rHQ= diff --git a/CMakeLists.txt b/CMakeLists.txt index 
b045164..008bd55 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,23 +1,37 @@ -cmake_minimum_required (VERSION 3.5) -set(CMAKE_CXX_STANDARD 11) +cmake_minimum_required (VERSION 2.8) project (pfaedle) if (CMAKE_BUILD_TYPE) - string(SUBSTRING ${CMAKE_BUILD_TYPE} 0 1 FIRST_CHAR) - string(TOUPPER ${FIRST_CHAR} FIRST_CHAR) - string(REGEX REPLACE "^.(.*)" "${FIRST_CHAR}\\1" CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}") + string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) endif() +if (CMAKE_BUILD_TYPE STREQUAL "DEBUG") + set(CPPLINT "${CMAKE_SOURCE_DIR}/cpplint.py") + include(cmake/cpplint.cmake) +endif() + +set(CPPLINT_PROJECT_ROOT "src") enable_testing() set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/") -set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}") +set(EXECUTABLE_OUTPUT_PATH "${CMAKE_SOURCE_DIR}/build") + + +find_package(OpenMP) +if (OPENMP_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif() + # set compiler flags, see http://stackoverflow.com/questions/7724569/debug-vs-release-in-cmake -set(CMAKE_CXX_FLAGS "-Ofast -fno-signed-zeros -fno-trapping-math -Wall -Wno-format-extra-args -Wextra -Wformat-nonliteral -Wformat-security -Wformat=2 -Wextra -Wno-implicit-fallthrough -pedantic -Wno-keyword-macro") -set(CMAKE_CXX_FLAGS_SANITIZE "-Og -g -fsanitize=address -fsanitize=leak -fsanitize=undefined -DLOGLEVEL=3 -DPFAEDLE_DBG=1") -set(CMAKE_CXX_FLAGS_PROFILE "-g -pg -DLOGLEVEL=3 -DPFAEDLE_DBG=1") +if(OPENMP_FOUND) + set(CMAKE_CXX_FLAGS "-fopenmp -Ofast -fno-signed-zeros -fno-trapping-math -Wall -Wno-format-extra-args -Wextra -Wformat-nonliteral -Wformat-security -Wformat=2 -Wextra -Wno-implicit-fallthrough -pedantic") +else() + message(WARNING "Configuring without OpenMP!") + set(CMAKE_CXX_FLAGS "-Ofast -fno-signed-zeros -fno-trapping-math -Wall -Wno-format-extra-args -Wextra -Wformat-nonliteral -Wformat-security -Wformat=2 -Wextra -Wno-implicit-fallthrough 
-pedantic") +endif() set(CMAKE_CXX_FLAGS_DEBUG "-Og -g -DLOGLEVEL=3 -DPFAEDLE_DBG=1") set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -DLOGLEVEL=2") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -DLOGLEVEL=2") @@ -26,28 +40,24 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -g -DLOGLEVEL=3") # export compile commands to tools like clang set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +# Compiler-specific C++11 activation. +if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU") + execute_process( + COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) + if ((GCC_VERSION VERSION_GREATER 4.8 OR GCC_VERSION VERSION_EQUAL 4.8)) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + else () + message(FATAL_ERROR "${PROJECT_NAME} requires g++ 4.8 or greater!") + endif () +elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++ -std=c++11") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +else () + message(FATAL_ERROR "Your C++ compiler does not support C++11.") +endif () + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPFAEDLE_PRECISION=${PFAEDLE_PRECISION}") -find_package(LibZip) -find_package(ZLIB) -find_package(BZip2) - -if (LIBZIP_FOUND) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLIBZIP_FOUND=1") -endif() - -if (ZLIB_FOUND) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DZLIB_FOUND=1") -else () - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPFXML_NO_ZLIB=1") -endif() - -if (BZIP2_FOUND) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBZLIB_FOUND=1") -else () - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPFXML_NO_BZLIB=1") -endif() - # http://brianmilco.blogspot.de/2012/11/cmake-automatically-use-git-tags-as.html include(GetGitRevisionDescription) git_get_tag(VERSION_GIT) @@ -71,7 +81,14 @@ add_subdirectory(src) # tests add_test("utilTest" utilTest) -add_test("pfaedleTest" pfaedleTest) + +# custom eval target + +add_custom_target( + eval + COMMAND make + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}//eval +) # handles 
install target install( @@ -79,11 +96,6 @@ install( ) install( - FILES ${CMAKE_BINARY_DIR}/pfaedle DESTINATION bin - PERMISSIONS OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE COMPONENT binaries -) - -install( - FILES ${CMAKE_BINARY_DIR}/shapevl DESTINATION bin + FILES build/pfaedle DESTINATION bin PERMISSIONS OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE COMPONENT binaries ) diff --git a/Dockerfile b/Dockerfile index d51404a..5012f35 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,9 @@ -FROM debian:bookworm-slim AS builder +FROM debian:buster-slim AS builder WORKDIR /app RUN apt-get update && \ - apt-get install -y g++ cmake git libzip-dev zlib1g-dev libbz2-dev + apt-get install -y g++ cmake git ADD . /app RUN mkdir build && \ @@ -13,10 +13,10 @@ RUN mkdir build && \ pwd && \ make install -FROM debian:bookworm-slim +FROM debian:buster-slim RUN apt-get update && \ - apt-get install -y libzip4 zlib1g libbz2-1.0 && \ + apt-get install -y libgomp1 && \ rm -rf /var/lib/apt/lists/* COPY --from=builder /usr/local/etc/pfaedle /usr/local/etc/pfaedle diff --git a/README.md b/README.md index 45384eb..a860d35 100644 --- a/README.md +++ b/README.md @@ -1,51 +1,47 @@ -[![Left: station-to-station path of a single train through Switzerland obtained from schedule timetable data. Right: path of the same train map-matched by pfaedle.](geo/schweiz_ex_res.png?raw=true)](geo/schweiz_ex.png?raw=true) +[![Left: station-to-station path of a single train through Switzerland obtained from schedule timetable data. Right: path of the same train map-matched by pfaedle.](geo/schweiz_ex_res.png?raw=true)](geo/schweiz_ex.png?raw=true) *Left: station-to-station path of a single train through Switzerland obtained from official schedule data. Right: path of the same train map-matched by pfaedle.* -[![Left: station-to-station path of a single bus through Stuttgart obtained from official schedule data. 
Right: path of the same bus map-matched by pfaedle.](geo/stuttgart_ex_res.png?raw=true)](geo/stuttgart_ex.png?raw=true) +[![Left: station-to-station path of a single bus through Stuttgart obtained from official schedule data. Right: path of the same bus map-matched by pfaedle.](geo/stuttgart_ex_res.png?raw=true)](geo/stuttgart_ex.png?raw=true) *Left: station-to-station path of a single bus through Stuttgart obtained from official schedule data. Right: path of the same bus map-matched by pfaedle.* -[![Build](https://github.com/ad-freiburg/pfaedle/actions/workflows/build.yml/badge.svg)](https://github.com/ad-freiburg/pfaedle/actions/workflows/build.yml) +[![Build +Status](https://travis-ci.org/ad-freiburg/pfaedle.svg?branch=master)](https://travis-ci.org/ad-freiburg/pfaedle) # pfaedle Precise OpenStreetMap (OSM) map-matching for public transit schedules ([GTFS](https://developers.google.com/transit/gtfs/reference/) data). -First described in [this 2018 SIGSPATIAL paper](http://ad-publications.informatik.uni-freiburg.de/SIGSPATIAL_Sparse%20map%20matching%202018.pdf). - -For a quick visual inspection of the shape quality, see for example the schedule data for Germany or Switzerland in our tool [TRAVIC](https://travic.app/?z=7&x=1261608.6&y=6430601.6). +Implementation and evaluation code for our paper [Sparse Map-Matching in Public Transit Networks with Turn Restrictions](http://ad-publications.informatik.uni-freiburg.de/SIGSPATIAL_Sparse%20map%20matching%202018.pdf). 
## Requirements * `cmake` - * `gcc >= 5.0` (or `clang >= 3.9`) - * `libzip` (*optional*, for ZIP support) - * `zlib` (*optional*, for gzip support) - * `libbz2` (*optional*, for bzip2 support) + * `gcc >= 4.9` (or `clang >= 5.0`) ## Building and Installation Fetch this repository and init submodules: -```shell -$ git clone --recurse-submodules https://github.com/ad-freiburg/pfaedle +``` +git clone --recurse-submodules https://github.com/ad-freiburg/pfaedle ``` -```shell -$ mkdir build && cd build -$ cmake .. -$ make -j +``` +mkdir build && cd build +cmake .. +make -j ``` To install, type -```shell -$ make install +``` +make install ``` # General Usage ## Generating shapes for a GTFS feed -```shell -$ pfaedle -x +``` +pfaedle -x ``` A shape'd version of the input GTFS feed will be written to `./gtfs-out`. @@ -53,19 +49,19 @@ A shape'd version of the input GTFS feed will be written to `./gtfs-out`. By default, shapes are only calculated for trips that don't have a shape in the input feed. To drop all existing shapes, use the `-D` flag. -For example, you may generate (and replace existing, see `-D` flag) shapes for the GTFS dataset for Freiburg like this: +For example, you may generate (and replace existing, see -D parameter) shapes for the GTFS dataset for Freiburg like this: -```shell -$ wget https://fritz.freiburg.de/csv_Downloads/VAGFR.zip -$ wget http://download.geofabrik.de/europe/germany/baden-wuerttemberg/freiburg-regbez-latest.osm.bz2 -$ pfaedle -D -x freiburg-regbez-latest.osm.bz2 VAGFR.zip +``` +$ wget https://fritz.freiburg.de/csv_Downloads/VAGFR.zip && unzip VAGFR.zip +$ wget http://download.geofabrik.de/europe/germany/baden-wuerttemberg/freiburg-regbez-latest.osm.bz2 && bunzip2 freiburg-regbez-latest.osm.bz2 +$ pfaedle -D -x freiburg-regbez-latest.osm . ``` ## Generating shapes for a specific MOT To generate shapes for a specific mot only, use the `-m` option. 
Possible values are either `tram`, `bus`, `coach`, `rail`, `subway`, `ferry`, `funicular`, -`gondola`, `all` (default) or GTFS route type codes (0, 1, 2, 3, 4, 5, 6, 7, or [extended route types](https://developers.google.com/transit/gtfs/reference/extended-route-types)). Integer codes will only match the specific route type, while string codes will match classes of route types. For example, `-m 101` will only match routes with `route_type` `101` (high speed rail), while `-m rail` will match any rail service encoded via a standard `route_type` `2` or an extended `route_type` describing a rail service (e.g. `100`, `101`, `102`, ...). +`gondola`, `all` (default) or GTFS vehicle type codes (0, 1, 2, 3, 4, 5, 6, 7). Multiple values can be specified (comma separated). @@ -81,20 +77,16 @@ run. ## via Docker -You can use the [Docker image](https://github.com/orgs/ad-freiburg/packages/container/package/pfaedle) by mounting the OSM & GTFS data into the container: +You can use the [`ad-freiburg/pfaedle` Docker image](https://hub.docker.com/repository/docker/ad-freiburg/pfaedle) by mounting the OSM & GTFS data into the container: ```shell -$ docker pull ghcr.io/ad-freiburg/pfaedle:latest -$ docker run -i --rm \ +docker run -i --rm \ # mount OSM data --volume /path/to/osm/data:/osm \ # mount GTFS data --volume /path/to/gtfs/data:/gtfs \ - # mount default output folder gtfs-out - --volume /path/to/output-dir:/gtfs-out \ - ghcr.io/ad-freiburg/pfaedle:latest \ # tell pfaedle where to find the data - -x /osm/osm-data.xml.bz2 -i /gtfs/myfeed.zip + pfaedle -x /osm/osm-data.xml -i /gtfs ``` ## Debugging @@ -104,12 +96,48 @@ The following flags may be useful for debugging: * `-T ` only calculate shape for a single trip (specified via its GTFS trip id) and output it as GeoJSON to `/path.json` * `--write-graph` write the graph used for routing as GeoJSON to + `/graph.json` + * `--write-cgraph` if `-T` is set, write the combination graph used for + routing as GeoJSON to `/combgraph.json` 
* `--write-trgraph` write the complete network graph to `/trgraph.json` # Configuration A default configuration file `pfaedle.cfg` can be found in this repo and will be installed with `make install`. Custom configuration files can be specified with the `-c` flag. If no `-c` flag is set, `pfaedle` will parse and merge the following cfg files in the given order (if present): `/etc/pfaedle/pfaedle.cfg`, `$HOME/.config/pfaedle/pfaedle.cfg`, `/pfaedle.cfg`. Values given in later files will overwrite earlier defined values. -# Attribution +# Evaluation -Note that the `shapes.txt` produced by `pfaedle` is based on OpenStreetMap data, which is licensed under ODbL 1.0 (see [here](https://osm.org/copyright)). If you copy, distribute, transmit or adapt the shapefied GTFS feed, please credit the contributors of OpenStreetMap. +You may run an entire evaluation of our testing datasets Vitoria-Gasteiz, Paris, Switzerland and +Stuttgart with + +``` +mkdir build && cd build +cmake .. +make -j +make eval +``` + +*Notes:* + * this will download, and filter, the entire OSM files for Spain and the +Stuttgart region. Make sure you have enough space left on your hard drive. + * in evaluation mode, pfaedle needs significantly more time, because the + calculation of the similarity measurements between shapes are expensive + * if you are only interested in the end results of a single dataset, run + `make .lighteval` in `/eval`. For example, `make paris.lighteval` + generates a shaped version of the paris dataset, without doing extensive + comparisons to the ground truth. + * similarily, if you want to run the extensive evaluation for a single dataset, + run `make .eval` in `/eval`. + + +## Evaluation requirements + + * zlib + +On Debianesque systems, type + +``` +sudo apt-get install zlib1g-dev +``` + +to install the dependencies. 
diff --git a/action.yml b/action.yml deleted file mode 100644 index 6fe43de..0000000 --- a/action.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: 'Generate shapes from OSM data' -description: 'Use pfaedle to generate shapes from OSM data for a GTFS feed.' - -inputs: - gtfs_file: - description: 'Path to GTFS .zip file.' - required: true - osm_file: - description: 'Path to OSM .pbf file.' - required: true - mot: - description: 'Mode of transport to generate shapes for.' - required: false - default: 'all' - output_file: - description: 'Path to output GTFS .zip file.' - required: true - -runs: - using: 'docker' - image: 'Dockerfile' - args: - - '-i' - - ${{ inputs.gtfs_file }} - - '-x' - - ${{ inputs.osm_file }} - - '-m' - - ${{ inputs.mot }} - - '-o' - - ${{ inputs.output_file }} \ No newline at end of file diff --git a/cmake/FindLibZip.cmake b/cmake/FindLibZip.cmake deleted file mode 100644 index b6f4237..0000000 --- a/cmake/FindLibZip.cmake +++ /dev/null @@ -1,52 +0,0 @@ -# CMake module to search for libzip -# -# Once done this will define -# -# LIBZIP_FOUND - system has the zip library -# LIBZIP_INCLUDE_DIRS - the zip include directories -# LIBZIP_LIBRARY - Link this to use the zip library -# -# Copyright (c) 2017, Paul Blottiere, -# Copyright (c) 2017, Larry Shaffer, -# Add support for finding zipconf.h in separate location, e.g. on macOS -# -# Redistribution and use is allowed according to the terms of the BSD license. -# For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
- -FIND_PATH(LIBZIP_INCLUDE_DIR - zip.h - "$ENV{LIB_DIR}/include" - "$ENV{INCLUDE}" - /usr/local/include - /usr/include -) - -FIND_PATH(LIBZIP_CONF_INCLUDE_DIR - zipconf.h - "$ENV{LIB_DIR}/include" - "$ENV{LIB_DIR}/lib/libzip/include" - "$ENV{LIB}/lib/libzip/include" - /usr/local/lib/libzip/include - /usr/lib/libzip/include - /usr/local/include - /usr/include - "$ENV{INCLUDE}" -) - -FIND_LIBRARY(LIBZIP_LIBRARY NAMES zip PATHS "$ENV{LIB_DIR}/lib" "$ENV{LIB}" /usr/local/lib /usr/lib ) - -INCLUDE(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibZip DEFAULT_MSG - LIBZIP_LIBRARY LIBZIP_INCLUDE_DIR LIBZIP_CONF_INCLUDE_DIR) - -SET(LIBZIP_INCLUDE_DIRS ${LIBZIP_INCLUDE_DIR} ${LIBZIP_CONF_INCLUDE_DIR}) -MARK_AS_ADVANCED(LIBZIP_LIBRARY LIBZIP_INCLUDE_DIR LIBZIP_CONF_INCLUDE_DIR LIBZIP_INCLUDE_DIRS) - -IF (LIBZIP_FOUND) - MESSAGE(STATUS "Found libzip: ${LIBZIP_LIBRARY}") -ELSE (LIBZIP_FOUND) - SET(LIBZIP_LIBRARY "") - SET(LIBZIP_INCLUDE_DIR "") - SET(LIBZIP_CONF_INCLUDE_DIR "") - MESSAGE(STATUS "Could not find libzip") -ENDIF (LIBZIP_FOUND) diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake new file mode 100644 index 0000000..6751f0f --- /dev/null +++ b/cmake/cpplint.cmake @@ -0,0 +1,133 @@ +# +# CMake module to C++ static analysis against +# Google C++ Style Guide (https://google.github.io/styleguide/cppguide.html) +# +# For more detials please follow links: +# +# - https://github.com/google/styleguide +# - https://pypi.python.org/pypi/cpplint +# - https://github.com/theandrewdavis/cpplint +# +# Copyright (c) 2016 Piotr L. Figlarek +# +# Usage +# ----- +# Include this module via CMake include(...) command and then add each source directory +# via introduced by this module cpplint_add_subdirectory(...) function. Added directory +# will be recursivelly scanned and all available files will be checked. 
+# +# Example +# ------- +# # include CMake module +# include(cmake/cpplint.cmake) +# +# # add all source code directories +# cpplint_add_subdirectory(core) +# cpplint_add_subdirectory(modules/c-bind) +# +# License (MIT) +# ------------- +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ + +# select files extensions to check +option(CPPLINT_TEST_C_FILES "Check *.c files" ON) +option(CPPLINT_TEST_H_FILES "Check *.h files" ON) +option(CPPLINT_TEST_CPP_FILES "Check *.cpp files" ON) +option(CPPLINT_TEST_HPP_FILES "Check *.hpp files" ON) +option(CPPLINT_TEST_TPP_FILES "Check *.tpp files" ON) + +# target to run cpplint.py for all configured sources +set(CPPLINT_TARGET lint CACHE STRING "Name of C++ style checker target") + +# project root directory +set(CPPLINT_PROJECT_ROOT "${PROJECT_SOURCE_DIR}" CACHE STRING "Project ROOT directory") + + +# find cpplint.py script +if(CPPLINT) + message(STATUS "cpplint parser: ${CPPLINT}") +else() + message(FATAL_ERROR "cpplint script: NOT FOUND! " + "Please set the CPPLINT variable.") +endif() + + +# common target to concatenate all cpplint.py targets +add_custom_target(${CPPLINT_TARGET} ALL) + + +# use cpplint.py to check source code files inside DIR directory +function(cpplint_add_subdirectory DIR) + # create relative path to the directory + set(ABSOLUTE_DIR ${CMAKE_CURRENT_LIST_DIR}/${DIR}) + + # add *.c files + if(CPPLINT_TEST_C_FILES) + set(EXTENSIONS ${EXTENSIONS}c,) + set(FILES_TO_CHECK ${FILES_TO_CHECK} ${ABSOLUTE_DIR}/*.c) + endif() + + # add *.h files + if(CPPLINT_TEST_H_FILES) + set(EXTENSIONS ${EXTENSIONS}h,) + set(FILES_TO_CHECK ${FILES_TO_CHECK} ${ABSOLUTE_DIR}/*.h) + endif() + + # add *.cpp files + if(CPPLINT_TEST_CPP_FILES) + set(EXTENSIONS ${EXTENSIONS}cpp,) + set(FILES_TO_CHECK ${FILES_TO_CHECK} ${ABSOLUTE_DIR}/*.cpp) + endif() + + # add *.hpp files + if(CPPLINT_TEST_HPP_FILES) + set(EXTENSIONS ${EXTENSIONS}hpp,) + set(FILES_TO_CHECK ${FILES_TO_CHECK} ${ABSOLUTE_DIR}/*.hpp) + endif() + + # add *.tpp files + if(CPPLINT_TEST_TPP_FILES) + set(EXTENSIONS ${EXTENSIONS}tpp,) + set(FILES_TO_CHECK ${FILES_TO_CHECK} ${ABSOLUTE_DIR}/*.tpp) + endif() + + # find all source files inside project + file(GLOB_RECURSE LIST_OF_FILES ${FILES_TO_CHECK}) + + # create valid target name for this check + string(REGEX 
REPLACE "/" "." TEST_NAME ${DIR}) + set(TARGET_NAME ${CPPLINT_TARGET}.${TEST_NAME}) + + # perform cpplint check + add_custom_target(${TARGET_NAME} + COMMAND ${CPPLINT} "--extensions=${EXTENSIONS}" + "--root=${CPPLINT_PROJECT_ROOT}" + "--quiet" + ${LIST_OF_FILES} + DEPENDS ${LIST_OF_FILES} + COMMENT "cpplint: Checking source code style" + ) + + # run this target when root cpplint.py test is triggered + add_dependencies(${CPPLINT_TARGET} ${TARGET_NAME}) + + # add this test to CTest + add_test(${TARGET_NAME} ${CMAKE_MAKE_PROGRAM} ${TARGET_NAME}) +endfunction() diff --git a/cpplint.py b/cpplint.py new file mode 100755 index 0000000..294337d --- /dev/null +++ b/cpplint.py @@ -0,0 +1,6233 @@ +#!/usr/bin/env python2 +# +# Copyright (c) 2009 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Does google-lint on c++ files. + +The goal of this script is to identify places in the code that *may* +be in non-compliance with google style. It does not attempt to fix +up these problems -- the point is to educate. It does also not +attempt to find all problems, or to ensure that everything it does +find is legitimately a problem. + +In particular, we can get very confused by /* and // inside strings! +We do a small hack, which is to ignore //'s with "'s after them on the +same line, but it is far from perfect (in either direction). +""" + +import codecs +import copy +import getopt +import math # for log +import os +import re +import sre_compile +import string +import sys +import unicodedata + +try: + xrange # Python 2 +except NameError: + xrange = range # Python 3 + + +_USAGE = """ +Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...] + [--counting=total|toplevel|detailed] [--root=subdir] + [--linelength=digits] [--headers=x,y,...] + [--quiet] + [file] ... + + The style guidelines this tries to follow are those in + https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml + + Every problem is given a confidence score from 1-5, with 5 meaning we are + certain of the problem, and 1 meaning it could be a legitimate construct. + This will miss some errors, and is not a substitute for a code review. + + To suppress false-positive errors of a certain category, add a + 'NOLINT(category)' comment to the line. 
NOLINT or NOLINT(*) + suppresses errors of all categories on that line. + + The files passed in will be linted; at least one file must be provided. + Default linted extensions are .cc, .cpp, .cu, .cuh and .h. Change the + extensions with the --extensions flag. + + Flags: + + output=vs7 + By default, the output is formatted to ease emacs parsing. Visual Studio + compatible output (vs7) may also be used. Other formats are unsupported. + + verbose=# + Specify a number 0-5 to restrict errors to certain verbosity levels. + + quiet + Don't print anything if no errors are found. + + filter=-x,+y,... + Specify a comma-separated list of category-filters to apply: only + error messages whose category names pass the filters will be printed. + (Category names are printed with the message and look like + "[whitespace/indent]".) Filters are evaluated left to right. + "-FOO" and "FOO" means "do not print categories that start with FOO". + "+FOO" means "do print categories that start with FOO". + + Examples: --filter=-whitespace,+whitespace/braces + --filter=whitespace,runtime/printf,+runtime/printf_format + --filter=-,+build/include_what_you_use + + To see a list of all the categories used in cpplint, pass no arg: + --filter= + + counting=total|toplevel|detailed + The total number of errors found is always printed. If + 'toplevel' is provided, then the count of errors in each of + the top-level categories like 'build' and 'whitespace' will + also be printed. If 'detailed' is provided, then a count + is provided for each category like 'build/class'. + + root=subdir + The root directory used for deriving header guard CPP variable. + By default, the header guard CPP variable is calculated as the relative + path to the directory that contains .git, .hg, or .svn. When this flag + is specified, the relative path is calculated from the specified + directory. If the specified directory does not exist, this flag is + ignored. 
+ + Examples: + Assuming that top/src/.git exists (and cwd=top/src), the header guard + CPP variables for top/src/chrome/browser/ui/browser.h are: + + No flag => CHROME_BROWSER_UI_BROWSER_H_ + --root=chrome => BROWSER_UI_BROWSER_H_ + --root=chrome/browser => UI_BROWSER_H_ + --root=.. => SRC_CHROME_BROWSER_UI_BROWSER_H_ + + linelength=digits + This is the allowed line length for the project. The default value is + 80 characters. + + Examples: + --linelength=120 + + extensions=extension,extension,... + The allowed file extensions that cpplint will check + + Examples: + --extensions=hpp,cpp + + headers=x,y,... + The header extensions that cpplint will treat as .h in checks. Values are + automatically added to --extensions list. + + Examples: + --headers=hpp,hxx + --headers=hpp + + cpplint.py supports per-directory configurations specified in CPPLINT.cfg + files. CPPLINT.cfg file can contain a number of key=value pairs. + Currently the following options are supported: + + set noparent + filter=+filter1,-filter2,... + exclude_files=regex + linelength=80 + root=subdir + headers=x,y,... + + "set noparent" option prevents cpplint from traversing directory tree + upwards looking for more .cfg files in parent directories. This option + is usually placed in the top-level project directory. + + The "filter" option is similar in function to --filter flag. It specifies + message filters in addition to the |_DEFAULT_FILTERS| and those specified + through --filter command-line flag. + + "exclude_files" allows to specify a regular expression to be matched against + a file name. If the expression matches, the file is skipped and not run + through liner. + + "linelength" allows to specify the allowed line length for the project. + + The "root" option is similar in function to the --root flag (see example + above). Paths are relative to the directory of the CPPLINT.cfg. + + The "headers" option is similar in function to the --headers flag + (see example above). 
+ + CPPLINT.cfg has an effect on files in the same directory and all + sub-directories, unless overridden by a nested configuration file. + + Example file: + filter=-build/include_order,+build/include_alpha + exclude_files=.*\.cc + + The above example disables build/include_order warning and enables + build/include_alpha as well as excludes all .cc from being + processed by linter, in the current directory (where the .cfg + file is located) and all sub-directories. +""" + +# We categorize each error message we print. Here are the categories. +# We want an explicit list so we can list them all in cpplint --filter=. +# If you add a new error message with a new category, add it to the list +# here! cpplint_unittest.py should tell you if you forget to do this. +_ERROR_CATEGORIES = [ + 'build/class', + 'build/c++11', + 'build/c++14', + 'build/c++tr1', + 'build/deprecated', + 'build/endif_comment', + 'build/explicit_make_pair', + 'build/forward_decl', + 'build/header_guard', + 'build/include', + 'build/include_alpha', + 'build/include_order', + 'build/include_what_you_use', + 'build/namespaces', + 'build/printf_format', + 'build/storage_class', + 'legal/copyright', + 'readability/alt_tokens', + 'readability/braces', + 'readability/casting', + 'readability/check', + 'readability/constructors', + 'readability/fn_size', + 'readability/inheritance', + 'readability/multiline_comment', + 'readability/multiline_string', + 'readability/namespace', + 'readability/nolint', + 'readability/nul', + 'readability/strings', + 'readability/todo', + 'readability/utf8', + 'runtime/arrays', + 'runtime/casting', + 'runtime/explicit', + 'runtime/int', + 'runtime/init', + 'runtime/invalid_increment', + 'runtime/member_string_references', + 'runtime/memset', + 'runtime/indentation_namespace', + 'runtime/operator', + 'runtime/printf', + 'runtime/printf_format', + 'runtime/references', + 'runtime/string', + 'runtime/threadsafe_fn', + 'runtime/vlog', + 'whitespace/blank_line', + 
'whitespace/braces', + 'whitespace/comma', + 'whitespace/comments', + 'whitespace/empty_conditional_body', + 'whitespace/empty_if_body', + 'whitespace/empty_loop_body', + 'whitespace/end_of_line', + 'whitespace/ending_newline', + 'whitespace/forcolon', + 'whitespace/indent', + 'whitespace/line_length', + 'whitespace/newline', + 'whitespace/operators', + 'whitespace/parens', + 'whitespace/semicolon', + 'whitespace/tab', + 'whitespace/todo', + ] + +# These error categories are no longer enforced by cpplint, but for backwards- +# compatibility they may still appear in NOLINT comments. +_LEGACY_ERROR_CATEGORIES = [ + 'readability/streams', + 'readability/function', + ] + +# The default state of the category filter. This is overridden by the --filter= +# flag. By default all errors are on, so only add here categories that should be +# off by default (i.e., categories that must be enabled by the --filter= flags). +# All entries here should start with a '-' or '+', as in the --filter= flag. +_DEFAULT_FILTERS = ['-build/include_alpha'] + +# The default list of categories suppressed for C (not C++) files. +_DEFAULT_C_SUPPRESSED_CATEGORIES = [ + 'readability/casting', + ] + +# The default list of categories suppressed for Linux Kernel files. +_DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = [ + 'whitespace/tab', + ] + +# We used to check for high-bit characters, but after much discussion we +# decided those were OK, as long as they were in UTF-8 and didn't represent +# hard-coded international strings, which belong in a separate i18n file. 
+ +# C++ headers +_CPP_HEADERS = frozenset([ + # Legacy + 'algobase.h', + 'algo.h', + 'alloc.h', + 'builtinbuf.h', + 'bvector.h', + 'complex.h', + 'defalloc.h', + 'deque.h', + 'editbuf.h', + 'fstream.h', + 'function.h', + 'hash_map', + 'hash_map.h', + 'hash_set', + 'hash_set.h', + 'hashtable.h', + 'heap.h', + 'indstream.h', + 'iomanip.h', + 'iostream.h', + 'istream.h', + 'iterator.h', + 'list.h', + 'map.h', + 'multimap.h', + 'multiset.h', + 'ostream.h', + 'pair.h', + 'parsestream.h', + 'pfstream.h', + 'procbuf.h', + 'pthread_alloc', + 'pthread_alloc.h', + 'rope', + 'rope.h', + 'ropeimpl.h', + 'set.h', + 'slist', + 'slist.h', + 'stack.h', + 'stdiostream.h', + 'stl_alloc.h', + 'stl_relops.h', + 'streambuf.h', + 'stream.h', + 'strfile.h', + 'strstream.h', + 'tempbuf.h', + 'tree.h', + 'type_traits.h', + 'vector.h', + # 17.6.1.2 C++ library headers + 'algorithm', + 'array', + 'atomic', + 'bitset', + 'chrono', + 'codecvt', + 'complex', + 'condition_variable', + 'deque', + 'exception', + 'forward_list', + 'fstream', + 'functional', + 'future', + 'initializer_list', + 'iomanip', + 'ios', + 'iosfwd', + 'iostream', + 'istream', + 'iterator', + 'limits', + 'list', + 'locale', + 'map', + 'memory', + 'mutex', + 'new', + 'numeric', + 'ostream', + 'queue', + 'random', + 'ratio', + 'regex', + 'scoped_allocator', + 'set', + 'sstream', + 'stack', + 'stdexcept', + 'streambuf', + 'string', + 'strstream', + 'system_error', + 'thread', + 'tuple', + 'typeindex', + 'typeinfo', + 'type_traits', + 'unordered_map', + 'unordered_set', + 'utility', + 'valarray', + 'vector', + # 17.6.1.2 C++ headers for C library facilities + 'cassert', + 'ccomplex', + 'cctype', + 'cerrno', + 'cfenv', + 'cfloat', + 'cinttypes', + 'ciso646', + 'climits', + 'clocale', + 'cmath', + 'csetjmp', + 'csignal', + 'cstdalign', + 'cstdarg', + 'cstdbool', + 'cstddef', + 'cstdint', + 'cstdio', + 'cstdlib', + 'cstring', + 'ctgmath', + 'ctime', + 'cuchar', + 'cwchar', + 'cwctype', + ]) + +# Type names +_TYPES = re.compile( + 
r'^(?:' + # [dcl.type.simple] + r'(char(16_t|32_t)?)|wchar_t|' + r'bool|short|int|long|signed|unsigned|float|double|' + # [support.types] + r'(ptrdiff_t|size_t|max_align_t|nullptr_t)|' + # [cstdint.syn] + r'(u?int(_fast|_least)?(8|16|32|64)_t)|' + r'(u?int(max|ptr)_t)|' + r')$') + + +# These headers are excluded from [build/include] and [build/include_order] +# checks: +# - Anything not following google file name conventions (containing an +# uppercase character, such as Python.h or nsStringAPI.h, for example). +# - Lua headers. +_THIRD_PARTY_HEADERS_PATTERN = re.compile( + r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$') + +# Pattern for matching FileInfo.BaseName() against test file name +_TEST_FILE_SUFFIX = r'(_test|_unittest|_regtest)$' + +# Pattern that matches only complete whitespace, possibly across multiple lines. +_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL) + +# Assertion macros. These are defined in base/logging.h and +# testing/base/public/gunit.h. +_CHECK_MACROS = [ + 'DCHECK', 'CHECK', + 'EXPECT_TRUE', 'ASSERT_TRUE', + 'EXPECT_FALSE', 'ASSERT_FALSE', + ] + +# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE +_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS]) + +for op, replacement in [('==', 'EQ'), ('!=', 'NE'), + ('>=', 'GE'), ('>', 'GT'), + ('<=', 'LE'), ('<', 'LT')]: + _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement + _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement + _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement + _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement + +for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), + ('>=', 'LT'), ('>', 'LE'), + ('<=', 'GT'), ('<', 'GE')]: + _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement + _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement + +# Alternative tokens and their replacements. 
For full list, see section 2.5 +# Alternative tokens [lex.digraph] in the C++ standard. +# +# Digraphs (such as '%:') are not included here since it's a mess to +# match those on a word boundary. +_ALT_TOKEN_REPLACEMENT = { + 'and': '&&', + 'bitor': '|', + 'or': '||', + 'xor': '^', + 'compl': '~', + 'bitand': '&', + 'and_eq': '&=', + 'or_eq': '|=', + 'xor_eq': '^=', + 'not': '!', + 'not_eq': '!=' + } + +# Compile regular expression that matches all the above keywords. The "[ =()]" +# bit is meant to avoid matching these keywords outside of boolean expressions. +# +# False positives include C-style multi-line comments and multi-line strings +# but those have always been troublesome for cpplint. +_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile( + r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)') + + +# These constants define types of headers for use with +# _IncludeState.CheckNextIncludeOrder(). +_C_SYS_HEADER = 1 +_CPP_SYS_HEADER = 2 +_LIKELY_MY_HEADER = 3 +_POSSIBLE_MY_HEADER = 4 +_OTHER_HEADER = 5 + +# These constants define the current inline assembly state +_NO_ASM = 0 # Outside of inline assembly block +_INSIDE_ASM = 1 # Inside inline assembly block +_END_ASM = 2 # Last line of inline assembly block +_BLOCK_ASM = 3 # The whole block is an inline assembly block + +# Match start of assembly blocks +_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' + r'(?:\s+(volatile|__volatile__))?' + r'\s*[{(]') + +# Match strings that indicate we're working on a C (not C++) file. +_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|' + r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))') + +# Match string that indicates we're working on a Linux Kernel file. +_SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)') + +_regexp_compile_cache = {} + +# {str, set(int)}: a map from error categories to sets of linenumbers +# on which those errors are expected and should be suppressed. 
+_error_suppressions = {} + +# The root directory used for deriving header guard CPP variable. +# This is set by --root flag. +_root = None +_root_debug = False + +# The allowed line length of files. +# This is set by --linelength flag. +_line_length = 80 + +# The allowed extensions for file names +# This is set by --extensions flag. +_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh']) + +# Treat all headers starting with 'h' equally: .h, .hpp, .hxx etc. +# This is set by --headers flag. +_hpp_headers = set(['h']) + +# {str, bool}: a map from error categories to booleans which indicate if the +# category should be suppressed for every line. +_global_error_suppressions = {} + +def ProcessHppHeadersOption(val): + global _hpp_headers + try: + _hpp_headers = set(val.split(',')) + # Automatically append to extensions list so it does not have to be set 2 times + _valid_extensions.update(_hpp_headers) + except ValueError: + PrintUsage('Header extensions must be comma seperated list.') + +def IsHeaderExtension(file_extension): + return file_extension in _hpp_headers + +def ParseNolintSuppressions(filename, raw_line, linenum, error): + """Updates the global list of line error-suppressions. + + Parses any NOLINT comments on the current line, updating the global + error_suppressions store. Reports an error if the NOLINT comment + was malformed. + + Args: + filename: str, the name of the input file. + raw_line: str, the line of input text, with comments. + linenum: int, the number of the current line. + error: function, an error handler. 
+ """ + matched = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line) + if matched: + if matched.group(1): + suppressed_line = linenum + 1 + else: + suppressed_line = linenum + category = matched.group(2) + if category in (None, '(*)'): # => "suppress all" + _error_suppressions.setdefault(None, set()).add(suppressed_line) + else: + if category.startswith('(') and category.endswith(')'): + category = category[1:-1] + if category in _ERROR_CATEGORIES: + _error_suppressions.setdefault(category, set()).add(suppressed_line) + elif category not in _LEGACY_ERROR_CATEGORIES: + error(filename, linenum, 'readability/nolint', 5, + 'Unknown NOLINT error category: %s' % category) + + +def ProcessGlobalSuppresions(lines): + """Updates the list of global error suppressions. + + Parses any lint directives in the file that have global effect. + + Args: + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + """ + for line in lines: + if _SEARCH_C_FILE.search(line): + for category in _DEFAULT_C_SUPPRESSED_CATEGORIES: + _global_error_suppressions[category] = True + if _SEARCH_KERNEL_FILE.search(line): + for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES: + _global_error_suppressions[category] = True + + +def ResetNolintSuppressions(): + """Resets the set of NOLINT suppressions to empty.""" + _error_suppressions.clear() + _global_error_suppressions.clear() + + +def IsErrorSuppressedByNolint(category, linenum): + """Returns true if the specified error category is suppressed on this line. + + Consults the global error_suppressions map populated by + ParseNolintSuppressions/ProcessGlobalSuppresions/ResetNolintSuppressions. + + Args: + category: str, the category of the error. + linenum: int, the current line number. + Returns: + bool, True iff the error should be suppressed due to a NOLINT comment or + global suppression. 
+ """ + return (_global_error_suppressions.get(category, False) or + linenum in _error_suppressions.get(category, set()) or + linenum in _error_suppressions.get(None, set())) + + +def Match(pattern, s): + """Matches the string with the pattern, caching the compiled regexp.""" + # The regexp compilation caching is inlined in both Match and Search for + # performance reasons; factoring it out into a separate function turns out + # to be noticeably expensive. + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].match(s) + + +def ReplaceAll(pattern, rep, s): + """Replaces instances of pattern in a string with a replacement. + + The compiled regex is kept in a cache shared by Match and Search. + + Args: + pattern: regex pattern + rep: replacement text + s: search string + + Returns: + string with replacements made (or original string if no replacements) + """ + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].sub(rep, s) + + +def Search(pattern, s): + """Searches the string for the pattern, caching the compiled regexp.""" + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].search(s) + + +def _IsSourceExtension(s): + """File extension (excluding dot) matches a source file extension.""" + return s in ('c', 'cc', 'cpp', 'cxx') + + +class _IncludeState(object): + """Tracks line numbers for includes, and the order in which includes appear. + + include_list contains list of lists of (header, line number) pairs. + It's a lists of lists rather than just one flat list to make it + easier to update across preprocessor boundaries. + + Call CheckNextIncludeOrder() once for each header in the file, passing + in the type constants defined above. 
Calls in an illegal order will + raise an _IncludeError with an appropriate error message. + + """ + # self._section will move monotonically through this set. If it ever + # needs to move backwards, CheckNextIncludeOrder will raise an error. + _INITIAL_SECTION = 0 + _MY_H_SECTION = 1 + _C_SECTION = 2 + _CPP_SECTION = 3 + _OTHER_H_SECTION = 4 + + _TYPE_NAMES = { + _C_SYS_HEADER: 'C system header', + _CPP_SYS_HEADER: 'C++ system header', + _LIKELY_MY_HEADER: 'header this file implements', + _POSSIBLE_MY_HEADER: 'header this file may implement', + _OTHER_HEADER: 'other header', + } + _SECTION_NAMES = { + _INITIAL_SECTION: "... nothing. (This can't be an error.)", + _MY_H_SECTION: 'a header this file implements', + _C_SECTION: 'C system header', + _CPP_SECTION: 'C++ system header', + _OTHER_H_SECTION: 'other header', + } + + def __init__(self): + self.include_list = [[]] + self.ResetSection('') + + def FindHeader(self, header): + """Check if a header has already been included. + + Args: + header: header to check. + Returns: + Line number of previous occurrence, or -1 if the header has not + been seen before. + """ + for section_list in self.include_list: + for f in section_list: + if f[0] == header: + return f[1] + return -1 + + def ResetSection(self, directive): + """Reset section checking for preprocessor directive. + + Args: + directive: preprocessor directive (e.g. "if", "else"). + """ + # The name of the current section. + self._section = self._INITIAL_SECTION + # The path of last found header. + self._last_header = '' + + # Update list of includes. Note that we never pop from the + # include list. + if directive in ('if', 'ifdef', 'ifndef'): + self.include_list.append([]) + elif directive in ('else', 'elif'): + self.include_list[-1] = [] + + def SetLastHeader(self, header_path): + self._last_header = header_path + + def CanonicalizeAlphabeticalOrder(self, header_path): + """Returns a path canonicalized for alphabetical comparison. 
+ + - replaces "-" with "_" so they both cmp the same. + - removes '-inl' since we don't require them to be after the main header. + - lowercase everything, just in case. + + Args: + header_path: Path to be canonicalized. + + Returns: + Canonicalized path. + """ + return header_path.replace('-inl.h', '.h').replace('-', '_').lower() + + def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): + """Check if a header is in alphabetical order with the previous header. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + header_path: Canonicalized header to be checked. + + Returns: + Returns true if the header is in alphabetical order. + """ + # If previous section is different from current section, _last_header will + # be reset to empty string, so it's always less than current header. + # + # If previous line was a blank line, assume that the headers are + # intentionally sorted the way they are. + if (self._last_header > header_path and + Match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])): + return False + return True + + def CheckNextIncludeOrder(self, header_type): + """Returns a non-empty error message if the next header is out of order. + + This function also updates the internal state to be ready to check + the next include. + + Args: + header_type: One of the _XXX_HEADER constants defined above. + + Returns: + The empty string if the header is in the right order, or an + error message describing what's wrong. 
+ + """ + error_message = ('Found %s after %s' % + (self._TYPE_NAMES[header_type], + self._SECTION_NAMES[self._section])) + + last_section = self._section + + if header_type == _C_SYS_HEADER: + if self._section <= self._C_SECTION: + self._section = self._C_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _CPP_SYS_HEADER: + if self._section <= self._CPP_SECTION: + self._section = self._CPP_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _LIKELY_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + self._section = self._OTHER_H_SECTION + elif header_type == _POSSIBLE_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + # This will always be the fallback because we're not sure + # enough that the header is associated with this file. + self._section = self._OTHER_H_SECTION + else: + assert header_type == _OTHER_HEADER + self._section = self._OTHER_H_SECTION + + if last_section != self._section: + self._last_header = '' + + return '' + + +class _CppLintState(object): + """Maintains module-wide state..""" + + def __init__(self): + self.verbose_level = 1 # global setting. + self.error_count = 0 # global count of reported errors + # filters to apply when emitting error messages + self.filters = _DEFAULT_FILTERS[:] + # backup of filter list. Used to restore the state after each file. + self._filters_backup = self.filters[:] + self.counting = 'total' # In what way are we counting errors? + self.errors_by_category = {} # string to int dict storing error counts + self.quiet = False # Suppress non-error messagess? 
+ + # output format: + # "emacs" - format that emacs can parse (default) + # "vs7" - format that Microsoft Visual Studio 7 can parse + self.output_format = 'emacs' + + def SetOutputFormat(self, output_format): + """Sets the output format for errors.""" + self.output_format = output_format + + def SetQuiet(self, quiet): + """Sets the module's quiet settings, and returns the previous setting.""" + last_quiet = self.quiet + self.quiet = quiet + return last_quiet + + def SetVerboseLevel(self, level): + """Sets the module's verbosity, and returns the previous setting.""" + last_verbose_level = self.verbose_level + self.verbose_level = level + return last_verbose_level + + def SetCountingStyle(self, counting_style): + """Sets the module's counting options.""" + self.counting = counting_style + + def SetFilters(self, filters): + """Sets the error-message filters. + + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "+whitespace/indent"). + Each filter should start with + or -; else we die. + + Raises: + ValueError: The comma-separated filters did not all start with '+' or '-'. + E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" + """ + # Default filters always have less priority than the flag ones. + self.filters = _DEFAULT_FILTERS[:] + self.AddFilters(filters) + + def AddFilters(self, filters): + """ Adds more filters to the existing list of error-message filters. 
""" + for filt in filters.split(','): + clean_filt = filt.strip() + if clean_filt: + self.filters.append(clean_filt) + for filt in self.filters: + if not (filt.startswith('+') or filt.startswith('-')): + raise ValueError('Every filter in --filters must start with + or -' + ' (%s does not)' % filt) + + def BackupFilters(self): + """ Saves the current filter list to backup storage.""" + self._filters_backup = self.filters[:] + + def RestoreFilters(self): + """ Restores filters previously backed up.""" + self.filters = self._filters_backup[:] + + def ResetErrorCounts(self): + """Sets the module's error statistic back to zero.""" + self.error_count = 0 + self.errors_by_category = {} + + def IncrementErrorCount(self, category): + """Bumps the module's error statistic.""" + self.error_count += 1 + if self.counting in ('toplevel', 'detailed'): + if self.counting != 'detailed': + category = category.split('/')[0] + if category not in self.errors_by_category: + self.errors_by_category[category] = 0 + self.errors_by_category[category] += 1 + + def PrintErrorCounts(self): + """Print a summary of errors by category, and the total.""" + for category, count in self.errors_by_category.iteritems(): + sys.stderr.write('Category \'%s\' errors found: %d\n' % + (category, count)) + sys.stdout.write('Total errors found: %d\n' % self.error_count) + +_cpplint_state = _CppLintState() + + +def _OutputFormat(): + """Gets the module's output format.""" + return _cpplint_state.output_format + + +def _SetOutputFormat(output_format): + """Sets the module's output format.""" + _cpplint_state.SetOutputFormat(output_format) + +def _Quiet(): + """Return's the module's quiet setting.""" + return _cpplint_state.quiet + +def _SetQuiet(quiet): + """Set the module's quiet status, and return previous setting.""" + return _cpplint_state.SetQuiet(quiet) + + +def _VerboseLevel(): + """Returns the module's verbosity setting.""" + return _cpplint_state.verbose_level + + +def _SetVerboseLevel(level): + """Sets 
the module's verbosity, and returns the previous setting.""" + return _cpplint_state.SetVerboseLevel(level) + + +def _SetCountingStyle(level): + """Sets the module's counting options.""" + _cpplint_state.SetCountingStyle(level) + + +def _Filters(): + """Returns the module's list of output filters, as a list.""" + return _cpplint_state.filters + + +def _SetFilters(filters): + """Sets the module's error-message filters. + + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.SetFilters(filters) + +def _AddFilters(filters): + """Adds more filter overrides. + + Unlike _SetFilters, this function does not reset the current list of filters + available. + + Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.AddFilters(filters) + +def _BackupFilters(): + """ Saves the current filter list to backup storage.""" + _cpplint_state.BackupFilters() + +def _RestoreFilters(): + """ Restores filters previously backed up.""" + _cpplint_state.RestoreFilters() + +class _FunctionState(object): + """Tracks current function name and the number of lines in its body.""" + + _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. + _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. + + def __init__(self): + self.in_a_function = False + self.lines_in_function = 0 + self.current_function = '' + + def Begin(self, function_name): + """Start analyzing function body. + + Args: + function_name: The name of the function being tracked. 
+ """ + self.in_a_function = True + self.lines_in_function = 0 + self.current_function = function_name + + def Count(self): + """Count line in current function body.""" + if self.in_a_function: + self.lines_in_function += 1 + + def Check(self, error, filename, linenum): + """Report if too many lines in function body. + + Args: + error: The function to call with any errors found. + filename: The name of the current file. + linenum: The number of the line to check. + """ + if not self.in_a_function: + return + + if Match(r'T(EST|est)', self.current_function): + base_trigger = self._TEST_TRIGGER + else: + base_trigger = self._NORMAL_TRIGGER + trigger = base_trigger * 2**_VerboseLevel() + + if self.lines_in_function > trigger: + error_level = int(math.log(self.lines_in_function / base_trigger, 2)) + # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... + if error_level > 5: + error_level = 5 + error(filename, linenum, 'readability/fn_size', error_level, + 'Small and focused functions are preferred:' + ' %s has %d non-comment lines' + ' (error triggered by exceeding %d lines).' % ( + self.current_function, self.lines_in_function, trigger)) + + def End(self): + """Stop analyzing function body.""" + self.in_a_function = False + + +class _IncludeError(Exception): + """Indicates a problem with the include order in a file.""" + pass + + +class FileInfo(object): + """Provides utility functions for filenames. + + FileInfo provides easy access to the components of a file's path + relative to the project root. + """ + + def __init__(self, filename): + self._filename = filename + + def FullName(self): + """Make Windows paths like Unix.""" + return os.path.abspath(self._filename).replace('\\', '/') + + def RepositoryName(self): + """FullName after removing the local path to the repository. 
+ + If we have a real absolute path name here we can try to do something smart: + detecting the root of the checkout and truncating /path/to/checkout from + the name so that we get header guards that don't include things like + "C:\Documents and Settings\..." or "/home/username/..." in them and thus + people on different computers who have checked the source out to different + locations won't see bogus errors. + """ + fullname = self.FullName() + + if os.path.exists(fullname): + project_dir = os.path.dirname(fullname) + + if os.path.exists(os.path.join(project_dir, ".svn")): + # If there's a .svn file in the current directory, we recursively look + # up the directory tree for the top of the SVN checkout + root_dir = project_dir + one_up_dir = os.path.dirname(root_dir) + while os.path.exists(os.path.join(one_up_dir, ".svn")): + root_dir = os.path.dirname(root_dir) + one_up_dir = os.path.dirname(one_up_dir) + + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by + # searching up from the current path. + root_dir = current_dir = os.path.dirname(fullname) + while current_dir != os.path.dirname(current_dir): + if (os.path.exists(os.path.join(current_dir, ".git")) or + os.path.exists(os.path.join(current_dir, ".hg")) or + os.path.exists(os.path.join(current_dir, ".svn"))): + root_dir = current_dir + current_dir = os.path.dirname(current_dir) + + if (os.path.exists(os.path.join(root_dir, ".git")) or + os.path.exists(os.path.join(root_dir, ".hg")) or + os.path.exists(os.path.join(root_dir, ".svn"))): + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Don't know what to do; header guard warnings may be wrong... + return fullname + + def Split(self): + """Splits the file into the directory, basename, and extension. 
+ + For 'chrome/browser/browser.cc', Split() would + return ('chrome/browser', 'browser', '.cc') + + Returns: + A tuple of (directory, basename, extension). + """ + + googlename = self.RepositoryName() + project, rest = os.path.split(googlename) + return (project,) + os.path.splitext(rest) + + def BaseName(self): + """File base name - text after the final slash, before the final period.""" + return self.Split()[1] + + def Extension(self): + """File extension - text following the final period.""" + return self.Split()[2] + + def NoExtension(self): + """File has no source file extension.""" + return '/'.join(self.Split()[0:2]) + + def IsSource(self): + """File has a source file extension.""" + return _IsSourceExtension(self.Extension()[1:]) + + +def _ShouldPrintError(category, confidence, linenum): + """If confidence >= verbose, category passes filter and is not suppressed.""" + + # There are three ways we might decide not to print an error message: + # a "NOLINT(category)" comment appears in the source, + # the verbosity level isn't high enough, or the filters filter it out. + if IsErrorSuppressedByNolint(category, linenum): + return False + + if confidence < _cpplint_state.verbose_level: + return False + + is_filtered = False + for one_filter in _Filters(): + if one_filter.startswith('-'): + if category.startswith(one_filter[1:]): + is_filtered = True + elif one_filter.startswith('+'): + if category.startswith(one_filter[1:]): + is_filtered = False + else: + assert False # should have been checked for in SetFilter. + if is_filtered: + return False + + return True + + +def Error(filename, linenum, category, confidence, message): + """Logs the fact we've found a lint error. + + We log where the error was found, and also our confidence in the error, + that is, how certain we are this is a legitimate style regression, and + not a misidentification or a use that's sometimes justified. 
+ + False positives can be suppressed by the use of + "cpplint(category)" comments on the offending line. These are + parsed into _error_suppressions. + + Args: + filename: The name of the file containing the error. + linenum: The number of the line containing the error. + category: A string used to describe the "category" this bug + falls under: "whitespace", say, or "runtime". Categories + may have a hierarchy separated by slashes: "whitespace/indent". + confidence: A number from 1-5 representing a confidence score for + the error, with 5 meaning that we are certain of the problem, + and 1 meaning that it could be a legitimate construct. + message: The error message. + """ + if _ShouldPrintError(category, confidence, linenum): + _cpplint_state.IncrementErrorCount(category) + if _cpplint_state.output_format == 'vs7': + sys.stderr.write('%s(%s): error cpplint: [%s] %s [%d]\n' % ( + filename, linenum, category, message, confidence)) + elif _cpplint_state.output_format == 'eclipse': + sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % ( + filename, linenum, message, category, confidence)) + else: + sys.stderr.write('%s:%s: %s [%s] [%d]\n' % ( + filename, linenum, message, category, confidence)) + + +# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard. +_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( + r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') +# Match a single C style comment on the same line. +_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/' +# Matches multi-line C style comments. +# This RE is a little bit more complicated than one might expect, because we +# have to take care of space removals tools so we can handle comments inside +# statements better. +# The current rule is: We only clear spaces from both sides when we're at the +# end of the line. Otherwise, we try to remove spaces from the right side, +# if this doesn't work we try on left side but only if there's a non-character +# on the right. 
+_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( + r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' + + _RE_PATTERN_C_COMMENTS + r'\s+|' + + r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' + + _RE_PATTERN_C_COMMENTS + r')') + + +def IsCppString(line): + """Does line terminate so, that the next symbol is in string constant. + + This function does not consider single-line nor multi-line comments. + + Args: + line: is a partial line of code starting from the 0..n. + + Returns: + True, if next character appended to 'line' is inside a + string constant. + """ + + line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" + return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 + + +def CleanseRawStrings(raw_lines): + """Removes C++11 raw strings from lines. + + Before: + static const char kData[] = R"( + multi-line string + )"; + + After: + static const char kData[] = "" + (replaced by blank line) + ""; + + Args: + raw_lines: list of raw lines. + + Returns: + list of lines with C++11 raw strings replaced by empty strings. + """ + + delimiter = None + lines_without_raw_strings = [] + for line in raw_lines: + if delimiter: + # Inside a raw string, look for the end + end = line.find(delimiter) + if end >= 0: + # Found the end of the string, match leading space for this + # line and resume copying the original lines, and also insert + # a "" on the last line. + leading_space = Match(r'^(\s*)\S', line) + line = leading_space.group(1) + '""' + line[end + len(delimiter):] + delimiter = None + else: + # Haven't found the end yet, append a blank line. + line = '""' + + # Look for beginning of a raw string, and replace them with + # empty strings. This is done in a loop to handle multiple raw + # strings on the same line. + while delimiter is None: + # Look for beginning of a raw string. + # See 2.14.15 [lex.string] for syntax. 
+ # + # Once we have matched a raw string, we check the prefix of the + # line to make sure that the line is not part of a single line + # comment. It's done this way because we remove raw strings + # before removing comments as opposed to removing comments + # before removing raw strings. This is because there are some + # cpplint checks that requires the comments to be preserved, but + # we don't want to check comments that are inside raw strings. + matched = Match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line) + if (matched and + not Match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//', + matched.group(1))): + delimiter = ')' + matched.group(2) + '"' + + end = matched.group(3).find(delimiter) + if end >= 0: + # Raw string ended on same line + line = (matched.group(1) + '""' + + matched.group(3)[end + len(delimiter):]) + delimiter = None + else: + # Start of a multi-line raw string + line = matched.group(1) + '""' + else: + break + + lines_without_raw_strings.append(line) + + # TODO(unknown): if delimiter is not None here, we might want to + # emit a warning for unterminated string. + return lines_without_raw_strings + + +def FindNextMultiLineCommentStart(lines, lineix): + """Find the beginning marker for a multiline comment.""" + while lineix < len(lines): + if lines[lineix].strip().startswith('/*'): + # Only return this marker if the comment goes beyond this line + if lines[lineix].strip().find('*/', 2) < 0: + return lineix + lineix += 1 + return len(lines) + + +def FindNextMultiLineCommentEnd(lines, lineix): + """We are inside a comment, find the end marker.""" + while lineix < len(lines): + if lines[lineix].strip().endswith('*/'): + return lineix + lineix += 1 + return len(lines) + + +def RemoveMultiLineCommentsFromRange(lines, begin, end): + """Clears a range of lines for multi-line comments.""" + # Having // dummy comments makes the lines non-empty, so we will not get + # unnecessary blank line warnings later in the code. 
+ for i in range(begin, end): + lines[i] = '/**/' + + +def RemoveMultiLineComments(filename, lines, error): + """Removes multiline (c-style) comments from lines.""" + lineix = 0 + while lineix < len(lines): + lineix_begin = FindNextMultiLineCommentStart(lines, lineix) + if lineix_begin >= len(lines): + return + lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin) + if lineix_end >= len(lines): + error(filename, lineix_begin + 1, 'readability/multiline_comment', 5, + 'Could not find end of multi-line comment') + return + RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1) + lineix = lineix_end + 1 + + +def CleanseComments(line): + """Removes //-comments and single-line C-style /* */ comments. + + Args: + line: A line of C++ source. + + Returns: + The line with single-line comments removed. + """ + commentpos = line.find('//') + if commentpos != -1 and not IsCppString(line[:commentpos]): + line = line[:commentpos].rstrip() + # get rid of /* ... */ + return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) + + +class CleansedLines(object): + """Holds 4 copies of all lines with different preprocessing applied to them. + + 1) elided member contains lines without strings and comments. + 2) lines member contains lines without comments. + 3) raw_lines member contains all the lines without processing. + 4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw + strings removed. + All these members are of <type 'list'>, and of the same length. 
+ """ + + def __init__(self, lines): + self.elided = [] + self.lines = [] + self.raw_lines = lines + self.num_lines = len(lines) + self.lines_without_raw_strings = CleanseRawStrings(lines) + for linenum in range(len(self.lines_without_raw_strings)): + self.lines.append(CleanseComments( + self.lines_without_raw_strings[linenum])) + elided = self._CollapseStrings(self.lines_without_raw_strings[linenum]) + self.elided.append(CleanseComments(elided)) + + def NumLines(self): + """Returns the number of lines represented.""" + return self.num_lines + + @staticmethod + def _CollapseStrings(elided): + """Collapses strings and chars on a line to simple "" or '' blocks. + + We nix strings first so we're not fooled by text like '"http://"' + + Args: + elided: The line being processed. + + Returns: + The line with collapsed strings. + """ + if _RE_PATTERN_INCLUDE.match(elided): + return elided + + # Remove escaped characters first to make quote/single quote collapsing + # basic. Things that look like escaped characters shouldn't occur + # outside of strings and chars. + elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) + + # Replace quoted strings and digit separators. Both single quotes + # and double quotes are processed in the same loop, otherwise + # nested quotes wouldn't work. + collapsed = '' + while True: + # Find the first quote character + match = Match(r'^([^\'"]*)([\'"])(.*)$', elided) + if not match: + collapsed += elided + break + head, quote, tail = match.groups() + + if quote == '"': + # Collapse double quoted strings + second_quote = tail.find('"') + if second_quote >= 0: + collapsed += head + '""' + elided = tail[second_quote + 1:] + else: + # Unmatched double quote, don't bother processing the rest + # of the line since this is probably a multiline string. + collapsed += elided + break + else: + # Found single quote, check nearby text to eliminate digit separators. 
+ # + # There is no special handling for floating point here, because + # the integer/fractional/exponent parts would all be parsed + # correctly as long as there are digits on both sides of the + # separator. So we are fine as long as we don't see something + # like "0.'3" (gcc 4.9.0 will not allow this literal). + if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head): + match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail) + collapsed += head + match_literal.group(1).replace("'", '') + elided = match_literal.group(2) + else: + second_quote = tail.find('\'') + if second_quote >= 0: + collapsed += head + "''" + elided = tail[second_quote + 1:] + else: + # Unmatched single quote + collapsed += elided + break + + return collapsed + + +def FindEndOfExpressionInLine(line, startpos, stack): + """Find the position just after the end of current parenthesized expression. + + Args: + line: a CleansedLines line. + startpos: start searching at this position. + stack: nesting stack at startpos. + + Returns: + On finding matching end: (index just after matching end, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at end of this line) + """ + for i in xrange(startpos, len(line)): + char = line[i] + if char in '([{': + # Found start of parenthesized expression, push to expression stack + stack.append(char) + elif char == '<': + # Found potential start of template argument list + if i > 0 and line[i - 1] == '<': + # Left shift operator + if stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + elif i > 0 and Search(r'\boperator\s*$', line[0:i]): + # operator<, don't add to stack + continue + else: + # Tentative start of template argument list + stack.append('<') + elif char in ')]}': + # Found end of parenthesized expression. + # + # If we are currently expecting a matching '>', the pending '<' + # must have been an operator. Remove them from expression stack. 
+ while stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + if ((stack[-1] == '(' and char == ')') or + (stack[-1] == '[' and char == ']') or + (stack[-1] == '{' and char == '}')): + stack.pop() + if not stack: + return (i + 1, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == '>': + # Found potential end of template argument list. + + # Ignore "->" and operator functions + if (i > 0 and + (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))): + continue + + # Pop the stack if there is a matching '<'. Otherwise, ignore + # this '>' since it must be an operator. + if stack: + if stack[-1] == '<': + stack.pop() + if not stack: + return (i + 1, None) + elif char == ';': + # Found something that look like end of statements. If we are currently + # expecting a '>', the matching '<' must have been an operator, since + # template argument list should not contain statements. + while stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + + # Did not find end of expression or unbalanced parentheses on this line + return (-1, stack) + + +def CloseExpression(clean_lines, linenum, pos): + """If input points to ( or { or [ or <, finds the position that closes it. + + If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the + linenum/pos that correspond to the closing of the expression. + + TODO(unknown): cpplint spends a fair bit of time matching parentheses. + Ideally we would want to index all opening and closing parentheses once + and have CloseExpression be just a simple lookup, but due to preprocessor + tricks, this is not so easy. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: A position on the line. + + Returns: + A tuple (line, linenum, pos) pointer *past* the closing brace, or + (line, len(lines), -1) if we never find a close. 
Note we ignore + strings and comments when matching; and the line we return is the + 'cleansed' line at linenum. + """ + + line = clean_lines.elided[linenum] + if (line[pos] not in '({[<') or Match(r'<[<=]', line[pos:]): + return (line, clean_lines.NumLines(), -1) + + # Check first line + (end_pos, stack) = FindEndOfExpressionInLine(line, pos, []) + if end_pos > -1: + return (line, linenum, end_pos) + + # Continue scanning forward + while stack and linenum < clean_lines.NumLines() - 1: + linenum += 1 + line = clean_lines.elided[linenum] + (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack) + if end_pos > -1: + return (line, linenum, end_pos) + + # Did not find end of expression before end of file, give up + return (line, clean_lines.NumLines(), -1) + + +def FindStartOfExpressionInLine(line, endpos, stack): + """Find position at the matching start of current expression. + + This is almost the reverse of FindEndOfExpressionInLine, but note + that the input position and returned position differs by 1. + + Args: + line: a CleansedLines line. + endpos: start searching at this position. + stack: nesting stack at endpos. + + Returns: + On finding matching start: (index at matching start, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at beginning of this line) + """ + i = endpos + while i >= 0: + char = line[i] + if char in ')]}': + # Found end of expression, push to expression stack + stack.append(char) + elif char == '>': + # Found potential end of template argument list. + # + # Ignore it if it's a "->" or ">=" or "operator>" + if (i > 0 and + (line[i - 1] == '-' or + Match(r'\s>=\s', line[i - 1:]) or + Search(r'\boperator\s*$', line[0:i]))): + i -= 1 + else: + stack.append('>') + elif char == '<': + # Found potential start of template argument list + if i > 0 and line[i - 1] == '<': + # Left shift operator + i -= 1 + else: + # If there is a matching '>', we can pop the expression stack. 
+ # Otherwise, ignore this '<' since it must be an operator. + if stack and stack[-1] == '>': + stack.pop() + if not stack: + return (i, None) + elif char in '([{': + # Found start of expression. + # + # If there are any unmatched '>' on the stack, they must be + # operators. Remove those. + while stack and stack[-1] == '>': + stack.pop() + if not stack: + return (-1, None) + if ((char == '(' and stack[-1] == ')') or + (char == '[' and stack[-1] == ']') or + (char == '{' and stack[-1] == '}')): + stack.pop() + if not stack: + return (i, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == ';': + # Found something that look like end of statements. If we are currently + # expecting a '<', the matching '>' must have been an operator, since + # template argument list should not contain statements. + while stack and stack[-1] == '>': + stack.pop() + if not stack: + return (-1, None) + + i -= 1 + + return (-1, stack) + + +def ReverseCloseExpression(clean_lines, linenum, pos): + """If input points to ) or } or ] or >, finds the position that opens it. + + If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the + linenum/pos that correspond to the opening of the expression. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: A position on the line. + + Returns: + A tuple (line, linenum, pos) pointer *at* the opening brace, or + (line, 0, -1) if we never find the matching opening brace. Note + we ignore strings and comments when matching; and the line we + return is the 'cleansed' line at linenum. 
+ """ + line = clean_lines.elided[linenum] + if line[pos] not in ')}]>': + return (line, 0, -1) + + # Check last line + (start_pos, stack) = FindStartOfExpressionInLine(line, pos, []) + if start_pos > -1: + return (line, linenum, start_pos) + + # Continue scanning backward + while stack and linenum > 0: + linenum -= 1 + line = clean_lines.elided[linenum] + (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack) + if start_pos > -1: + return (line, linenum, start_pos) + + # Did not find start of expression before beginning of file, give up + return (line, 0, -1) + + +def CheckForCopyright(filename, lines, error): + """Logs an error if no Copyright message appears at the top of the file.""" + + # We'll say it should occur by line 10. Don't forget there's a + # dummy line at the front. + for line in xrange(1, min(len(lines), 11)): + if re.search(r'Copyright', lines[line], re.I): break + else: # means no copyright line was found + error(filename, 0, 'legal/copyright', 5, + 'No copyright message found. ' + 'You should have a line: "Copyright [year] <Copyright Owner>"') + + +def GetIndentLevel(line): + """Return the number of leading spaces in line. + + Args: + line: A string to check. + + Returns: + An integer count of leading spaces, possibly zero. + """ + indent = Match(r'^( *)\S', line) + if indent: + return len(indent.group(1)) + else: + return 0 + +def PathSplitToList(path): + """Returns the path split into a list by the separator. + + Args: + path: An absolute or relative path (e.g. '/a/b/c/' or '../a') + + Returns: + A list of path components (e.g. ['a', 'b', 'c']). + """ + lst = [] + while True: + (head, tail) = os.path.split(path) + if head == path: # absolute paths end + lst.append(head) + break + if tail == path: # relative paths end + lst.append(tail) + break + + path = head + lst.append(tail) + + lst.reverse() + return lst + +def GetHeaderGuardCPPVariable(filename): + """Returns the CPP variable that should be used as a header guard. 
+ + Args: + filename: The name of a C++ header file. + + Returns: + The CPP variable that should be used as a header guard in the + named file. + + """ + + # Restores original filename in case that cpplint is invoked from Emacs's + # flymake. + filename = re.sub(r'_flymake\.h$', '.h', filename) + filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename) + # Replace 'c++' with 'cpp'. + filename = filename.replace('C++', 'cpp').replace('c++', 'cpp') + + fileinfo = FileInfo(filename) + file_path_from_root = fileinfo.RepositoryName() + + def FixupPathFromRoot(): + if _root_debug: + sys.stderr.write("\n_root fixup, _root = '%s', repository name = '%s'\n" + %(_root, fileinfo.RepositoryName())) + + # Process the file path with the --root flag if it was set. + if not _root: + if _root_debug: + sys.stderr.write("_root unspecified\n") + return file_path_from_root + + def StripListPrefix(lst, prefix): + # f(['x', 'y'], ['w, z']) -> None (not a valid prefix) + if lst[:len(prefix)] != prefix: + return None + # f(['a, 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd'] + return lst[(len(prefix)):] + + # root behavior: + # --root=subdir , lstrips subdir from the header guard + maybe_path = StripListPrefix(PathSplitToList(file_path_from_root), + PathSplitToList(_root)) + + if _root_debug: + sys.stderr.write("_root lstrip (maybe_path=%s, file_path_from_root=%s," + + " _root=%s)\n" %(maybe_path, file_path_from_root, _root)) + + if maybe_path: + return os.path.join(*maybe_path) + + # --root=.. 
, will prepend the outer directory to the header guard + full_path = fileinfo.FullName() + root_abspath = os.path.abspath(_root) + + maybe_path = StripListPrefix(PathSplitToList(full_path), + PathSplitToList(root_abspath)) + + if _root_debug: + sys.stderr.write("_root prepend (maybe_path=%s, full_path=%s, " + + "root_abspath=%s)\n" %(maybe_path, full_path, root_abspath)) + + if maybe_path: + return os.path.join(*maybe_path) + + if _root_debug: + sys.stderr.write("_root ignore, returning %s\n" %(file_path_from_root)) + + # --root=FAKE_DIR is ignored + return file_path_from_root + + file_path_from_root = FixupPathFromRoot() + return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_' + + +def CheckForHeaderGuard(filename, clean_lines, error): + """Checks that the file contains a header guard. + + Logs an error if no #ifndef header guard is present. For other + headers, checks that the full pathname is used. + + Args: + filename: The name of the C++ header file. + clean_lines: A CleansedLines instance containing the file. + error: The function to call with any errors found. + """ + + # Don't check for header guards if there are error suppression + # comments somewhere in this file. + # + # Because this is silencing a warning for a nonexistent line, we + # only support the very specific NOLINT(build/header_guard) syntax, + # and not the general NOLINT or NOLINT(*) syntax. + raw_lines = clean_lines.lines_without_raw_strings + for i in raw_lines: + if Search(r'//\s*NOLINT\(build/header_guard\)', i): + return + + cppvar = GetHeaderGuardCPPVariable(filename) + + ifndef = '' + ifndef_linenum = 0 + define = '' + endif = '' + endif_linenum = 0 + for linenum, line in enumerate(raw_lines): + linesplit = line.split() + if len(linesplit) >= 2: + # find the first occurrence of #ifndef and #define, save arg + if not ifndef and linesplit[0] == '#ifndef': + # set ifndef to the header guard presented on the #ifndef line. 
+ ifndef = linesplit[1] + ifndef_linenum = linenum + if not define and linesplit[0] == '#define': + define = linesplit[1] + # find the last occurrence of #endif, save entire line + if line.startswith('#endif'): + endif = line + endif_linenum = linenum + + if not ifndef or not define or ifndef != define: + error(filename, 0, 'build/header_guard', 5, + 'No #ifndef header guard found, suggested CPP variable is: %s' % + cppvar) + return + + # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ + # for backward compatibility. + if ifndef != cppvar: + error_level = 0 + if ifndef != cppvar + '_': + error_level = 5 + + ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum, + error) + error(filename, ifndef_linenum, 'build/header_guard', error_level, + '#ifndef header guard has wrong style, please use: %s' % cppvar) + + # Check for "//" comments on endif line. + ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum, + error) + match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif) + if match: + if match.group(1) == '_': + # Issue low severity warning for deprecated double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + '#endif line should be "#endif // %s"' % cppvar) + return + + # Didn't find the corresponding "//" comment. If this file does not + # contain any "//" comments at all, it could be that the compiler + # only wants "/**/" comments, look for those instead. 
+ no_single_line_comments = True + for i in xrange(1, len(raw_lines) - 1): + line = raw_lines[i] + if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line): + no_single_line_comments = False + break + + if no_single_line_comments: + match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif) + if match: + if match.group(1) == '_': + # Low severity warning for double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + '#endif line should be "#endif /* %s */"' % cppvar) + return + + # Didn't find anything + error(filename, endif_linenum, 'build/header_guard', 5, + '#endif line should be "#endif // %s"' % cppvar) + + +def CheckHeaderFileIncluded(filename, include_state, error): + """Logs an error if a .cc file does not include its header.""" + + # Do not check test files + fileinfo = FileInfo(filename) + if Search(_TEST_FILE_SUFFIX, fileinfo.BaseName()): + return + + headerfile = filename[0:len(filename) - len(fileinfo.Extension())] + '.h' + if not os.path.exists(headerfile): + return + headername = FileInfo(headerfile).RepositoryName() + first_include = 0 + for section_list in include_state.include_list: + for f in section_list: + if headername in f[0] or f[0] in headername: + return + if not first_include: + first_include = f[1] + + error(filename, first_include, 'build/include', 5, + '%s should include its header file %s' % (fileinfo.RepositoryName(), + headername)) + + +def CheckForBadCharacters(filename, lines, error): + """Logs an error for each line containing bad characters. + + Two kinds of bad characters: + + 1. Unicode replacement characters: These indicate that either the file + contained invalid UTF-8 (likely) or Unicode replacement characters (which + it shouldn't). Note that it's possible for this to throw off line + numbering if the invalid UTF-8 occurred adjacent to a newline. + + 2. NUL bytes. These are problematic for some tools. + + Args: + filename: The name of the current file. 
+ lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + for linenum, line in enumerate(lines): + if u'\ufffd' in line: + error(filename, linenum, 'readability/utf8', 5, + 'Line contains invalid UTF-8 (or Unicode replacement character).') + if '\0' in line: + error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.') + + +def CheckForNewlineAtEOF(filename, lines, error): + """Logs an error if there is no newline char at the end of the file. + + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + + # The array lines() was created by adding two newlines to the + # original file (go figure), then splitting on \n. + # To verify that the file ends in \n, we just have to make sure the + # last-but-two element of lines() exists and is empty. + if len(lines) < 3 or lines[-2]: + error(filename, len(lines) - 2, 'whitespace/ending_newline', 5, + 'Could not find a newline character at the end of the file.') + + +def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): + """Logs an error if we see /* ... */ or "..." that extend past one line. + + /* ... */ comments are legit inside macros, for one line. + Otherwise, we prefer // comments, so it's ok to warn about the + other. Likewise, it's ok for strings to extend across multiple + lines, as long as a line continuation character (backslash) + terminates each line. Although not currently prohibited by the C++ + style guide, it's ugly and unnecessary. We don't do well with either + in this lint program, so we warn about both. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + + # Remove all \\ (escaped backslashes) from the line. They are OK, and the + # second (escaped) slash may trigger later \" detection erroneously. + line = line.replace('\\\\', '') + + if line.count('/*') > line.count('*/'): + error(filename, linenum, 'readability/multiline_comment', 5, + 'Complex multi-line /*...*/-style comment found. ' + 'Lint may give bogus warnings. ' + 'Consider replacing these with //-style comments, ' + 'with #if 0...#endif, ' + 'or with more clearly structured multi-line comments.') + + if (line.count('"') - line.count('\\"')) % 2: + error(filename, linenum, 'readability/multiline_string', 5, + 'Multi-line string ("...") found. This lint script doesn\'t ' + 'do well with such strings, and may give bogus warnings. ' + 'Use C++11 raw strings or concatenation instead.') + + +# (non-threadsafe name, thread-safe alternative, validation pattern) +# +# The validation pattern is used to eliminate false positives such as: +# _rand(); // false positive due to substring match. +# ->rand(); // some member function rand(). +# ACMRandom rand(seed); // some variable named rand. +# ISAACRandom rand(); // another variable named rand. +# +# Basically we require the return value of these functions to be used +# in some expression context on the same line by matching on some +# operator before the function name. This eliminates constructors and +# member function calls. 
+_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)' +_THREADING_LIST = ( + ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'), + ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'), + ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'), + ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'), + ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'), + ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'), + ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'), + ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'), + ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'), + ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'), + ('strtok(', 'strtok_r(', + _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'), + ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'), + ) + + +def CheckPosixThreading(filename, clean_lines, linenum, error): + """Checks for calls to thread-unsafe functions. + + Much code has been originally written without consideration of + multi-threading. Also, engineers are relying on their old experience; + they have learned posix before threading extensions were added. These + tests guide the engineers to use thread-safe functions (when using + posix directly). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST: + # Additional pattern matching check to confirm that this is the + # function we are looking for + if Search(pattern, line): + error(filename, linenum, 'runtime/threadsafe_fn', 2, + 'Consider using ' + multithread_safe_func + + '...) instead of ' + single_thread_func + + '...) 
for improved thread safety.') + + +def CheckVlogArguments(filename, clean_lines, linenum, error): + """Checks that VLOG() is only used for defining a logging level. + + For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and + VLOG(FATAL) are not. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line): + error(filename, linenum, 'runtime/vlog', 5, + 'VLOG() should be used with numeric verbosity level. ' + 'Use LOG() if you want symbolic severity levels.') + +# Matches invalid increment: *count++, which moves pointer instead of +# incrementing a value. +_RE_PATTERN_INVALID_INCREMENT = re.compile( + r'^\s*\*\w+(\+\+|--);') + + +def CheckInvalidIncrement(filename, clean_lines, linenum, error): + """Checks for invalid increment *count++. + + For example following function: + void increment_counter(int* count) { + *count++; + } + is invalid, because it effectively does count++, moving pointer, and should + be replaced with ++*count, (*count)++ or *count += 1. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + if _RE_PATTERN_INVALID_INCREMENT.match(line): + error(filename, linenum, 'runtime/invalid_increment', 5, + 'Changing pointer instead of value (or unused value of operator*).') + + +def IsMacroDefinition(clean_lines, linenum): + if Search(r'^#define', clean_lines[linenum]): + return True + + if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]): + return True + + return False + + +def IsForwardClassDeclaration(clean_lines, linenum): + return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum]) + + +class _BlockInfo(object): + """Stores information about a generic block of code.""" + + def __init__(self, linenum, seen_open_brace): + self.starting_linenum = linenum + self.seen_open_brace = seen_open_brace + self.open_parentheses = 0 + self.inline_asm = _NO_ASM + self.check_namespace_indentation = False + + def CheckBegin(self, filename, clean_lines, linenum, error): + """Run checks that applies to text up to the opening brace. + + This is mostly for checking the text after the class identifier + and the "{", usually where the base class is specified. For other + blocks, there isn't much to check, so we always pass. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + pass + + def CheckEnd(self, filename, clean_lines, linenum, error): + """Run checks that applies to text after the closing brace. + + This is mostly used for checking end of namespace comments. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + pass + + def IsBlockInfo(self): + """Returns true if this block is a _BlockInfo. 
+ + This is convenient for verifying that an object is an instance of + a _BlockInfo, but not an instance of any of the derived classes. + + Returns: + True for this class, False for derived classes. + """ + return self.__class__ == _BlockInfo + + +class _ExternCInfo(_BlockInfo): + """Stores information about an 'extern "C"' block.""" + + def __init__(self, linenum): + _BlockInfo.__init__(self, linenum, True) + + +class _ClassInfo(_BlockInfo): + """Stores information about a class.""" + + def __init__(self, name, class_or_struct, clean_lines, linenum): + _BlockInfo.__init__(self, linenum, False) + self.name = name + self.is_derived = False + self.check_namespace_indentation = True + if class_or_struct == 'struct': + self.access = 'public' + self.is_struct = True + else: + self.access = 'private' + self.is_struct = False + + # Remember initial indentation level for this class. Using raw_lines here + # instead of elided to account for leading comments. + self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum]) + + # Try to find the end of the class. This will be confused by things like: + # class A { + # } *x = { ... + # + # But it's still good enough for CheckSectionSpacing. + self.last_line = 0 + depth = 0 + for i in range(linenum, clean_lines.NumLines()): + line = clean_lines.elided[i] + depth += line.count('{') - line.count('}') + if not depth: + self.last_line = i + break + + def CheckBegin(self, filename, clean_lines, linenum, error): + # Look for a bare ':' + if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]): + self.is_derived = True + + def CheckEnd(self, filename, clean_lines, linenum, error): + # If there is a DISALLOW macro, it should appear near the end of + # the class. 
+ seen_last_thing_in_class = False + for i in xrange(linenum - 1, self.starting_linenum, -1): + match = Search( + r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' + + self.name + r'\)', + clean_lines.elided[i]) + if match: + if seen_last_thing_in_class: + error(filename, i, 'readability/constructors', 3, + match.group(1) + ' should be the last thing in the class') + break + + if not Match(r'^\s*$', clean_lines.elided[i]): + seen_last_thing_in_class = True + + # Check that closing brace is aligned with beginning of the class. + # Only do this if the closing brace is indented by only whitespaces. + # This means we will not check single-line class definitions. + indent = Match(r'^( *)\}', clean_lines.elided[linenum]) + if indent and len(indent.group(1)) != self.class_indent: + if self.is_struct: + parent = 'struct ' + self.name + else: + parent = 'class ' + self.name + error(filename, linenum, 'whitespace/indent', 3, + 'Closing brace should be aligned with beginning of %s' % parent) + + +class _NamespaceInfo(_BlockInfo): + """Stores information about a namespace.""" + + def __init__(self, name, linenum): + _BlockInfo.__init__(self, linenum, False) + self.name = name or '' + self.check_namespace_indentation = True + + def CheckEnd(self, filename, clean_lines, linenum, error): + """Check end of namespace comments.""" + line = clean_lines.raw_lines[linenum] + + # Check how many lines is enclosed in this namespace. Don't issue + # warning for missing namespace comments if there aren't enough + # lines. However, do apply checks if there is already an end of + # namespace comment and it's incorrect. + # + # TODO(unknown): We always want to check end of namespace comments + # if a namespace is large, but sometimes we also want to apply the + # check if a short namespace contained nontrivial things (something + # other than forward declarations). 
There is currently no logic on + # deciding what these nontrivial things are, so this check is + # triggered by namespace size only, which works most of the time. + if (linenum - self.starting_linenum < 10 + and not Match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)): + return + + # Look for matching comment at end of namespace. + # + # Note that we accept C style "/* */" comments for terminating + # namespaces, so that code that terminate namespaces inside + # preprocessor macros can be cpplint clean. + # + # We also accept stuff like "// end of namespace ." with the + # period at the end. + # + # Besides these, we don't accept anything else, otherwise we might + # get false negatives when existing comment is a substring of the + # expected namespace. + if self.name: + # Named namespace + if not Match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' + + re.escape(self.name) + r'[\*/\.\\\s]*$'), + line): + error(filename, linenum, 'readability/namespace', 5, + 'Namespace should be terminated with "// namespace %s"' % + self.name) + else: + # Anonymous namespace + if not Match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): + # If "// namespace anonymous" or "// anonymous namespace (more text)", + # mention "// anonymous namespace" as an acceptable form + if Match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line): + error(filename, linenum, 'readability/namespace', 5, + 'Anonymous namespace should be terminated with "// namespace"' + ' or "// anonymous namespace"') + else: + error(filename, linenum, 'readability/namespace', 5, + 'Anonymous namespace should be terminated with "// namespace"') + + +class _PreprocessorInfo(object): + """Stores checkpoints of nesting stacks when #if/#else is seen.""" + + def __init__(self, stack_before_if): + # The entire nesting stack before #if + self.stack_before_if = stack_before_if + + # The entire nesting stack up to #else + self.stack_before_else = [] + + # Whether we have already seen #else or #elif + self.seen_else = 
False + + +class NestingState(object): + """Holds states related to parsing braces.""" + + def __init__(self): + # Stack for tracking all braces. An object is pushed whenever we + # see a "{", and popped when we see a "}". Only 3 types of + # objects are possible: + # - _ClassInfo: a class or struct. + # - _NamespaceInfo: a namespace. + # - _BlockInfo: some other type of block. + self.stack = [] + + # Top of the previous stack before each Update(). + # + # Because the nesting_stack is updated at the end of each line, we + # had to do some convoluted checks to find out what is the current + # scope at the beginning of the line. This check is simplified by + # saving the previous top of nesting stack. + # + # We could save the full stack, but we only need the top. Copying + # the full nesting stack would slow down cpplint by ~10%. + self.previous_stack_top = [] + + # Stack of _PreprocessorInfo objects. + self.pp_stack = [] + + def SeenOpenBrace(self): + """Check if we have seen the opening brace for the innermost block. + + Returns: + True if we have seen the opening brace, False if the innermost + block is still expecting an opening brace. + """ + return (not self.stack) or self.stack[-1].seen_open_brace + + def InNamespaceBody(self): + """Check if we are currently one level inside a namespace body. + + Returns: + True if top of the stack is a namespace block, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _NamespaceInfo) + + def InExternC(self): + """Check if we are currently one level inside an 'extern "C"' block. + + Returns: + True if top of the stack is an extern block, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _ExternCInfo) + + def InClassDeclaration(self): + """Check if we are currently one level inside a class or struct declaration. + + Returns: + True if top of the stack is a class/struct, False otherwise. 
+ """ + return self.stack and isinstance(self.stack[-1], _ClassInfo) + + def InAsmBlock(self): + """Check if we are currently one level inside an inline ASM block. + + Returns: + True if the top of the stack is a block containing inline ASM. + """ + return self.stack and self.stack[-1].inline_asm != _NO_ASM + + def InTemplateArgumentList(self, clean_lines, linenum, pos): + """Check if current position is inside template argument list. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: position just after the suspected template argument. + Returns: + True if (linenum, pos) is inside template arguments. + """ + while linenum < clean_lines.NumLines(): + # Find the earliest character that might indicate a template argument + line = clean_lines.elided[linenum] + match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:]) + if not match: + linenum += 1 + pos = 0 + continue + token = match.group(1) + pos += len(match.group(0)) + + # These things do not look like template argument list: + # class Suspect { + # class Suspect x; } + if token in ('{', '}', ';'): return False + + # These things look like template argument list: + # template + # template + # template + # template + if token in ('>', '=', '[', ']', '.'): return True + + # Check if token is an unmatched '<'. + # If not, move on to the next character. + if token != '<': + pos += 1 + if pos >= len(line): + linenum += 1 + pos = 0 + continue + + # We can't be sure if we just find a single '<', and need to + # find the matching '>'. + (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1) + if end_pos < 0: + # Not sure if template argument list or syntax error in file + return False + linenum = end_line + pos = end_pos + return False + + def UpdatePreprocessor(self, line): + """Update preprocessor stack. 
+ + We need to handle preprocessors due to classes like this: + #ifdef SWIG + struct ResultDetailsPageElementExtensionPoint { + #else + struct ResultDetailsPageElementExtensionPoint : public Extension { + #endif + + We make the following assumptions (good enough for most files): + - Preprocessor condition evaluates to true from #if up to first + #else/#elif/#endif. + + - Preprocessor condition evaluates to false from #else/#elif up + to #endif. We still perform lint checks on these lines, but + these do not affect nesting stack. + + Args: + line: current line to check. + """ + if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line): + # Beginning of #if block, save the nesting stack here. The saved + # stack will allow us to restore the parsing state in the #else case. + self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) + elif Match(r'^\s*#\s*(else|elif)\b', line): + # Beginning of #else block + if self.pp_stack: + if not self.pp_stack[-1].seen_else: + # This is the first #else or #elif block. Remember the + # whole nesting stack up to this point. This is what we + # keep after the #endif. + self.pp_stack[-1].seen_else = True + self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack) + + # Restore the stack to how it was before the #if + self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if) + else: + # TODO(unknown): unexpected #else, issue warning? + pass + elif Match(r'^\s*#\s*endif\b', line): + # End of #if or #else blocks. + if self.pp_stack: + # If we saw an #else, we will need to restore the nesting + # stack to its former state before the #else, otherwise we + # will just continue from where we left off. + if self.pp_stack[-1].seen_else: + # Here we can just use a shallow copy since we are the last + # reference to it. + self.stack = self.pp_stack[-1].stack_before_else + # Drop the corresponding #if + self.pp_stack.pop() + else: + # TODO(unknown): unexpected #endif, issue warning? 
+ pass + + # TODO(unknown): Update() is too long, but we will refactor later. + def Update(self, filename, clean_lines, linenum, error): + """Update nesting state with current line. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Remember top of the previous nesting stack. + # + # The stack is always pushed/popped and not modified in place, so + # we can just do a shallow copy instead of copy.deepcopy. Using + # deepcopy would slow down cpplint by ~28%. + if self.stack: + self.previous_stack_top = self.stack[-1] + else: + self.previous_stack_top = None + + # Update pp_stack + self.UpdatePreprocessor(line) + + # Count parentheses. This is to avoid adding struct arguments to + # the nesting stack. + if self.stack: + inner_block = self.stack[-1] + depth_change = line.count('(') - line.count(')') + inner_block.open_parentheses += depth_change + + # Also check if we are starting or ending an inline assembly block. + if inner_block.inline_asm in (_NO_ASM, _END_ASM): + if (depth_change != 0 and + inner_block.open_parentheses == 1 and + _MATCH_ASM.match(line)): + # Enter assembly block + inner_block.inline_asm = _INSIDE_ASM + else: + # Not entering assembly block. If previous line was _END_ASM, + # we will now shift to _NO_ASM state. + inner_block.inline_asm = _NO_ASM + elif (inner_block.inline_asm == _INSIDE_ASM and + inner_block.open_parentheses == 0): + # Exit assembly block + inner_block.inline_asm = _END_ASM + + # Consume namespace declaration at the beginning of the line. Do + # this in a loop so that we catch same line declarations like this: + # namespace proto2 { namespace bridge { class MessageSet; } } + while True: + # Match start of namespace. 
The "\b\s*" below catches namespace + # declarations even if it weren't followed by a whitespace, this + # is so that we don't confuse our namespace checker. The + # missing spaces will be flagged by CheckSpacing. + namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line) + if not namespace_decl_match: + break + + new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum) + self.stack.append(new_namespace) + + line = namespace_decl_match.group(2) + if line.find('{') != -1: + new_namespace.seen_open_brace = True + line = line[line.find('{') + 1:] + + # Look for a class declaration in whatever is left of the line + # after parsing namespaces. The regexp accounts for decorated classes + # such as in: + # class LOCKABLE API Object { + # }; + class_decl_match = Match( + r'^(\s*(?:template\s*<[\w\s<>,:]*>\s*)?' + r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))' + r'(.*)$', line) + if (class_decl_match and + (not self.stack or self.stack[-1].open_parentheses == 0)): + # We do not want to accept classes that are actually template arguments: + # template , + # template class Ignore3> + # void Function() {}; + # + # To avoid template argument cases, we scan forward and look for + # an unmatched '>'. If we see one, assume we are inside a + # template argument list. + end_declaration = len(class_decl_match.group(1)) + if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration): + self.stack.append(_ClassInfo( + class_decl_match.group(3), class_decl_match.group(2), + clean_lines, linenum)) + line = class_decl_match.group(4) + + # If we have not yet seen the opening brace for the innermost block, + # run checks here. 
+ if not self.SeenOpenBrace(): + self.stack[-1].CheckBegin(filename, clean_lines, linenum, error) + + # Update access control if we are inside a class/struct + if self.stack and isinstance(self.stack[-1], _ClassInfo): + classinfo = self.stack[-1] + access_match = Match( + r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?' + r':(?:[^:]|$)', + line) + if access_match: + classinfo.access = access_match.group(2) + + # Check that access keywords are indented +1 space. Skip this + # check if the keywords are not preceded by whitespaces. + indent = access_match.group(1) + if (len(indent) != classinfo.class_indent + 1 and + Match(r'^\s*$', indent)): + if classinfo.is_struct: + parent = 'struct ' + classinfo.name + else: + parent = 'class ' + classinfo.name + slots = '' + if access_match.group(3): + slots = access_match.group(3) + error(filename, linenum, 'whitespace/indent', 3, + '%s%s: should be indented +1 space inside %s' % ( + access_match.group(2), slots, parent)) + + # Consume braces or semicolons from what's left of the line + while True: + # Match first brace, semicolon, or closed parenthesis. + matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line) + if not matched: + break + + token = matched.group(1) + if token == '{': + # If namespace or class hasn't seen a opening brace yet, mark + # namespace/class head as complete. Push a new block onto the + # stack otherwise. + if not self.SeenOpenBrace(): + self.stack[-1].seen_open_brace = True + elif Match(r'^extern\s*"[^"]*"\s*\{', line): + self.stack.append(_ExternCInfo(linenum)) + else: + self.stack.append(_BlockInfo(linenum, True)) + if _MATCH_ASM.match(line): + self.stack[-1].inline_asm = _BLOCK_ASM + + elif token == ';' or token == ')': + # If we haven't seen an opening brace yet, but we already saw + # a semicolon, this is probably a forward declaration. Pop + # the stack for these. 
+ # + # Similarly, if we haven't seen an opening brace yet, but we + # already saw a closing parenthesis, then these are probably + # function arguments with extra "class" or "struct" keywords. + # Also pop these stack for these. + if not self.SeenOpenBrace(): + self.stack.pop() + else: # token == '}' + # Perform end of block checks and pop the stack. + if self.stack: + self.stack[-1].CheckEnd(filename, clean_lines, linenum, error) + self.stack.pop() + line = matched.group(2) + + def InnermostClass(self): + """Get class info on the top of the stack. + + Returns: + A _ClassInfo object if we are inside a class, or None otherwise. + """ + for i in range(len(self.stack), 0, -1): + classinfo = self.stack[i - 1] + if isinstance(classinfo, _ClassInfo): + return classinfo + return None + + def CheckCompletedBlocks(self, filename, error): + """Checks that all classes and namespaces have been completely parsed. + + Call this when all lines in a file have been processed. + Args: + filename: The name of the current file. + error: The function to call with any errors found. + """ + # Note: This test can result in false positives if #ifdef constructs + # get in the way of brace matching. See the testBuildClass test in + # cpplint_unittest.py for an example of this. + for obj in self.stack: + if isinstance(obj, _ClassInfo): + error(filename, obj.starting_linenum, 'build/class', 5, + 'Failed to find complete declaration of class %s' % + obj.name) + elif isinstance(obj, _NamespaceInfo): + error(filename, obj.starting_linenum, 'build/namespaces', 5, + 'Failed to find complete declaration of namespace %s' % + obj.name) + + +def CheckForNonStandardConstructs(filename, clean_lines, linenum, + nesting_state, error): + r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2. + + Complain about several constructs which gcc-2 accepts, but which are + not standard C++. Warning about these in lint is one way to ease the + transition to new compilers. 
+ - put storage class first (e.g. "static const" instead of "const static"). + - "%lld" instead of %qd" in printf-type functions. + - "%1$d" is non-standard in printf-type functions. + - "\%" is an undefined character escape sequence. + - text after #endif is not allowed. + - invalid inner-style forward declaration. + - >? and ?= and )\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', + line): + error(filename, linenum, 'build/deprecated', 3, + '>? and ))?' + # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;' + error(filename, linenum, 'runtime/member_string_references', 2, + 'const string& members are dangerous. It is much better to use ' + 'alternatives, such as pointers or simple constants.') + + # Everything else in this function operates on class declarations. + # Return early if the top of the nesting stack is not a class, or if + # the class head is not completed yet. + classinfo = nesting_state.InnermostClass() + if not classinfo or not classinfo.seen_open_brace: + return + + # The class may have been declared with namespace or classname qualifiers. + # The constructor and destructor will not have those qualifiers. + base_classname = classinfo.name.split('::')[-1] + + # Look for single-argument constructors that aren't marked explicit. + # Technically a valid construct, but against style. + explicit_constructor_match = Match( + r'\s+(?:(?:inline|constexpr)\s+)*(explicit\s+)?' 
+ r'(?:(?:inline|constexpr)\s+)*%s\s*' + r'\(((?:[^()]|\([^()]*\))*)\)' + % re.escape(base_classname), + line) + + if explicit_constructor_match: + is_marked_explicit = explicit_constructor_match.group(1) + + if not explicit_constructor_match.group(2): + constructor_args = [] + else: + constructor_args = explicit_constructor_match.group(2).split(',') + + # collapse arguments so that commas in template parameter lists and function + # argument parameter lists don't split arguments in two + i = 0 + while i < len(constructor_args): + constructor_arg = constructor_args[i] + while (constructor_arg.count('<') > constructor_arg.count('>') or + constructor_arg.count('(') > constructor_arg.count(')')): + constructor_arg += ',' + constructor_args[i + 1] + del constructor_args[i + 1] + constructor_args[i] = constructor_arg + i += 1 + + defaulted_args = [arg for arg in constructor_args if '=' in arg] + noarg_constructor = (not constructor_args or # empty arg list + # 'void' arg specifier + (len(constructor_args) == 1 and + constructor_args[0].strip() == 'void')) + onearg_constructor = ((len(constructor_args) == 1 and # exactly one arg + not noarg_constructor) or + # all but at most one arg defaulted + (len(constructor_args) >= 1 and + not noarg_constructor and + len(defaulted_args) >= len(constructor_args) - 1)) + initializer_list_constructor = bool( + onearg_constructor and + Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0])) + copy_constructor = bool( + onearg_constructor and + Match(r'(const\s+)?%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&' + % re.escape(base_classname), constructor_args[0].strip())) + + if (not is_marked_explicit and + onearg_constructor and + not initializer_list_constructor and + not copy_constructor): + if defaulted_args: + error(filename, linenum, 'runtime/explicit', 5, + 'Constructors callable with one argument ' + 'should be marked explicit.') + else: + error(filename, linenum, 'runtime/explicit', 5, + 'Single-parameter constructors 
should be marked explicit.') + elif is_marked_explicit and not onearg_constructor: + if noarg_constructor: + error(filename, linenum, 'runtime/explicit', 5, + 'Zero-parameter constructors should not be marked explicit.') + + +def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error): + """Checks for the correctness of various spacing around function calls. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Since function calls often occur inside if/for/while/switch + # expressions - which have their own, more liberal conventions - we + # first see if we should be looking inside such an expression for a + # function call, to which we can apply more strict standards. + fncall = line # if there's no control flow construct, look at whole line + for pattern in (r'\bif\s*\((.*)\)\s*{', + r'\bfor\s*\((.*)\)\s*{', + r'\bwhile\s*\((.*)\)\s*[{;]', + r'\bswitch\s*\((.*)\)\s*{'): + match = Search(pattern, line) + if match: + fncall = match.group(1) # look inside the parens for function calls + break + + # Except in if/for/while/switch, there should never be space + # immediately inside parens (eg "f( 3, 4 )"). We make an exception + # for nested parens ( (a+b) + c ). Likewise, there should never be + # a space before a ( when it's a function argument. I assume it's a + # function argument when the char before the whitespace is legal in + # a function name (alnum + _) and we're not starting a macro. Also ignore + # pointers and references to arrays and functions coz they're too tricky: + # we use a very simple way to recognize these: + # " (something)(maybe-something)" or + # " (something)(maybe-something," or + # " (something)[something]" + # Note that we assume the contents of [] to be short enough that + # they'll never need to wrap. 
+ if ( # Ignore control structures. + not Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b', + fncall) and + # Ignore pointers/references to functions. + not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and + # Ignore pointers/references to arrays. + not Search(r' \([^)]+\)\[[^\]]+\]', fncall)): + if Search(r'\w\s*\(\s(?!\s*\\$)', fncall): # a ( used for a fn call + error(filename, linenum, 'whitespace/parens', 4, + 'Extra space after ( in function call') + elif Search(r'\(\s+(?!(\s*\\)|\()', fncall): + error(filename, linenum, 'whitespace/parens', 2, + 'Extra space after (') + if (Search(r'\w\s+\(', fncall) and + not Search(r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(', fncall) and + not Search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and + not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and + not Search(r'\bcase\s+\(', fncall)): + # TODO(unknown): Space after an operator function seem to be a common + # error, silence those for now by restricting them to highest verbosity. + if Search(r'\boperator_*\b', line): + error(filename, linenum, 'whitespace/parens', 0, + 'Extra space before ( in function call') + else: + error(filename, linenum, 'whitespace/parens', 4, + 'Extra space before ( in function call') + # If the ) is followed only by a newline or a { + newline, assume it's + # part of a control statement (if/while/etc), and don't complain + if Search(r'[^)]\s+\)\s*[^{\s]', fncall): + # If the closing parenthesis is preceded by only whitespaces, + # try to give a more descriptive error message. + if Search(r'^\s+\)', fncall): + error(filename, linenum, 'whitespace/parens', 2, + 'Closing ) should be moved to the previous line') + else: + error(filename, linenum, 'whitespace/parens', 2, + 'Extra space before )') + + +def IsBlankLine(line): + """Returns true if the given line is blank. + + We consider a line to be blank if the line is empty or consists of + only white spaces. + + Args: + line: A line of a string. 
+ + Returns: + True, if the given line is blank. + """ + return not line or line.isspace() + + +def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, + error): + is_namespace_indent_item = ( + len(nesting_state.stack) > 1 and + nesting_state.stack[-1].check_namespace_indentation and + isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and + nesting_state.previous_stack_top == nesting_state.stack[-2]) + + if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, + clean_lines.elided, line): + CheckItemIndentationInNamespace(filename, clean_lines.elided, + line, error) + + +def CheckForFunctionLengths(filename, clean_lines, linenum, + function_state, error): + """Reports for long function bodies. + + For an overview why this is done, see: + https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions + + Uses a simplistic algorithm assuming other style guidelines + (especially spacing) are followed. + Only checks unindented functions, so class members are unchecked. + Trivial bodies are unchecked, so constructors with huge initializer lists + may be missed. + Blank/comment lines are not counted so as to avoid encouraging the removal + of vertical space and comments just to get through a lint check. + NOLINT *on the last line of a function* disables this check. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + function_state: Current function name and lines in body so far. + error: The function to call with any errors found. + """ + lines = clean_lines.lines + line = lines[linenum] + joined_line = '' + + starting_func = False + regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... + match_result = Match(regexp, line) + if match_result: + # If the name is all caps and underscores, figure it's a macro and + # ignore it, unless it's TEST or TEST_F. 
+ function_name = match_result.group(1).split()[-1] + if function_name == 'TEST' or function_name == 'TEST_F' or ( + not Match(r'[A-Z_]+$', function_name)): + starting_func = True + + if starting_func: + body_found = False + for start_linenum in xrange(linenum, clean_lines.NumLines()): + start_line = lines[start_linenum] + joined_line += ' ' + start_line.lstrip() + if Search(r'(;|})', start_line): # Declarations and trivial functions + body_found = True + break # ... ignore + elif Search(r'{', start_line): + body_found = True + function = Search(r'((\w|:)*)\(', line).group(1) + if Match(r'TEST', function): # Handle TEST... macros + parameter_regexp = Search(r'(\(.*\))', joined_line) + if parameter_regexp: # Ignore bad syntax + function += parameter_regexp.group(1) + else: + function += '()' + function_state.Begin(function) + break + if not body_found: + # No body for the function (or evidence of a non-function) was found. + error(filename, linenum, 'readability/fn_size', 5, + 'Lint failed to find start of function body.') + elif Match(r'^\}\s*$', line): # function end + function_state.Check(error, filename, linenum) + function_state.End() + elif not Match(r'^\s*$', line): + function_state.Count() # Count non-blank/non-comment lines. + + +_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?') + + +def CheckComment(line, filename, linenum, next_line_start, error): + """Checks for common mistakes in comments. + + Args: + line: The line in question. + filename: The name of the current file. + linenum: The number of the line to check. + next_line_start: The first non-whitespace column of the next line. + error: The function to call with any errors found. + """ + commentpos = line.find('//') + if commentpos != -1: + # Check if the // may be in quotes. 
If so, ignore it + if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0: + # Allow one space for new scopes, two spaces otherwise: + if (not (Match(r'^.*{ *//', line) and next_line_start == commentpos) and + ((commentpos >= 1 and + line[commentpos-1] not in string.whitespace) or + (commentpos >= 2 and + line[commentpos-2] not in string.whitespace))): + error(filename, linenum, 'whitespace/comments', 2, + 'At least two spaces is best between code and comments') + + # Checks for common mistakes in TODO comments. + comment = line[commentpos:] + match = _RE_PATTERN_TODO.match(comment) + if match: + # One whitespace is correct; zero whitespace is handled elsewhere. + leading_whitespace = match.group(1) + if len(leading_whitespace) > 1: + error(filename, linenum, 'whitespace/todo', 2, + 'Too many spaces before TODO') + + username = match.group(2) + if not username: + error(filename, linenum, 'readability/todo', 2, + 'Missing username in TODO; it should look like ' + '"// TODO(my_username): Stuff."') + + middle_whitespace = match.group(3) + # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison + if middle_whitespace != ' ' and middle_whitespace != '': + error(filename, linenum, 'whitespace/todo', 2, + 'TODO(my_username) should be followed by a space') + + # If the comment contains an alphanumeric character, there + # should be a space somewhere between it and the // unless + # it's a /// or //! Doxygen comment. + if (Match(r'//[^ ]*\w', comment) and + not Match(r'(///|//\!)(\s+|$)', comment)): + error(filename, linenum, 'whitespace/comments', 4, + 'Should have a space between // and comment') + + +def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): + """Checks for the correctness of various spacing issues in the code. 
+ + Things we check for: spaces around operators, spaces after + if/for/while/switch, no spaces around parens in function calls, two + spaces between code and comment, don't start a block with a blank + line, don't end a function with a blank line, don't add a blank line + after public/protected/private, don't have too many blank lines in a row. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + + # Don't use "elided" lines here, otherwise we can't check commented lines. + # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw = clean_lines.lines_without_raw_strings + line = raw[linenum] + + # Before nixing comments, check if the line is blank for no good + # reason. This includes the first line after a block is opened, and + # blank lines at the end of a function (ie, right before a line like '}' + # + # Skip all the blank line checks if we are immediately inside a + # namespace body. In other words, don't issue blank line warnings + # for this block: + # namespace { + # + # } + # + # A warning about missing end of namespace comments will be issued instead. + # + # Also skip blank line checks for 'extern "C"' blocks, which are formatted + # like namespaces. + if (IsBlankLine(line) and + not nesting_state.InNamespaceBody() and + not nesting_state.InExternC()): + elided = clean_lines.elided + prev_line = elided[linenum - 1] + prevbrace = prev_line.rfind('{') + # TODO(unknown): Don't complain if line before blank line, and line after, + # both start with alnums and are indented the same amount. + # This ignores whitespace at the start of a namespace block + # because those are not usually indented. 
+ if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1: + # OK, we have a blank line at the start of a code block. Before we + # complain, we check if it is an exception to the rule: The previous + # non-empty line has the parameters of a function header that are indented + # 4 spaces (because they did not fit in a 80 column line when placed on + # the same line as the function name). We also check for the case where + # the previous line is indented 6 spaces, which may happen when the + # initializers of a constructor do not fit into a 80 column line. + exception = False + if Match(r' {6}\w', prev_line): # Initializer list? + # We are looking for the opening column of initializer list, which + # should be indented 4 spaces to cause 6 space indentation afterwards. + search_position = linenum-2 + while (search_position >= 0 + and Match(r' {6}\w', elided[search_position])): + search_position -= 1 + exception = (search_position >= 0 + and elided[search_position][:5] == ' :') + else: + # Search for the function arguments or an initializer list. We use a + # simple heuristic here: If the line is indented 4 spaces; and we have a + # closing paren, without the opening paren, followed by an opening brace + # or colon (for initializer lists) we assume that it is the last line of + # a function header. If we have a colon indented 4 spaces, it is an + # initializer list. 
+ exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', + prev_line) + or Match(r' {4}:', prev_line)) + + if not exception: + error(filename, linenum, 'whitespace/blank_line', 2, + 'Redundant blank line at the start of a code block ' + 'should be deleted.') + # Ignore blank lines at the end of a block in a long if-else + # chain, like this: + # if (condition1) { + # // Something followed by a blank line + # + # } else if (condition2) { + # // Something else + # } + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + if (next_line + and Match(r'\s*}', next_line) + and next_line.find('} else ') == -1): + error(filename, linenum, 'whitespace/blank_line', 3, + 'Redundant blank line at the end of a code block ' + 'should be deleted.') + + matched = Match(r'\s*(public|protected|private):', prev_line) + if matched: + error(filename, linenum, 'whitespace/blank_line', 3, + 'Do not leave a blank line after "%s:"' % matched.group(1)) + + # Next, check comments + next_line_start = 0 + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + next_line_start = len(next_line) - len(next_line.lstrip()) + CheckComment(line, filename, linenum, next_line_start, error) + + # get rid of comments and strings + line = clean_lines.elided[linenum] + + # You shouldn't have spaces before your brackets, except maybe after + # 'delete []' or 'return []() {};' + if Search(r'\w\s+\[', line) and not Search(r'(?:delete|return)\s+\[', line): + error(filename, linenum, 'whitespace/braces', 5, + 'Extra space before [') + + # In range-based for, we wanted spaces before and after the colon, but + # not around "::" tokens that might appear. + if (Search(r'for *\(.*[^:]:[^: ]', line) or + Search(r'for *\(.*[^: ]:[^:]', line)): + error(filename, linenum, 'whitespace/forcolon', 2, + 'Missing space around colon in range-based for loop') + + +def CheckOperatorSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing around operators. 
+ + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Don't try to do spacing checks for operator methods. Do this by + # replacing the troublesome characters with something else, + # preserving column position for all other characters. + # + # The replacement is done repeatedly to avoid false positives from + # operators that call operators. + while True: + match = Match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line) + if match: + line = match.group(1) + ('_' * len(match.group(2))) + match.group(3) + else: + break + + # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". + # Otherwise not. Note we only check for non-spaces on *both* sides; + # sometimes people put non-spaces on one side when aligning ='s among + # many lines (not that this is behavior that I approve of...) + if ((Search(r'[\w.]=', line) or + Search(r'=[\w.]', line)) + and not Search(r'\b(if|while|for) ', line) + # Operators taken from [lex.operators] in C++11 standard. + and not Search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line) + and not Search(r'operator=', line)): + error(filename, linenum, 'whitespace/operators', 4, + 'Missing spaces around =') + + # It's ok not to have spaces around binary operators like + - * /, but if + # there's too little whitespace, we get concerned. It's hard to tell, + # though, so we punt on this one for now. TODO. + + # You should always have whitespace around binary operators. + # + # Check <= and >= first to avoid false positives with < and >, then + # check non-include lines for spacing around < and >. + # + # If the operator is followed by a comma, assume it's be used in a + # macro context and don't do any checks. This avoids false + # positives. + # + # Note that && is not included here. 
This is because there are too + # many false positives due to RValue references. + match = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line) + if match: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around %s' % match.group(1)) + elif not Match(r'#.*include', line): + # Look for < that is not surrounded by spaces. This is only + # triggered if both sides are missing spaces, even though + # technically should should flag if at least one side is missing a + # space. This is done to avoid some false positives with shifts. + match = Match(r'^(.*[^\s<])<[^\s=<,]', line) + if match: + (_, _, end_pos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + if end_pos <= -1: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around <') + + # Look for > that is not surrounded by spaces. Similar to the + # above, we only trigger if both sides are missing spaces to avoid + # false positives with shifts. + match = Match(r'^(.*[^-\s>])>[^\s=>,]', line) + if match: + (_, _, start_pos) = ReverseCloseExpression( + clean_lines, linenum, len(match.group(1))) + if start_pos <= -1: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around >') + + # We allow no-spaces around << when used like this: 10<<20, but + # not otherwise (particularly, not when used as streams) + # + # We also allow operators following an opening parenthesis, since + # those tend to be macros that deal with operators. + match = Search(r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])', line) + if (match and not (match.group(1).isdigit() and match.group(2).isdigit()) and + not (match.group(1) == 'operator' and match.group(2) == ';')): + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around <<') + + # We allow no-spaces around >> for almost anything. This is because + # C++11 allows ">>" to close nested templates, which accounts for + # most cases when ">>" is not followed by a space. 
+ # + # We still warn on ">>" followed by alpha character, because that is + # likely due to ">>" being used for right shifts, e.g.: + # value >> alpha + # + # When ">>" is used to close templates, the alphanumeric letter that + # follows would be part of an identifier, and there should still be + # a space separating the template type and the identifier. + # type> alpha + match = Search(r'>>[a-zA-Z_]', line) + if match: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around >>') + + # There shouldn't be space around unary operators + match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line) + if match: + error(filename, linenum, 'whitespace/operators', 4, + 'Extra space for operator %s' % match.group(1)) + + +def CheckParenthesisSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing around parentheses. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # No spaces after an if, while, switch, or for + match = Search(r' (if\(|for\(|while\(|switch\()', line) + if match: + error(filename, linenum, 'whitespace/parens', 5, + 'Missing space before ( in %s' % match.group(1)) + + # For if/for/while/switch, the left and right parens should be + # consistent about how many spaces are inside the parens, and + # there should either be zero or one spaces inside the parens. + # We don't want: "if ( foo)" or "if ( foo )". + # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. 
+ match = Search(r'\b(if|for|while|switch)\s*' + r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', + line) + if match: + if len(match.group(2)) != len(match.group(4)): + if not (match.group(3) == ';' and + len(match.group(2)) == 1 + len(match.group(4)) or + not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)): + error(filename, linenum, 'whitespace/parens', 5, + 'Mismatching spaces inside () in %s' % match.group(1)) + if len(match.group(2)) not in [0, 1]: + error(filename, linenum, 'whitespace/parens', 5, + 'Should have zero or one spaces inside ( and ) in %s' % + match.group(1)) + + +def CheckCommaSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing near commas and semicolons. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + raw = clean_lines.lines_without_raw_strings + line = clean_lines.elided[linenum] + + # You should always have a space after a comma (either as fn arg or operator) + # + # This does not apply when the non-space character following the + # comma is another comma, since the only time when that happens is + # for empty macro arguments. + # + # We run this check in two passes: first pass on elided lines to + # verify that lines contain missing whitespaces, second pass on raw + # lines to confirm that those missing whitespaces are not due to + # elided comments. 
+ if (Search(r',[^,\s]', ReplaceAll(r'\boperator\s*,\s*\(', 'F(', line)) and + Search(r',[^,\s]', raw[linenum])): + error(filename, linenum, 'whitespace/comma', 3, + 'Missing space after ,') + + # You should always have a space after a semicolon + # except for few corner cases + # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more + # space after ; + if Search(r';[^\s};\\)/]', line): + error(filename, linenum, 'whitespace/semicolon', 3, + 'Missing space after ;') + + +def _IsType(clean_lines, nesting_state, expr): + """Check if expression looks like a type name, returns true if so. + + Args: + clean_lines: A CleansedLines instance containing the file. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + expr: The expression to check. + Returns: + True, if token looks like a type. + """ + # Keep only the last token in the expression + last_word = Match(r'^.*(\b\S+)$', expr) + if last_word: + token = last_word.group(1) + else: + token = expr + + # Match native types and stdint types + if _TYPES.match(token): + return True + + # Try a bit harder to match templated types. Walk up the nesting + # stack until we find something that resembles a typename + # declaration for what we are looking for. + typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) + + r'\b') + block_index = len(nesting_state.stack) - 1 + while block_index >= 0: + if isinstance(nesting_state.stack[block_index], _NamespaceInfo): + return False + + # Found where the opening brace is. We want to scan from this + # line up to the beginning of the function, minus a few lines. + # template + # class C + # : public ... 
{ // start scanning here + last_line = nesting_state.stack[block_index].starting_linenum + + next_block_start = 0 + if block_index > 0: + next_block_start = nesting_state.stack[block_index - 1].starting_linenum + first_line = last_line + while first_line >= next_block_start: + if clean_lines.elided[first_line].find('template') >= 0: + break + first_line -= 1 + if first_line < next_block_start: + # Didn't find any "template" keyword before reaching the next block, + # there are probably no template things to check for this block + block_index -= 1 + continue + + # Look for typename in the specified range + for i in xrange(first_line, last_line + 1, 1): + if Search(typename_pattern, clean_lines.elided[i]): + return True + block_index -= 1 + + return False + + +def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error): + """Checks for horizontal spacing near commas. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Except after an opening paren, or after another opening brace (in case of + # an initializer list, for instance), you should have spaces before your + # braces when they are delimiting blocks, classes, namespaces etc. + # And since you should never have braces at the beginning of a line, + # this is an easy test. Except that braces used for initialization don't + # follow the same rule; we often don't want spaces before those. + match = Match(r'^(.*[^ ({>]){', line) + + if match: + # Try a bit harder to check for brace initialization. This + # happens in one of the following forms: + # Constructor() : initializer_list_{} { ... 
} + # Constructor{}.MemberFunction() + # Type variable{}; + # FunctionCall(type{}, ...); + # LastArgument(..., type{}); + # LOG(INFO) << type{} << " ..."; + # map_of_type[{...}] = ...; + # ternary = expr ? new type{} : nullptr; + # OuterTemplate{}> + # + # We check for the character following the closing brace, and + # silence the warning if it's one of those listed above, i.e. + # "{.;,)<>]:". + # + # To account for nested initializer list, we allow any number of + # closing braces up to "{;,)<". We can't simply silence the + # warning on first sight of closing brace, because that would + # cause false negatives for things that are not initializer lists. + # Silence this: But not this: + # Outer{ if (...) { + # Inner{...} if (...){ // Missing space before { + # }; } + # + # There is a false negative with this approach if people inserted + # spurious semicolons, e.g. "if (cond){};", but we will catch the + # spurious semicolon with a separate check. + leading_text = match.group(1) + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + trailing_text = '' + if endpos > -1: + trailing_text = endline[endpos:] + for offset in xrange(endlinenum + 1, + min(endlinenum + 3, clean_lines.NumLines() - 1)): + trailing_text += clean_lines.elided[offset] + # We also suppress warnings for `uint64_t{expression}` etc., as the style + # guide recommends brace initialization for integral types to avoid + # overflow/truncation. + if (not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text) + and not _IsType(clean_lines, nesting_state, leading_text)): + error(filename, linenum, 'whitespace/braces', 5, + 'Missing space before {') + + # Make sure '} else {' has spaces. + if Search(r'}else', line): + error(filename, linenum, 'whitespace/braces', 5, + 'Missing space before else') + + # You shouldn't have a space before a semicolon at the end of the line. + # There's a special case for "for" since the style guide allows space before + # the semicolon there. 
+ if Search(r':\s*;\s*$', line): + error(filename, linenum, 'whitespace/semicolon', 5, + 'Semicolon defining empty statement. Use {} instead.') + elif Search(r'^\s*;\s*$', line): + error(filename, linenum, 'whitespace/semicolon', 5, + 'Line contains only semicolon. If this should be an empty statement, ' + 'use {} instead.') + elif (Search(r'\s+;\s*$', line) and + not Search(r'\bfor\b', line)): + error(filename, linenum, 'whitespace/semicolon', 5, + 'Extra space before last semicolon. If this should be an empty ' + 'statement, use {} instead.') + + +def IsDecltype(clean_lines, linenum, column): + """Check if the token ending on (linenum, column) is decltype(). + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: the number of the line to check. + column: end column of the token to check. + Returns: + True if this token is decltype() expression, False otherwise. + """ + (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column) + if start_col < 0: + return False + if Search(r'\bdecltype\s*$', text[0:start_col]): + return True + return False + + +def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): + """Checks for additional blank line issues related to sections. + + Currently the only thing checked here is blank line before protected/private. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + class_info: A _ClassInfo objects. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Skip checks if the class is small, where small means 25 lines or less. + # 25 lines seems like a good cutoff since that's the usual height of + # terminals, and any class that can't fit in one screen can't really + # be considered "small". + # + # Also skip checks if we are on the first line. This accounts for + # classes that look like + # class Foo { public: ... 
}; + # + # If we didn't find the end of the class, last_line would be zero, + # and the check will be skipped by the first condition. + if (class_info.last_line - class_info.starting_linenum <= 24 or + linenum <= class_info.starting_linenum): + return + + matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum]) + if matched: + # Issue warning if the line before public/protected/private was + # not a blank line, but don't do this if the previous line contains + # "class" or "struct". This can happen two ways: + # - We are at the beginning of the class. + # - We are forward-declaring an inner class that is semantically + # private, but needed to be public for implementation reasons. + # Also ignores cases where the previous line ends with a backslash as can be + # common when defining classes in C macros. + prev_line = clean_lines.lines[linenum - 1] + if (not IsBlankLine(prev_line) and + not Search(r'\b(class|struct)\b', prev_line) and + not Search(r'\\$', prev_line)): + # Try a bit harder to find the beginning of the class. This is to + # account for multi-line base-specifier lists, e.g.: + # class Derived + # : public Base { + end_class_head = class_info.starting_linenum + for i in range(class_info.starting_linenum, linenum): + if Search(r'\{\s*$', clean_lines.lines[i]): + end_class_head = i + break + if end_class_head < linenum - 1: + error(filename, linenum, 'whitespace/blank_line', 3, + '"%s:" should be preceded by a blank line' % matched.group(1)) + + +def GetPreviousNonBlankLine(clean_lines, linenum): + """Return the most recent non-blank line and its line number. + + Args: + clean_lines: A CleansedLines instance containing the file contents. + linenum: The number of the line to check. + + Returns: + A tuple with two elements. The first element is the contents of the last + non-blank line before the current line, or the empty string if this is the + first non-blank line. 
The second is the line number of that line, or -1 + if this is the first non-blank line. + """ + + prevlinenum = linenum - 1 + while prevlinenum >= 0: + prevline = clean_lines.elided[prevlinenum] + if not IsBlankLine(prevline): # if not a blank line... + return (prevline, prevlinenum) + prevlinenum -= 1 + return ('', -1) + + +def CheckBraces(filename, clean_lines, linenum, error): + """Looks for misplaced braces (e.g. at the end of line). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] # get rid of comments and strings + + if Match(r'\s*{\s*$', line): + # We allow an open brace to start a line in the case where someone is using + # braces in a block to explicitly create a new scope, which is commonly used + # to control the lifetime of stack-allocated variables. Braces are also + # used for brace initializers inside function calls. We don't detect this + # perfectly: we just don't complain if the last non-whitespace character on + # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the + # previous line starts a preprocessor block. We also allow a brace on the + # following line if it is part of an array initialization and would not fit + # within the 80 character limit of the preceding line. + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if (not Search(r'[,;:}{(]\s*$', prevline) and + not Match(r'\s*#', prevline) and + not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)): + error(filename, linenum, 'whitespace/braces', 4, + '{ should almost always be at the end of the previous line') + + # An else clause should be on the same line as the preceding closing brace. 
+ if Match(r'\s*else\b\s*(?:if\b|\{|$)', line): + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if Match(r'\s*}\s*$', prevline): + error(filename, linenum, 'whitespace/newline', 4, + 'An else should appear on the same line as the preceding }') + + # If braces come on one side of an else, they should be on both. + # However, we have to worry about "else if" that spans multiple lines! + if Search(r'else if\s*\(', line): # could be multi-line if + brace_on_left = bool(Search(r'}\s*else if\s*\(', line)) + # find the ( after the if + pos = line.find('else if') + pos = line.find('(', pos) + if pos > 0: + (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos) + brace_on_right = endline[endpos:].find('{') != -1 + if brace_on_left != brace_on_right: # must be brace after if + error(filename, linenum, 'readability/braces', 5, + 'If an else has a brace on one side, it should have it on both') + elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line): + error(filename, linenum, 'readability/braces', 5, + 'If an else has a brace on one side, it should have it on both') + + # Likewise, an else should never have the else clause on the same line + if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line): + error(filename, linenum, 'whitespace/newline', 4, + 'Else clause should never be on same line as else (use 2 lines)') + + # In the same way, a do/while should never be on one line + if Match(r'\s*do [^\s{]', line): + error(filename, linenum, 'whitespace/newline', 4, + 'do/while clauses should not be on a single line') + + # Check single-line if/else bodies. The style guide says 'curly braces are not + # required for single-line statements'. We additionally allow multi-line, + # single statements, but we reject anything with more than one semicolon in + # it. 
This means that the first semicolon after the if should be at the end of + # its line, and the line after that should have an indent level equal to or + # lower than the if. We also check for ambiguous if/else nesting without + # braces. + if_else_match = Search(r'\b(if\s*\(|else\b)', line) + if if_else_match and not Match(r'\s*#', line): + if_indent = GetIndentLevel(line) + endline, endlinenum, endpos = line, linenum, if_else_match.end() + if_match = Search(r'\bif\s*\(', line) + if if_match: + # This could be a multiline if condition, so find the end first. + pos = if_match.end() - 1 + (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos) + # Check for an opening brace, either directly after the if or on the next + # line. If found, this isn't a single-statement conditional. + if (not Match(r'\s*{', endline[endpos:]) + and not (Match(r'\s*$', endline[endpos:]) + and endlinenum < (len(clean_lines.elided) - 1) + and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))): + while (endlinenum < len(clean_lines.elided) + and ';' not in clean_lines.elided[endlinenum][endpos:]): + endlinenum += 1 + endpos = 0 + if endlinenum < len(clean_lines.elided): + endline = clean_lines.elided[endlinenum] + # We allow a mix of whitespace and closing braces (e.g. for one-liner + # methods) and a single \ after the semicolon (for macros) + endpos = endline.find(';') + if not Match(r';[\s}]*(\\?)$', endline[endpos:]): + # Semicolon isn't the last character, there's something trailing. + # Output a warning if the semicolon is not contained inside + # a lambda expression. 
+ if not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$', + endline): + error(filename, linenum, 'readability/braces', 4, + 'If/else bodies with multiple statements require braces') + elif endlinenum < len(clean_lines.elided) - 1: + # Make sure the next line is dedented + next_line = clean_lines.elided[endlinenum + 1] + next_indent = GetIndentLevel(next_line) + # With ambiguous nested if statements, this will error out on the + # if that *doesn't* match the else, regardless of whether it's the + # inner one or outer one. + if (if_match and Match(r'\s*else\b', next_line) + and next_indent != if_indent): + error(filename, linenum, 'readability/braces', 4, + 'Else clause should be indented at the same level as if. ' + 'Ambiguous nested if/else chains require braces.') + elif next_indent > if_indent: + error(filename, linenum, 'readability/braces', 4, + 'If/else bodies with multiple statements require braces') + + +def CheckTrailingSemicolon(filename, clean_lines, linenum, error): + """Looks for redundant trailing semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] + + # Block bodies should not be followed by a semicolon. Due to C++11 + # brace initialization, there are more places where semicolons are + # required than not, so we use a whitelist approach to check these + # rather than a blacklist. These are the places where "};" should + # be replaced by just "}": + # 1. Some flavor of block following closing parenthesis: + # for (;;) {}; + # while (...) {}; + # switch (...) {}; + # Function(...) {}; + # if (...) {}; + # if (...) else if (...) {}; + # + # 2. else block: + # if (...) else {}; + # + # 3. const member function: + # Function(...) const {}; + # + # 4. Block following some statement: + # x = 42; + # {}; + # + # 5. 
Block at the beginning of a function: + # Function(...) { + # {}; + # } + # + # Note that naively checking for the preceding "{" will also match + # braces inside multi-dimensional arrays, but this is fine since + # that expression will not contain semicolons. + # + # 6. Block following another block: + # while (true) {} + # {}; + # + # 7. End of namespaces: + # namespace {}; + # + # These semicolons seems far more common than other kinds of + # redundant semicolons, possibly due to people converting classes + # to namespaces. For now we do not warn for this case. + # + # Try matching case 1 first. + match = Match(r'^(.*\)\s*)\{', line) + if match: + # Matched closing parenthesis (case 1). Check the token before the + # matching opening parenthesis, and don't warn if it looks like a + # macro. This avoids these false positives: + # - macro that defines a base class + # - multi-line macro that defines a base class + # - macro that defines the whole class-head + # + # But we still issue warnings for macros that we know are safe to + # warn, specifically: + # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P + # - TYPED_TEST + # - INTERFACE_DEF + # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: + # + # We implement a whitelist of safe macros instead of a blacklist of + # unsafe macros, even though the latter appears less frequently in + # google code and would have been easier to implement. This is because + # the downside for getting the whitelist wrong means some extra + # semicolons, while the downside for getting the blacklist wrong + # would result in compile errors. 
+ # + # In addition to macros, we also don't want to warn on + # - Compound literals + # - Lambdas + # - alignas specifier with anonymous structs + # - decltype + closing_brace_pos = match.group(1).rfind(')') + opening_parenthesis = ReverseCloseExpression( + clean_lines, linenum, closing_brace_pos) + if opening_parenthesis[2] > -1: + line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]] + macro = Search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix) + func = Match(r'^(.*\])\s*$', line_prefix) + if ((macro and + macro.group(1) not in ( + 'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST', + 'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED', + 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or + (func and not Search(r'\boperator\s*\[\s*\]', func.group(1))) or + Search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or + Search(r'\bdecltype$', line_prefix) or + Search(r'\s+=\s*$', line_prefix)): + match = None + if (match and + opening_parenthesis[1] > 1 and + Search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])): + # Multi-line lambda-expression + match = None + + else: + # Try matching cases 2-3. + match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line) + if not match: + # Try matching cases 4-6. These are always matched on separate lines. + # + # Note that we can't simply concatenate the previous line to the + # current line and do a single match, otherwise we may output + # duplicate warnings for the blank line case: + # if (cond) { + # // blank line + # } + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if prevline and Search(r'[;{}]\s*$', prevline): + match = Match(r'^(\s*)\{', line) + + # Check matching closing brace + if match: + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + if endpos > -1 and Match(r'^\s*;', endline[endpos:]): + # Current {} pair is eligible for semicolon check, and we have found + # the redundant semicolon, output warning here. 
+ # + # Note: because we are scanning forward for opening braces, and + # outputting warnings for the matching closing brace, if there are + # nested blocks with trailing semicolons, we will get the error + # messages in reversed order. + + # We need to check the line forward for NOLINT + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[endlinenum-1], endlinenum-1, + error) + ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum, + error) + + error(filename, endlinenum, 'readability/braces', 4, + "You don't need a ; after a }") + + +def CheckEmptyBlockBody(filename, clean_lines, linenum, error): + """Look for empty loop/conditional body with only a single semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Search for loop keywords at the beginning of the line. Because only + # whitespaces are allowed before the keywords, this will also ignore most + # do-while-loops, since those lines should start with closing brace. + # + # We also check "if" blocks here, since an empty conditional block + # is likely an error. + line = clean_lines.elided[linenum] + matched = Match(r'\s*(for|while|if)\s*\(', line) + if matched: + # Find the end of the conditional expression. + (end_line, end_linenum, end_pos) = CloseExpression( + clean_lines, linenum, line.find('(')) + + # Output warning if what follows the condition expression is a semicolon. + # No warning for all other cases, including whitespace or newline, since we + # have a separate check for semicolons preceded by whitespace. 
+ if end_pos >= 0 and Match(r';', end_line[end_pos:]): + if matched.group(1) == 'if': + error(filename, end_linenum, 'whitespace/empty_conditional_body', 5, + 'Empty conditional bodies should use {}') + else: + error(filename, end_linenum, 'whitespace/empty_loop_body', 5, + 'Empty loop bodies should use {} or continue') + + # Check for if statements that have completely empty bodies (no comments) + # and no else clauses. + if end_pos >= 0 and matched.group(1) == 'if': + # Find the position of the opening { for the if statement. + # Return without logging an error if it has no brackets. + opening_linenum = end_linenum + opening_line_fragment = end_line[end_pos:] + # Loop until EOF or find anything that's not whitespace or opening {. + while not Search(r'^\s*\{', opening_line_fragment): + if Search(r'^(?!\s*$)', opening_line_fragment): + # Conditional has no brackets. + return + opening_linenum += 1 + if opening_linenum == len(clean_lines.elided): + # Couldn't find conditional's opening { or any code before EOF. + return + opening_line_fragment = clean_lines.elided[opening_linenum] + # Set opening_line (opening_line_fragment may not be entire opening line). + opening_line = clean_lines.elided[opening_linenum] + + # Find the position of the closing }. + opening_pos = opening_line_fragment.find('{') + if opening_linenum == end_linenum: + # We need to make opening_pos relative to the start of the entire line. + opening_pos += end_pos + (closing_line, closing_linenum, closing_pos) = CloseExpression( + clean_lines, opening_linenum, opening_pos) + if closing_pos < 0: + return + + # Now construct the body of the conditional. This consists of the portion + # of the opening line after the {, all lines until the closing line, + # and the portion of the closing line before the }. + if (clean_lines.raw_lines[opening_linenum] != + CleanseComments(clean_lines.raw_lines[opening_linenum])): + # Opening line ends with a comment, so conditional isn't empty. 
+ return + if closing_linenum > opening_linenum: + # Opening line after the {. Ignore comments here since we checked above. + body = list(opening_line[opening_pos+1:]) + # All lines until closing line, excluding closing line, with comments. + body.extend(clean_lines.raw_lines[opening_linenum+1:closing_linenum]) + # Closing line before the }. Won't (and can't) have comments. + body.append(clean_lines.elided[closing_linenum][:closing_pos-1]) + body = '\n'.join(body) + else: + # If statement has brackets and fits on a single line. + body = opening_line[opening_pos+1:closing_pos-1] + + # Check if the body is empty + if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body): + return + # The body is empty. Now make sure there's not an else clause. + current_linenum = closing_linenum + current_line_fragment = closing_line[closing_pos:] + # Loop until EOF or find anything that's not whitespace or else clause. + while Search(r'^\s*$|^(?=\s*else)', current_line_fragment): + if Search(r'^(?=\s*else)', current_line_fragment): + # Found an else clause, so don't log an error. + return + current_linenum += 1 + if current_linenum == len(clean_lines.elided): + break + current_line_fragment = clean_lines.elided[current_linenum] + + # The body is empty and there's no else clause until EOF or other code. + error(filename, end_linenum, 'whitespace/empty_if_body', 4, + ('If statement had no body and no else clause')) + + +def FindCheckMacro(line): + """Find a replaceable CHECK-like macro. + + Args: + line: line to search on. + Returns: + (macro name, start position), or (None, -1) if no replaceable + macro is found. + """ + for macro in _CHECK_MACROS: + i = line.find(macro) + if i >= 0: + # Find opening parenthesis. Do a regular expression match here + # to make sure that we are matching the expected CHECK macro, as + # opposed to some other macro that happens to contain the CHECK + # substring. 
+ matched = Match(r'^(.*\b' + macro + r'\s*)\(', line) + if not matched: + continue + return (macro, len(matched.group(1))) + return (None, -1) + + +def CheckCheck(filename, clean_lines, linenum, error): + """Checks the use of CHECK and EXPECT macros. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Decide the set of replacement macros that should be suggested + lines = clean_lines.elided + (check_macro, start_pos) = FindCheckMacro(lines[linenum]) + if not check_macro: + return + + # Find end of the boolean expression by matching parentheses + (last_line, end_line, end_pos) = CloseExpression( + clean_lines, linenum, start_pos) + if end_pos < 0: + return + + # If the check macro is followed by something other than a + # semicolon, assume users will log their own custom error messages + # and don't suggest any replacements. + if not Match(r'\s*;', last_line[end_pos:]): + return + + if linenum == end_line: + expression = lines[linenum][start_pos + 1:end_pos - 1] + else: + expression = lines[linenum][start_pos + 1:] + for i in xrange(linenum + 1, end_line): + expression += lines[i] + expression += last_line[0:end_pos - 1] + + # Parse expression so that we can take parentheses into account. + # This avoids false positives for inputs like "CHECK((a < 4) == b)", + # which is not replaceable by CHECK_LE. 
+ lhs = '' + rhs = '' + operator = None + while expression: + matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||' + r'==|!=|>=|>|<=|<|\()(.*)$', expression) + if matched: + token = matched.group(1) + if token == '(': + # Parenthesized operand + expression = matched.group(2) + (end, _) = FindEndOfExpressionInLine(expression, 0, ['(']) + if end < 0: + return # Unmatched parenthesis + lhs += '(' + expression[0:end] + expression = expression[end:] + elif token in ('&&', '||'): + # Logical and/or operators. This means the expression + # contains more than one term, for example: + # CHECK(42 < a && a < b); + # + # These are not replaceable with CHECK_LE, so bail out early. + return + elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'): + # Non-relational operator + lhs += token + expression = matched.group(2) + else: + # Relational operator + operator = token + rhs = matched.group(2) + break + else: + # Unparenthesized operand. Instead of appending to lhs one character + # at a time, we do another regular expression match to consume several + # characters at once if possible. Trivial benchmark shows that this + # is more efficient when the operands are longer than a single + # character, which is generally the case. + matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression) + if not matched: + matched = Match(r'^(\s*\S)(.*)$', expression) + if not matched: + break + lhs += matched.group(1) + expression = matched.group(2) + + # Only apply checks if we got all parts of the boolean expression + if not (lhs and operator and rhs): + return + + # Check that rhs do not contain logical operators. We already know + # that lhs is fine since the loop above parses out && and ||. + if rhs.find('&&') > -1 or rhs.find('||') > -1: + return + + # At least one of the operands must be a constant literal. 
This is + # to avoid suggesting replacements for unprintable things like + # CHECK(variable != iterator) + # + # The following pattern matches decimal, hex integers, strings, and + # characters (in that order). + lhs = lhs.strip() + rhs = rhs.strip() + match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' + if Match(match_constant, lhs) or Match(match_constant, rhs): + # Note: since we know both lhs and rhs, we can provide a more + # descriptive error message like: + # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) + # Instead of: + # Consider using CHECK_EQ instead of CHECK(a == b) + # + # We are still keeping the less descriptive message because if lhs + # or rhs gets long, the error message might become unreadable. + error(filename, linenum, 'readability/check', 2, + 'Consider using %s instead of %s(a %s b)' % ( + _CHECK_REPLACEMENT[check_macro][operator], + check_macro, operator)) + + +def CheckAltTokens(filename, clean_lines, linenum, error): + """Check alternative keywords being used in boolean expressions. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Avoid preprocessor lines + if Match(r'^\s*#', line): + return + + # Last ditch effort to avoid multi-line comments. This will not help + # if the comment started before the current line or ended after the + # current line, but it catches most of the false positives. At least, + # it provides a way to workaround this warning for people who use + # multi-line comments in preprocessor macros. + # + # TODO(unknown): remove this once cpplint has better support for + # multi-line comments. 
+ if line.find('/*') >= 0 or line.find('*/') >= 0: + return + + for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): + error(filename, linenum, 'readability/alt_tokens', 2, + 'Use operator %s instead of %s' % ( + _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1))) + + +def GetLineWidth(line): + """Determines the width of the line in column positions. + + Args: + line: A string, which may be a Unicode string. + + Returns: + The width of the line in column positions, accounting for Unicode + combining characters and wide characters. + """ + if isinstance(line, unicode): + width = 0 + for uc in unicodedata.normalize('NFC', line): + if unicodedata.east_asian_width(uc) in ('W', 'F'): + width += 2 + elif not unicodedata.combining(uc): + width += 1 + return width + else: + return len(line) + + +def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, + error): + """Checks rules from the 'C++ style rules' section of cppguide.html. + + Most of these rules are hard to test (naming, comment style), but we + do what we can. In particular we check for 2-space indents, line lengths, + tab usage, spaces inside code, etc. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + + # Don't use "elided" lines here, otherwise we can't check commented lines. 
+ # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw_lines = clean_lines.lines_without_raw_strings + line = raw_lines[linenum] + prev = raw_lines[linenum - 1] if linenum > 0 else '' + + if line.find('\t') != -1: + error(filename, linenum, 'whitespace/tab', 1, + 'Tab found; better to use spaces') + + # One or three blank spaces at the beginning of the line is weird; it's + # hard to reconcile that with 2-space indents. + # NOTE: here are the conditions rob pike used for his tests. Mine aren't + # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces + # if(RLENGTH > 20) complain = 0; + # if(match($0, " +(error|private|public|protected):")) complain = 0; + # if(match(prev, "&& *$")) complain = 0; + # if(match(prev, "\\|\\| *$")) complain = 0; + # if(match(prev, "[\",=><] *$")) complain = 0; + # if(match($0, " <<")) complain = 0; + # if(match(prev, " +for \\(")) complain = 0; + # if(prevodd && match(prevprev, " +for \\(")) complain = 0; + scope_or_label_pattern = r'\s*\w+\s*:\s*\\?$' + classinfo = nesting_state.InnermostClass() + initial_spaces = 0 + cleansed_line = clean_lines.elided[linenum] + while initial_spaces < len(line) and line[initial_spaces] == ' ': + initial_spaces += 1 + # There are certain situations we allow one space, notably for + # section labels, and also lines containing multi-line raw strings. + # We also don't check for lines that look like continuation lines + # (of lines ending in double quotes, commas, equals, or angle brackets) + # because the rules for how to indent those are non-trivial. + if (not Search(r'[",=><] *$', prev) and + (initial_spaces == 1 or initial_spaces == 3) and + not Match(scope_or_label_pattern, cleansed_line) and + not (clean_lines.raw_lines[linenum] != line and + Match(r'^\s*""', line))): + error(filename, linenum, 'whitespace/indent', 3, + 'Weird number of spaces at line-start. 
' + 'Are you using a 2-space indent?') + + if line and line[-1].isspace(): + error(filename, linenum, 'whitespace/end_of_line', 4, + 'Line ends in whitespace. Consider deleting these extra spaces.') + + # Check if the line is a header guard. + is_header_guard = False + if IsHeaderExtension(file_extension): + cppvar = GetHeaderGuardCPPVariable(filename) + if (line.startswith('#ifndef %s' % cppvar) or + line.startswith('#define %s' % cppvar) or + line.startswith('#endif // %s' % cppvar)): + is_header_guard = True + # #include lines and header guards can be long, since there's no clean way to + # split them. + # + # URLs can be long too. It's possible to split these, but it makes them + # harder to cut&paste. + # + # The "$Id:...$" comment may also get very long without it being the + # developers fault. + if (not line.startswith('#include') and not is_header_guard and + not Match(r'^\s*//.*http(s?)://\S*$', line) and + not Match(r'^\s*//\s*[^\s]*$', line) and + not Match(r'^// \$Id:.*#[0-9]+ \$$', line)): + line_width = GetLineWidth(line) + if line_width > _line_length: + error(filename, linenum, 'whitespace/line_length', 2, + 'Lines should be <= %i characters long' % _line_length) + + if (cleansed_line.count(';') > 1 and + # for loops are allowed two ;'s (and may run over two lines). 
+ cleansed_line.find('for') == -1 and + (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or + GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and + # It's ok to have many commands in a switch case that fits in 1 line + not ((cleansed_line.find('case ') != -1 or + cleansed_line.find('default:') != -1) and + cleansed_line.find('break;') != -1)): + error(filename, linenum, 'whitespace/newline', 0, + 'More than one command on the same line') + + # Some more style checks + CheckBraces(filename, clean_lines, linenum, error) + CheckTrailingSemicolon(filename, clean_lines, linenum, error) + CheckEmptyBlockBody(filename, clean_lines, linenum, error) + CheckSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckOperatorSpacing(filename, clean_lines, linenum, error) + CheckParenthesisSpacing(filename, clean_lines, linenum, error) + CheckCommaSpacing(filename, clean_lines, linenum, error) + CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckSpacingForFunctionCall(filename, clean_lines, linenum, error) + CheckCheck(filename, clean_lines, linenum, error) + CheckAltTokens(filename, clean_lines, linenum, error) + classinfo = nesting_state.InnermostClass() + if classinfo: + CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) + + +_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') +# Matches the first component of a filename delimited by -s and _s. That is: +# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo' +_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+') + + +def _DropCommonSuffixes(filename): + """Drops common suffixes like _test.cc or -inl.h from filename. 
+ + For example: + >>> _DropCommonSuffixes('foo/foo-inl.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/bar/foo.cc') + 'foo/bar/foo' + >>> _DropCommonSuffixes('foo/foo_internal.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/foo_unusualinternal.h') + 'foo/foo_unusualinternal' + + Args: + filename: The input filename. + + Returns: + The filename with the common suffix removed. + """ + for suffix in ('test.cc', 'regtest.cc', 'unittest.cc', + 'inl.h', 'impl.h', 'internal.h'): + if (filename.endswith(suffix) and len(filename) > len(suffix) and + filename[-len(suffix) - 1] in ('-', '_')): + return filename[:-len(suffix) - 1] + return os.path.splitext(filename)[0] + + +def _ClassifyInclude(fileinfo, include, is_system): + """Figures out what kind of header 'include' is. + + Args: + fileinfo: The current file cpplint is running over. A FileInfo instance. + include: The path to a #included file. + is_system: True if the #include used <> rather than "". + + Returns: + One of the _XXX_HEADER constants. + + For example: + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True) + _C_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True) + _CPP_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False) + _LIKELY_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'), + ... 'bar/foo_other_ext.h', False) + _POSSIBLE_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False) + _OTHER_HEADER + """ + # This is a list of all standard c++ header files, except + # those already checked for above. + is_cpp_h = include in _CPP_HEADERS + + if is_system: + if is_cpp_h: + return _CPP_SYS_HEADER + else: + return _C_SYS_HEADER + + # If the target file and the include we're checking share a + # basename when we drop common extensions, and the include + # lives in . , then it's likely to be owned by the target file. 
+ target_dir, target_base = ( + os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName()))) + include_dir, include_base = os.path.split(_DropCommonSuffixes(include)) + if target_base == include_base and ( + include_dir == target_dir or + include_dir == os.path.normpath(target_dir + '/../public')): + return _LIKELY_MY_HEADER + + # If the target and include share some initial basename + # component, it's possible the target is implementing the + # include, so it's allowed to be first, but we'll never + # complain if it's not there. + target_first_component = _RE_FIRST_COMPONENT.match(target_base) + include_first_component = _RE_FIRST_COMPONENT.match(include_base) + if (target_first_component and include_first_component and + target_first_component.group(0) == + include_first_component.group(0)): + return _POSSIBLE_MY_HEADER + + return _OTHER_HEADER + + + +def CheckIncludeLine(filename, clean_lines, linenum, include_state, error): + """Check rules that are applicable to #include lines. + + Strings on #include lines are NOT removed from elided line, to make + certain tasks easier. However, to prevent false positives, checks + applicable to #include lines in CheckLanguage must be put here. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + include_state: An _IncludeState instance in which the headers are inserted. + error: The function to call with any errors found. + """ + fileinfo = FileInfo(filename) + line = clean_lines.lines[linenum] + + # "include" should use the new style "foo/bar.h" instead of just "bar.h" + # Only do this check if the included header follows google naming + # conventions. If not, assume that it's a 3rd party API that + # requires special include conventions. + # + # We also make an exception for Lua headers, which follow google + # naming convention but not the include convention. 
+ match = Match(r'#include\s*"([^/]+\.h)"', line) + if match and not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1)): + error(filename, linenum, 'build/include', 4, + 'Include the directory when naming .h files') + + # we shouldn't include a file more than once. actually, there are a + # handful of instances where doing so is okay, but in general it's + # not. + match = _RE_PATTERN_INCLUDE.search(line) + if match: + include = match.group(2) + is_system = (match.group(1) == '<') + duplicate_line = include_state.FindHeader(include) + if duplicate_line >= 0: + error(filename, linenum, 'build/include', 4, + '"%s" already included at %s:%s' % + (include, filename, duplicate_line)) + elif (include.endswith('.cc') and + os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)): + error(filename, linenum, 'build/include', 4, + 'Do not include .cc files from other packages') + elif not _THIRD_PARTY_HEADERS_PATTERN.match(include): + include_state.include_list[-1].append((include, linenum)) + + # We want to ensure that headers appear in the right order: + # 1) for foo.cc, foo.h (preferred location) + # 2) c system files + # 3) cpp system files + # 4) for foo.cc, foo.h (deprecated location) + # 5) other google headers + # + # We classify each include statement as one of those 5 types + # using a number of techniques. The include_state object keeps + # track of the highest type seen, and complains if we see a + # lower type after that. + error_message = include_state.CheckNextIncludeOrder( + _ClassifyInclude(fileinfo, include, is_system)) + if error_message: + error(filename, linenum, 'build/include_order', 4, + '%s. Should be: %s.h, c system, c++ system, other.' 
% + (error_message, fileinfo.BaseName())) + canonical_include = include_state.CanonicalizeAlphabeticalOrder(include) + if not include_state.IsInAlphabeticalOrder( + clean_lines, linenum, canonical_include): + error(filename, linenum, 'build/include_alpha', 4, + 'Include "%s" not in alphabetical order' % include) + include_state.SetLastHeader(canonical_include) + + + +def _GetTextInside(text, start_pattern): + r"""Retrieves all the text between matching open and close parentheses. + + Given a string of lines and a regular expression string, retrieve all the text + following the expression and between opening punctuation symbols like + (, [, or {, and the matching close-punctuation symbol. This properly nested + occurrences of the punctuations, so for the text like + printf(a(), b(c())); + a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'. + start_pattern must match string having an open punctuation symbol at the end. + + Args: + text: The lines to extract text. Its comments and strings must be elided. + It can be single line and can span multiple lines. + start_pattern: The regexp string indicating where to start extracting + the text. + Returns: + The extracted text. + None if either the opening string or ending punctuation could not be found. + """ + # TODO(unknown): Audit cpplint.py to see what places could be profitably + # rewritten to use _GetTextInside (and use inferior regexp matching today). + + # Give opening punctuations to get the matching close-punctuations. + matching_punctuation = {'(': ')', '{': '}', '[': ']'} + closing_punctuation = set(matching_punctuation.itervalues()) + + # Find the position to start extracting text. + match = re.search(start_pattern, text, re.M) + if not match: # start_pattern not found in text. 
+ return None + start_position = match.end(0) + + assert start_position > 0, ( + 'start_pattern must ends with an opening punctuation.') + assert text[start_position - 1] in matching_punctuation, ( + 'start_pattern must ends with an opening punctuation.') + # Stack of closing punctuations we expect to have in text after position. + punctuation_stack = [matching_punctuation[text[start_position - 1]]] + position = start_position + while punctuation_stack and position < len(text): + if text[position] == punctuation_stack[-1]: + punctuation_stack.pop() + elif text[position] in closing_punctuation: + # A closing punctuation without matching opening punctuations. + return None + elif text[position] in matching_punctuation: + punctuation_stack.append(matching_punctuation[text[position]]) + position += 1 + if punctuation_stack: + # Opening punctuations left without matching close-punctuations. + return None + # punctuations match. + return text[start_position:position - 1] + + +# Patterns for matching call-by-reference parameters. +# +# Supports nested templates up to 2 levels deep using this messy pattern: +# < (?: < (?: < [^<>]* +# > +# | [^<>] )* +# > +# | [^<>] )* +# > +_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*' # =~ [[:alpha:]][[:alnum:]]* +_RE_PATTERN_TYPE = ( + r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?' + r'(?:\w|' + r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|' + r'::)+') +# A call-by-reference parameter ends with '& identifier'. +_RE_PATTERN_REF_PARAM = re.compile( + r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*' + r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]') +# A call-by-const-reference parameter either ends with 'const& identifier' +# or looks like 'const type& identifier' when 'type' is atomic. +_RE_PATTERN_CONST_REF_PARAM = ( + r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT + + r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')') +# Stream types. 
+_RE_PATTERN_REF_STREAM_PARAM = ( + r'(?:.*stream\s*&\s*' + _RE_PATTERN_IDENT + r')') + + +def CheckLanguage(filename, clean_lines, linenum, file_extension, + include_state, nesting_state, error): + """Checks rules from the 'C++ language rules' section of cppguide.html. + + Some of these rules are hard to test (function overloading, using + uint32 inappropriately), but we do the best we can. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + include_state: An _IncludeState instance in which the headers are inserted. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + # If the line is empty or consists of entirely a comment, no need to + # check it. + line = clean_lines.elided[linenum] + if not line: + return + + match = _RE_PATTERN_INCLUDE.search(line) + if match: + CheckIncludeLine(filename, clean_lines, linenum, include_state, error) + return + + # Reset include state across preprocessor directives. This is meant + # to silence warnings for conditional includes. + match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line) + if match: + include_state.ResetSection(match.group(1)) + + # Make Windows paths like Unix. + fullname = os.path.abspath(filename).replace('\\', '/') + + # Perform other checks now that we are sure that this is not an include line + CheckCasts(filename, clean_lines, linenum, error) + CheckGlobalStatic(filename, clean_lines, linenum, error) + CheckPrintf(filename, clean_lines, linenum, error) + + if IsHeaderExtension(file_extension): + # TODO(unknown): check that 1-arg constructors are explicit. + # How to tell it's a constructor? 
+ # (handled in CheckForNonStandardConstructs for now) + # TODO(unknown): check that classes declare or disable copy/assign + # (level 1 error) + pass + + # Check if people are using the verboten C basic types. The only exception + # we regularly allow is "unsigned short port" for port. + if Search(r'\bshort port\b', line): + if not Search(r'\bunsigned short port\b', line): + error(filename, linenum, 'runtime/int', 4, + 'Use "unsigned short" for ports, not "short"') + else: + match = Search(r'\b(short|long(?! +double)|long long)\b', line) + if match: + error(filename, linenum, 'runtime/int', 4, + 'Use int16/int64/etc, rather than the C type %s' % match.group(1)) + + # Check if some verboten operator overloading is going on + # TODO(unknown): catch out-of-line unary operator&: + # class X {}; + # int operator&(const X& x) { return 42; } // unary operator& + # The trick is it's hard to tell apart from binary operator&: + # class Y { int operator&(const Y& x) { return 23; } }; // binary operator& + if Search(r'\boperator\s*&\s*\(\s*\)', line): + error(filename, linenum, 'runtime/operator', 4, + 'Unary operator& is dangerous. Do not use it.') + + # Check for suspicious usage of "if" like + # } if (a == b) { + if Search(r'\}\s*if\s*\(', line): + error(filename, linenum, 'readability/braces', 4, + 'Did you mean "else if"? If not, start a new line for "if".') + + # Check for potential format string bugs like printf(foo). + # We constrain the pattern not to pick things like DocidForPrintf(foo). + # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) + # TODO(unknown): Catch the following case. Need to change the calling + # convention of the whole function to process multiple line to handle it. 
+ # printf( + # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line); + printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(') + if printf_args: + match = Match(r'([\w.\->()]+)$', printf_args) + if match and match.group(1) != '__VA_ARGS__': + function_name = re.search(r'\b((?:string)?printf)\s*\(', + line, re.I).group(1) + error(filename, linenum, 'runtime/printf', 4, + 'Potential format string bug. Do %s("%%s", %s) instead.' + % (function_name, match.group(1))) + + # Check for potential memset bugs like memset(buf, sizeof(buf), 0). + match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) + if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): + error(filename, linenum, 'runtime/memset', 4, + 'Did you mean "memset(%s, 0, %s)"?' + % (match.group(1), match.group(2))) + + if Search(r'\busing namespace\b', line): + error(filename, linenum, 'build/namespaces', 5, + 'Do not use namespace using-directives. ' + 'Use using-declarations instead.') + + # Detect variable-length arrays. + match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) + if (match and match.group(2) != 'return' and match.group(2) != 'delete' and + match.group(3).find(']') == -1): + # Split the size using space and arithmetic operators as delimiters. + # If any of the resulting tokens are not compile time constants then + # report the error. 
+ tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3)) + is_const = True + skip_next = False + for tok in tokens: + if skip_next: + skip_next = False + continue + + if Search(r'sizeof\(.+\)', tok): continue + if Search(r'arraysize\(\w+\)', tok): continue + + tok = tok.lstrip('(') + tok = tok.rstrip(')') + if not tok: continue + if Match(r'\d+', tok): continue + if Match(r'0[xX][0-9a-fA-F]+', tok): continue + if Match(r'k[A-Z0-9]\w*', tok): continue + if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue + if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue + # A catch all for tricky sizeof cases, including 'sizeof expression', + # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' + # requires skipping the next token because we split on ' ' and '*'. + if tok.startswith('sizeof'): + skip_next = True + continue + is_const = False + break + if not is_const: + error(filename, linenum, 'runtime/arrays', 1, + 'Do not use variable-length arrays. Use an appropriately named ' + "('k' followed by CamelCase) compile-time constant for the size.") + + # Check for use of unnamed namespaces in header files. Registration + # macros are typically OK, so we allow use of "namespace {" on lines + # that end with backslashes. + if (IsHeaderExtension(file_extension) + and Search(r'\bnamespace\s*{', line) + and line[-1] != '\\'): + error(filename, linenum, 'build/namespaces', 4, + 'Do not use unnamed namespaces in header files. See ' + 'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces' + ' for more information.') + + +def CheckGlobalStatic(filename, clean_lines, linenum, error): + """Check for unsafe global or static objects. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + + # Match two lines at a time to support multiline declarations + if linenum + 1 < clean_lines.NumLines() and not Search(r'[;({]', line): + line += clean_lines.elided[linenum + 1].strip() + + # Check for people declaring static/global STL strings at the top level. + # This is dangerous because the C++ language does not guarantee that + # globals with constructors are initialized before the first access, and + # also because globals can be destroyed when some threads are still running. + # TODO(unknown): Generalize this to also find static unique_ptr instances. + # TODO(unknown): File bugs for clang-tidy to find these. + match = Match( + r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +' + r'([a-zA-Z0-9_:]+)\b(.*)', + line) + + # Remove false positives: + # - String pointers (as opposed to values). + # string *pointer + # const string *pointer + # string const *pointer + # string *const pointer + # + # - Functions and template specializations. + # string Function(... + # string Class::Method(... + # + # - Operators. These are matched separately because operator names + # cross non-word boundaries, and trying to match both operators + # and functions at the same time would decrease accuracy of + # matching identifiers. + # string Class::operator*() + if (match and + not Search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', line) and + not Search(r'\boperator\W', line) and + not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4))): + if Search(r'\bconst\b', line): + error(filename, linenum, 'runtime/string', 4, + 'For a static/global string constant, use a C style string ' + 'instead: "%schar%s %s[]".' 
% + (match.group(1), match.group(2) or '', match.group(3))) + else: + error(filename, linenum, 'runtime/string', 4, + 'Static/global string variables are not permitted.') + + if (Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line) or + Search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', line)): + error(filename, linenum, 'runtime/init', 4, + 'You seem to be initializing a member variable with itself.') + + +def CheckPrintf(filename, clean_lines, linenum, error): + """Check for printf related issues. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # When snprintf is used, the second argument shouldn't be a literal. + match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) + if match and match.group(2) != '0': + # If 2nd arg is zero, snprintf is used to calculate size. + error(filename, linenum, 'runtime/printf', 3, + 'If you can, use sizeof(%s) instead of %s as the 2nd arg ' + 'to snprintf.' % (match.group(1), match.group(2))) + + # Check if some verboten C functions are being used. + if Search(r'\bsprintf\s*\(', line): + error(filename, linenum, 'runtime/printf', 5, + 'Never use sprintf. Use snprintf instead.') + match = Search(r'\b(strcpy|strcat)\s*\(', line) + if match: + error(filename, linenum, 'runtime/printf', 4, + 'Almost always, snprintf is better than %s' % match.group(1)) + + +def IsDerivedFunction(clean_lines, linenum): + """Check if current line contains an inherited function. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains a function with "override" + virt-specifier. 
+ """ + # Scan back a few lines for start of current function + for i in xrange(linenum, max(-1, linenum - 10), -1): + match = Match(r'^([^()]*\w+)\(', clean_lines.elided[i]) + if match: + # Look for "override" after the matching closing parenthesis + line, _, closing_paren = CloseExpression( + clean_lines, i, len(match.group(1))) + return (closing_paren >= 0 and + Search(r'\boverride\b', line[closing_paren:])) + return False + + +def IsOutOfLineMethodDefinition(clean_lines, linenum): + """Check if current line contains an out-of-line method definition. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains an out-of-line method definition. + """ + # Scan back a few lines for start of current function + for i in xrange(linenum, max(-1, linenum - 10), -1): + if Match(r'^([^()]*\w+)\(', clean_lines.elided[i]): + return Match(r'^[^()]*\w+::\w+\(', clean_lines.elided[i]) is not None + return False + + +def IsInitializerList(clean_lines, linenum): + """Check if current line is inside constructor initializer list. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line appears to be inside constructor initializer + list, False otherwise. + """ + for i in xrange(linenum, 1, -1): + line = clean_lines.elided[i] + if i == linenum: + remove_function_body = Match(r'^(.*)\{\s*$', line) + if remove_function_body: + line = remove_function_body.group(1) + + if Search(r'\s:\s*\w+[({]', line): + # A lone colon tend to indicate the start of a constructor + # initializer list. It could also be a ternary operator, which + # also tend to appear in constructor initializer lists as + # opposed to parameter lists. + return True + if Search(r'\}\s*,\s*$', line): + # A closing brace followed by a comma is probably the end of a + # brace-initialized member in constructor initializer list. 
+ return True + if Search(r'[{};]\s*$', line): + # Found one of the following: + # - A closing brace or semicolon, probably the end of the previous + # function. + # - An opening brace, probably the start of current class or namespace. + # + # Current line is probably not inside an initializer list since + # we saw one of those things without seeing the starting colon. + return False + + # Got to the beginning of the file without seeing the start of + # constructor initializer list. + return False + + +def CheckForNonConstReference(filename, clean_lines, linenum, + nesting_state, error): + """Check for non-const references. + + Separate from CheckLanguage since it scans backwards from current + line, instead of scanning forward. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + # Do nothing if there is no '&' on current line. + line = clean_lines.elided[linenum] + if '&' not in line: + return + + # If a function is inherited, current function doesn't have much of + # a choice, so any non-const references should not be blamed on + # derived function. + if IsDerivedFunction(clean_lines, linenum): + return + + # Don't warn on out-of-line method definitions, as we would warn on the + # in-line declaration, if it isn't marked with 'override'. 
+ if IsOutOfLineMethodDefinition(clean_lines, linenum): + return + + # Long type names may be broken across multiple lines, usually in one + # of these forms: + # LongType + # ::LongTypeContinued &identifier + # LongType:: + # LongTypeContinued &identifier + # LongType< + # ...>::LongTypeContinued &identifier + # + # If we detected a type split across two lines, join the previous + # line to current line so that we can match const references + # accordingly. + # + # Note that this only scans back one line, since scanning back + # arbitrary number of lines would be expensive. If you have a type + # that spans more than 2 lines, please use a typedef. + if linenum > 1: + previous = None + if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line): + # previous_line\n + ::current_line + previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$', + clean_lines.elided[linenum - 1]) + elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line): + # previous_line::\n + current_line + previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$', + clean_lines.elided[linenum - 1]) + if previous: + line = previous.group(1) + line.lstrip() + else: + # Check for templated parameter that is split across multiple lines + endpos = line.rfind('>') + if endpos > -1: + (_, startline, startpos) = ReverseCloseExpression( + clean_lines, linenum, endpos) + if startpos > -1 and startline < linenum: + # Found the matching < on an earlier line, collect all + # pieces up to current line. + line = '' + for i in xrange(startline, linenum + 1): + line += clean_lines.elided[i].strip() + + # Check for non-const references in function parameters. A single '&' may + # found in the following places: + # inside expression: binary & for bitwise AND + # inside expression: unary & for taking the address of something + # inside declarators: reference parameter + # We will exclude the first two cases by checking that we are not inside a + # function body, including one that was just introduced by a trailing '{'. 
+ # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare]. + if (nesting_state.previous_stack_top and + not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or + isinstance(nesting_state.previous_stack_top, _NamespaceInfo))): + # Not at toplevel, not within a class, and not within a namespace + return + + # Avoid initializer lists. We only need to scan back from the + # current line for something that starts with ':'. + # + # We don't need to check the current line, since the '&' would + # appear inside the second set of parentheses on the current line as + # opposed to the first set. + if linenum > 0: + for i in xrange(linenum - 1, max(0, linenum - 10), -1): + previous_line = clean_lines.elided[i] + if not Search(r'[),]\s*$', previous_line): + break + if Match(r'^\s*:\s+\S', previous_line): + return + + # Avoid preprocessors + if Search(r'\\\s*$', line): + return + + # Avoid constructor initializer lists + if IsInitializerList(clean_lines, linenum): + return + + # We allow non-const references in a few standard places, like functions + # called "swap()" or iostream operators like "<<" or ">>". Do not check + # those function parameters. + # + # We also accept & in static_assert, which looks like a function but + # it's actually a declaration expression. + whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|' + r'operator\s*[<>][<>]|' + r'static_assert|COMPILE_ASSERT' + r')\s*\(') + if Search(whitelisted_functions, line): + return + elif not Search(r'\S+\([^)]*$', line): + # Don't see a whitelisted function on this line. Actually we + # didn't see any function name on this line, so this is likely a + # multi-line parameter list. Try a bit harder to catch this case. 
+ for i in xrange(2): + if (linenum > i and + Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])): + return + + decls = ReplaceAll(r'{[^}]*}', ' ', line) # exclude function body + for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls): + if (not Match(_RE_PATTERN_CONST_REF_PARAM, parameter) and + not Match(_RE_PATTERN_REF_STREAM_PARAM, parameter)): + error(filename, linenum, 'runtime/references', 2, + 'Is this a non-const reference? ' + 'If so, make const or use a pointer: ' + + ReplaceAll(' *<', '<', parameter)) + + +def CheckCasts(filename, clean_lines, linenum, error): + """Various cast related checks. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Check to see if they're using an conversion function cast. + # I just try to capture the most common basic types, though there are more. + # Parameterless conversion functions, such as bool(), are allowed as they are + # probably a member operator declaration or default constructor. + match = Search( + r'(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b' + r'(int|float|double|bool|char|int32|uint32|int64|uint64)' + r'(\([^)].*)', line) + expecting_function = ExpectingFunctionArgs(clean_lines, linenum) + if match and not expecting_function: + matched_type = match.group(2) + + # matched_new_or_template is used to silence two false positives: + # - New operators + # - Template arguments with function types + # + # For template arguments, we match on types immediately following + # an opening bracket without any spaces. This is a fast way to + # silence the common case where the function type is the first + # template argument. False negative with less-than comparison is + # avoided because those operators are usually followed by a space. 
+ # + # function // bracket + no space = false positive + # value < double(42) // bracket + space = true positive + matched_new_or_template = match.group(1) + + # Avoid arrays by looking for brackets that come after the closing + # parenthesis. + if Match(r'\([^()]+\)\s*\[', match.group(3)): + return + + # Other things to ignore: + # - Function pointers + # - Casts to pointer types + # - Placement new + # - Alias declarations + matched_funcptr = match.group(3) + if (matched_new_or_template is None and + not (matched_funcptr and + (Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', + matched_funcptr) or + matched_funcptr.startswith('(*)'))) and + not Match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and + not Search(r'new\(\S+\)\s*' + matched_type, line)): + error(filename, linenum, 'readability/casting', 4, + 'Using deprecated casting style. ' + 'Use static_cast<%s>(...) instead' % + matched_type) + + if not expecting_function: + CheckCStyleCast(filename, clean_lines, linenum, 'static_cast', + r'\((int|float|double|bool|char|u?int(16|32|64))\)', error) + + # This doesn't catch all cases. Consider (const char * const)"hello". + # + # (char *) "foo" should always be a const_cast (reinterpret_cast won't + # compile). + if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast', + r'\((char\s?\*+\s?)\)\s*"', error): + pass + else: + # Check pointer casts for other than string constants + CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast', + r'\((\w+\s?\*+\s?)\)', error) + + # In addition, we look for people taking the address of a cast. This + # is dangerous -- casts can assign to temporaries, so the pointer doesn't + # point where you think. + # + # Some non-identifier character is required before the '&' for the + # expression to be recognized as a cast. 
These are casts: + # expression = &static_cast(temporary()); + # function(&(int*)(temporary())); + # + # This is not a cast: + # reference_type&(int* function_param); + match = Search( + r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|' + r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line) + if match: + # Try a better error message when the & is bound to something + # dereferenced by the casted pointer, as opposed to the casted + # pointer itself. + parenthesis_error = False + match = Match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line) + if match: + _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1))) + if x1 >= 0 and clean_lines.elided[y1][x1] == '(': + _, y2, x2 = CloseExpression(clean_lines, y1, x1) + if x2 >= 0: + extended_line = clean_lines.elided[y2][x2:] + if y2 < clean_lines.NumLines() - 1: + extended_line += clean_lines.elided[y2 + 1] + if Match(r'\s*(?:->|\[)', extended_line): + parenthesis_error = True + + if parenthesis_error: + error(filename, linenum, 'readability/casting', 4, + ('Are you taking an address of something dereferenced ' + 'from a cast? Wrapping the dereferenced expression in ' + 'parentheses will make the binding more obvious')) + else: + error(filename, linenum, 'runtime/casting', 4, + ('Are you taking an address of a cast? ' + 'This is dangerous: could be a temp var. ' + 'Take the address before doing the cast, rather than after')) + + +def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error): + """Checks for a C-style cast by looking for the pattern. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + cast_type: The string for the C++ cast to recommend. This is either + reinterpret_cast, static_cast, or const_cast, depending. + pattern: The regular expression used to find C-style casts. + error: The function to call with any errors found. 
+ + Returns: + True if an error was emitted. + False otherwise. + """ + line = clean_lines.elided[linenum] + match = Search(pattern, line) + if not match: + return False + + # Exclude lines with keywords that tend to look like casts + context = line[0:match.start(1) - 1] + if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context): + return False + + # Try expanding current context to see if we one level of + # parentheses inside a macro. + if linenum > 0: + for i in xrange(linenum - 1, max(0, linenum - 5), -1): + context = clean_lines.elided[i] + context + if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context): + return False + + # operator++(int) and operator--(int) + if context.endswith(' operator++') or context.endswith(' operator--'): + return False + + # A single unnamed argument for a function tends to look like old style cast. + # If we see those, don't issue warnings for deprecated casts. + remainder = line[match.end(0):] + if Match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)', + remainder): + return False + + # At this point, all that should be left is actual casts. + error(filename, linenum, 'readability/casting', 4, + 'Using C-style cast. Use %s<%s>(...) instead' % + (cast_type, match.group(1))) + + return True + + +def ExpectingFunctionArgs(clean_lines, linenum): + """Checks whether where function type arguments are expected. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + + Returns: + True if the line at 'linenum' is inside something that expects arguments + of function types. 
+ """ + line = clean_lines.elided[linenum] + return (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or + (linenum >= 2 and + (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$', + clean_lines.elided[linenum - 1]) or + Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$', + clean_lines.elided[linenum - 2]) or + Search(r'\bstd::m?function\s*\<\s*$', + clean_lines.elided[linenum - 1])))) + + +_HEADERS_CONTAINING_TEMPLATES = ( + ('', ('deque',)), + ('', ('unary_function', 'binary_function', + 'plus', 'minus', 'multiplies', 'divides', 'modulus', + 'negate', + 'equal_to', 'not_equal_to', 'greater', 'less', + 'greater_equal', 'less_equal', + 'logical_and', 'logical_or', 'logical_not', + 'unary_negate', 'not1', 'binary_negate', 'not2', + 'bind1st', 'bind2nd', + 'pointer_to_unary_function', + 'pointer_to_binary_function', + 'ptr_fun', + 'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t', + 'mem_fun_ref_t', + 'const_mem_fun_t', 'const_mem_fun1_t', + 'const_mem_fun_ref_t', 'const_mem_fun1_ref_t', + 'mem_fun_ref', + )), + ('', ('numeric_limits',)), + ('', ('list',)), + ('', ('map', 'multimap',)), + ('', ('allocator', 'make_shared', 'make_unique', 'shared_ptr', + 'unique_ptr', 'weak_ptr')), + ('', ('queue', 'priority_queue',)), + ('', ('set', 'multiset',)), + ('', ('stack',)), + ('', ('char_traits', 'basic_string',)), + ('', ('tuple',)), + ('', ('unordered_map', 'unordered_multimap')), + ('', ('unordered_set', 'unordered_multiset')), + ('', ('pair',)), + ('', ('vector',)), + + # gcc extensions. 
+ # Note: std::hash is their hash, ::hash is our hash
+ ('<hash_map>', ('hash_map', 'hash_multimap',)),
+ ('<hash_set>', ('hash_set', 'hash_multiset',)),
+ ('<slist>', ('slist',)),
+ )
+
+_HEADERS_MAYBE_TEMPLATES = (
+ ('<algorithm>', ('copy', 'max', 'min', 'min_element', 'sort',
+ 'transform',
+ )),
+ ('<utility>', ('forward', 'make_pair', 'move', 'swap')),
+ )
+
+_RE_PATTERN_STRING = re.compile(r'\bstring\b')
+
+_re_pattern_headers_maybe_templates = []
+for _header, _templates in _HEADERS_MAYBE_TEMPLATES:
+ for _template in _templates:
+ # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
+ # type::max().
+ _re_pattern_headers_maybe_templates.append(
+ (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
+ _template,
+ _header))
+
+# Other scripts may reach in and modify this pattern.
+_re_pattern_templates = []
+for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
+ for _template in _templates:
+ _re_pattern_templates.append(
+ (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
+ _template + '<>',
+ _header))
+
+
+def FilesBelongToSameModule(filename_cc, filename_h):
+ """Check if these two filenames belong to the same module.
+
+ The concept of a 'module' here is as follows:
+ foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
+ same 'module' if they are in the same directory.
+ some/path/public/xyzzy and some/path/internal/xyzzy are also considered
+ to belong to the same module here.
+
+ If the filename_cc contains a longer path than the filename_h, for example,
+ '/absolute/path/to/base/sysinfo.cc', and this file would include
+ 'base/sysinfo.h', this function also produces the prefix needed to open the
+ header. This is used by the caller of this function to more robustly open the
+ header file. We don't have access to the real include paths in this context,
+ so we need this guesswork here.
+
+ Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
+ according to this implementation.
Because of this, this function gives + some false positives. This should be sufficiently rare in practice. + + Args: + filename_cc: is the path for the .cc file + filename_h: is the path for the header path + + Returns: + Tuple with a bool and a string: + bool: True if filename_cc and filename_h belong to the same module. + string: the additional prefix needed to open the header file. + """ + + fileinfo = FileInfo(filename_cc) + if not fileinfo.IsSource(): + return (False, '') + filename_cc = filename_cc[:-len(fileinfo.Extension())] + matched_test_suffix = Search(_TEST_FILE_SUFFIX, fileinfo.BaseName()) + if matched_test_suffix: + filename_cc = filename_cc[:-len(matched_test_suffix.group(1))] + filename_cc = filename_cc.replace('/public/', '/') + filename_cc = filename_cc.replace('/internal/', '/') + + if not filename_h.endswith('.h'): + return (False, '') + filename_h = filename_h[:-len('.h')] + if filename_h.endswith('-inl'): + filename_h = filename_h[:-len('-inl')] + filename_h = filename_h.replace('/public/', '/') + filename_h = filename_h.replace('/internal/', '/') + + files_belong_to_same_module = filename_cc.endswith(filename_h) + common_path = '' + if files_belong_to_same_module: + common_path = filename_cc[:-len(filename_h)] + return files_belong_to_same_module, common_path + + +def UpdateIncludeState(filename, include_dict, io=codecs): + """Fill up the include_dict with new includes found from the file. + + Args: + filename: the name of the header to read. + include_dict: a dictionary in which the headers are inserted. + io: The io factory to use to read the file. Provided for testability. + + Returns: + True if a header was successfully added. False otherwise. 
+ """ + headerfile = None + try: + headerfile = io.open(filename, 'r', 'utf8', 'replace') + except IOError: + return False + linenum = 0 + for line in headerfile: + linenum += 1 + clean_line = CleanseComments(line) + match = _RE_PATTERN_INCLUDE.search(clean_line) + if match: + include = match.group(2) + include_dict.setdefault(include, linenum) + return True + + +def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, + io=codecs): + """Reports for missing stl includes. + + This function will output warnings to make sure you are including the headers + necessary for the stl containers and functions that you use. We only give one + reason to include a header. For example, if you use both equal_to<> and + less<> in a .h file, only one (the latter in the file) of these will be + reported as a reason to include the . + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + include_state: An _IncludeState instance. + error: The function to call with any errors found. + io: The IO factory to use to read the header file. Provided for unittest + injection. + """ + required = {} # A map of header name to linenumber and the template entity. + # Example of required: { '': (1219, 'less<>') } + + for linenum in xrange(clean_lines.NumLines()): + line = clean_lines.elided[linenum] + if not line or line[0] == '#': + continue + + # String is special -- it is a non-templatized type in STL. + matched = _RE_PATTERN_STRING.search(line) + if matched: + # Don't warn about strings in non-STL namespaces: + # (We check only the first match per line; good enough.) + prefix = line[:matched.start()] + if prefix.endswith('std::') or not prefix.endswith('::'): + required[''] = (linenum, 'string') + + for pattern, template, header in _re_pattern_headers_maybe_templates: + if pattern.search(line): + required[header] = (linenum, template) + + # The following function is just a speed up, no semantics are changed. 
+ if not '<' in line: # Reduces the cpu time usage by skipping lines. + continue + + for pattern, template, header in _re_pattern_templates: + matched = pattern.search(line) + if matched: + # Don't warn about IWYU in non-STL namespaces: + # (We check only the first match per line; good enough.) + prefix = line[:matched.start()] + if prefix.endswith('std::') or not prefix.endswith('::'): + required[header] = (linenum, template) + + # The policy is that if you #include something in foo.h you don't need to + # include it again in foo.cc. Here, we will look at possible includes. + # Let's flatten the include_state include_list and copy it into a dictionary. + include_dict = dict([item for sublist in include_state.include_list + for item in sublist]) + + # Did we find the header for this file (if any) and successfully load it? + header_found = False + + # Use the absolute path so that matching works properly. + abs_filename = FileInfo(filename).FullName() + + # For Emacs's flymake. + # If cpplint is invoked from Emacs's flymake, a temporary file is generated + # by flymake and that file name might end with '_flymake.cc'. In that case, + # restore original file name here so that the corresponding header file can be + # found. + # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h' + # instead of 'foo_flymake.h' + abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename) + + # include_dict is modified during iteration, so we iterate over a copy of + # the keys. + header_keys = include_dict.keys() + for header in header_keys: + (same_module, common_path) = FilesBelongToSameModule(abs_filename, header) + fullpath = common_path + header + if same_module and UpdateIncludeState(fullpath, include_dict, io): + header_found = True + + # If we can't find the header file for a .cc, assume it's because we don't + # know where to look. In that case we'll give up as we're not sure they + # didn't include it in the .h file. 
+ # TODO(unknown): Do a better job of finding .h files so we are confident that + # not having the .h file means there isn't one. + if filename.endswith('.cc') and not header_found: + return + + # All the lines have been processed, report the errors found. + for required_header_unstripped in required: + template = required[required_header_unstripped][1] + if required_header_unstripped.strip('<>"') not in include_dict: + error(filename, required[required_header_unstripped][0], + 'build/include_what_you_use', 4, + 'Add #include ' + required_header_unstripped + ' for ' + template) + + +_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<') + + +def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): + """Check that make_pair's template arguments are deduced. + + G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are + specified explicitly, and such use isn't intended in any case. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line) + if match: + error(filename, linenum, 'build/explicit_make_pair', + 4, # 4 = high confidence + 'For C++11-compatibility, omit template arguments from make_pair' + ' OR use pair directly OR if appropriate, construct a pair directly') + + +def CheckRedundantVirtual(filename, clean_lines, linenum, error): + """Check if line contains a redundant "virtual" function-specifier. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Look for "virtual" on current line. 
+ line = clean_lines.elided[linenum] + virtual = Match(r'^(.*)(\bvirtual\b)(.*)$', line) + if not virtual: return + + # Ignore "virtual" keywords that are near access-specifiers. These + # are only used in class base-specifier and do not apply to member + # functions. + if (Search(r'\b(public|protected|private)\s+$', virtual.group(1)) or + Match(r'^\s+(public|protected|private)\b', virtual.group(3))): + return + + # Ignore the "virtual" keyword from virtual base classes. Usually + # there is a column on the same line in these cases (virtual base + # classes are rare in google3 because multiple inheritance is rare). + if Match(r'^.*[^:]:[^:].*$', line): return + + # Look for the next opening parenthesis. This is the start of the + # parameter list (possibly on the next line shortly after virtual). + # TODO(unknown): doesn't work if there are virtual functions with + # decltype() or other things that use parentheses, but csearch suggests + # that this is rare. + end_col = -1 + end_line = -1 + start_col = len(virtual.group(2)) + for start_line in xrange(linenum, min(linenum + 3, clean_lines.NumLines())): + line = clean_lines.elided[start_line][start_col:] + parameter_list = Match(r'^([^(]*)\(', line) + if parameter_list: + # Match parentheses to find the end of the parameter list + (_, end_line, end_col) = CloseExpression( + clean_lines, start_line, start_col + len(parameter_list.group(1))) + break + start_col = 0 + + if end_col < 0: + return # Couldn't find end of parameter list, give up + + # Look for "override" or "final" after the parameter list + # (possibly on the next few lines). 
+ for i in xrange(end_line, min(end_line + 3, clean_lines.NumLines())): + line = clean_lines.elided[i][end_col:] + match = Search(r'\b(override|final)\b', line) + if match: + error(filename, linenum, 'readability/inheritance', 4, + ('"virtual" is redundant since function is ' + 'already declared as "%s"' % match.group(1))) + + # Set end_col to check whole lines after we are done with the + # first line. + end_col = 0 + if Search(r'[^\w]\s*$', line): + break + + +def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error): + """Check if line contains a redundant "override" or "final" virt-specifier. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Look for closing parenthesis nearby. We need one to confirm where + # the declarator ends and where the virt-specifier starts to avoid + # false positives. + line = clean_lines.elided[linenum] + declarator_end = line.rfind(')') + if declarator_end >= 0: + fragment = line[declarator_end:] + else: + if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0: + fragment = line + else: + return + + # Check that at most one of "override" or "final" is present, not both + if Search(r'\boverride\b', fragment) and Search(r'\bfinal\b', fragment): + error(filename, linenum, 'readability/inheritance', 4, + ('"override" is redundant since function is ' + 'already declared as "final"')) + + + + +# Returns true if we are at a new block, and it is directly +# inside of a namespace. +def IsBlockInNameSpace(nesting_state, is_forward_declaration): + """Checks that the new block is directly in a namespace. + + Args: + nesting_state: The _NestingState object that contains info about our state. + is_forward_declaration: If the class is a forward declared class. + Returns: + Whether or not the new block is directly in a namespace. 
+ """ + if is_forward_declaration: + if len(nesting_state.stack) >= 1 and ( + isinstance(nesting_state.stack[-1], _NamespaceInfo)): + return True + else: + return False + + return (len(nesting_state.stack) > 1 and + nesting_state.stack[-1].check_namespace_indentation and + isinstance(nesting_state.stack[-2], _NamespaceInfo)) + + +def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, + raw_lines_no_comments, linenum): + """This method determines if we should apply our namespace indentation check. + + Args: + nesting_state: The current nesting state. + is_namespace_indent_item: If we just put a new class on the stack, True. + If the top of the stack is not a class, or we did not recently + add the class, False. + raw_lines_no_comments: The lines without the comments. + linenum: The current line number we are processing. + + Returns: + True if we should apply our namespace indentation check. Currently, it + only works for classes and namespaces inside of a namespace. + """ + + is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments, + linenum) + + if not (is_namespace_indent_item or is_forward_declaration): + return False + + # If we are in a macro, we do not want to check the namespace indentation. + if IsMacroDefinition(raw_lines_no_comments, linenum): + return False + + return IsBlockInNameSpace(nesting_state, is_forward_declaration) + + +# Call this method if the line is directly inside of a namespace. +# If the line above is blank (excluding comments) or the start of +# an inner namespace, it cannot be indented. 
+def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum, + error): + line = raw_lines_no_comments[linenum] + if Match(r'^\s+', line): + error(filename, linenum, 'runtime/indentation_namespace', 4, + 'Do not indent within a namespace') + + +def ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions=[]): + """Processes a single line in the file. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + clean_lines: An array of strings, each representing a line of the file, + with comments stripped. + line: Number of line being processed. + include_state: An _IncludeState instance in which the headers are inserted. + function_state: A _FunctionState instance which counts function lines, etc. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. 
Each function takes 4 + arguments: filename, clean_lines, line, error + """ + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[line], line, error) + nesting_state.Update(filename, clean_lines, line, error) + CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, + error) + if nesting_state.InAsmBlock(): return + CheckForFunctionLengths(filename, clean_lines, line, function_state, error) + CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) + CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) + CheckLanguage(filename, clean_lines, line, file_extension, include_state, + nesting_state, error) + CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) + CheckForNonStandardConstructs(filename, clean_lines, line, + nesting_state, error) + CheckVlogArguments(filename, clean_lines, line, error) + CheckPosixThreading(filename, clean_lines, line, error) + CheckInvalidIncrement(filename, clean_lines, line, error) + CheckMakePairUsesDeduction(filename, clean_lines, line, error) + CheckRedundantVirtual(filename, clean_lines, line, error) + CheckRedundantOverrideOrFinal(filename, clean_lines, line, error) + for check_fn in extra_check_functions: + check_fn(filename, clean_lines, line, error) + +def FlagCxx11Features(filename, clean_lines, linenum, error): + """Flag those c++11 features that we only allow in certain places. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) + + # Flag unapproved C++ TR1 headers. 
+ if include and include.group(1).startswith('tr1/'): + error(filename, linenum, 'build/c++tr1', 5, + ('C++ TR1 headers such as <%s> are unapproved.') % include.group(1)) + + # Flag unapproved C++11 headers. + # if include and include.group(1) in ('cfenv', + # 'condition_variable', + # 'fenv.h', + # 'future', + # 'mutex', + # 'thread', + # 'chrono', + # 'ratio', + # 'regex', + # 'system_error', + # ): + # error(filename, linenum, 'build/c++11', 5, + # ('<%s> is an unapproved C++11 header.') % include.group(1)) + + # The only place where we need to worry about C++11 keywords and library + # features in preprocessor directives is in macro definitions. + if Match(r'\s*#', line) and not Match(r'\s*#\s*define\b', line): return + + # These are classes and free functions. The classes are always + # mentioned as std::*, but we only catch the free functions if + # they're not found by ADL. They're alphabetical by header. + for top_name in ( + # type_traits + 'alignment_of', + 'aligned_union', + ): + if Search(r'\bstd::%s\b' % top_name, line): + error(filename, linenum, 'build/c++11', 5, + ('std::%s is an unapproved C++11 class or function. Send c-style ' + 'an example of where it would make your code more readable, and ' + 'they may let you use it.') % top_name) + + +def FlagCxx14Features(filename, clean_lines, linenum, error): + """Flag those C++14 features that we restrict. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) + + # Flag unapproved C++14 headers. 
+ if include and include.group(1) in ('scoped_allocator', 'shared_mutex'): + error(filename, linenum, 'build/c++14', 5, + ('<%s> is an unapproved C++14 header.') % include.group(1)) + + +def ProcessFileData(filename, file_extension, lines, error, + extra_check_functions=[]): + """Performs lint checks and reports any errors to the given error function. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + lines = (['// marker so line numbers and indices both start at 1'] + lines + + ['// marker so line numbers end in a known way']) + + include_state = _IncludeState() + function_state = _FunctionState() + nesting_state = NestingState() + + ResetNolintSuppressions() + + CheckForCopyright(filename, lines, error) + ProcessGlobalSuppresions(lines) + RemoveMultiLineComments(filename, lines, error) + clean_lines = CleansedLines(lines) + + if IsHeaderExtension(file_extension): + CheckForHeaderGuard(filename, clean_lines, error) + + for line in xrange(clean_lines.NumLines()): + ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions) + FlagCxx11Features(filename, clean_lines, line, error) + nesting_state.CheckCompletedBlocks(filename, error) + + CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) + + # Check that the .cc file has included its header if it exists. 
+ if _IsSourceExtension(file_extension): + CheckHeaderFileIncluded(filename, include_state, error) + + # We check here rather than inside ProcessLine so that we see raw + # lines rather than "cleaned" lines. + CheckForBadCharacters(filename, lines, error) + + CheckForNewlineAtEOF(filename, lines, error) + +def ProcessConfigOverrides(filename): + """ Loads the configuration files and processes the config overrides. + + Args: + filename: The name of the file being processed by the linter. + + Returns: + False if the current |filename| should not be processed further. + """ + + abs_filename = os.path.abspath(filename) + cfg_filters = [] + keep_looking = True + while keep_looking: + abs_path, base_name = os.path.split(abs_filename) + if not base_name: + break # Reached the root directory. + + cfg_file = os.path.join(abs_path, "CPPLINT.cfg") + abs_filename = abs_path + if not os.path.isfile(cfg_file): + continue + + try: + with open(cfg_file) as file_handle: + for line in file_handle: + line, _, _ = line.partition('#') # Remove comments. + if not line.strip(): + continue + + name, _, val = line.partition('=') + name = name.strip() + val = val.strip() + if name == 'set noparent': + keep_looking = False + elif name == 'filter': + cfg_filters.append(val) + elif name == 'exclude_files': + # When matching exclude_files pattern, use the base_name of + # the current file name or the directory name we are processing. + # For example, if we are checking for lint errors in /foo/bar/baz.cc + # and we found the .cfg file at /foo/CPPLINT.cfg, then the config + # file's "exclude_files" filter is meant to be checked against "bar" + # and not "baz" nor "bar/baz.cc". + if base_name: + pattern = re.compile(val) + if pattern.match(base_name): + if _cpplint_state.quiet: + # Suppress "Ignoring file" warning when using --quiet. + return False + sys.stderr.write('Ignoring "%s": file excluded by "%s". 
' + 'File path component "%s" matches ' + 'pattern "%s"\n' % + (filename, cfg_file, base_name, val)) + return False + elif name == 'linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + sys.stderr.write('Line length must be numeric.') + elif name == 'root': + global _root + # root directories are specified relative to CPPLINT.cfg dir. + _root = os.path.join(os.path.dirname(cfg_file), val) + elif name == 'headers': + ProcessHppHeadersOption(val) + else: + sys.stderr.write( + 'Invalid configuration option (%s) in file %s\n' % + (name, cfg_file)) + + except IOError: + sys.stderr.write( + "Skipping config file '%s': Can't open for reading\n" % cfg_file) + keep_looking = False + + # Apply all the accumulated filters in reverse order (top-level directory + # config options having the least priority). + for filter in reversed(cfg_filters): + _AddFilters(filter) + + return True + + +def ProcessFile(filename, vlevel, extra_check_functions=[]): + """Does google-lint on a single file. + + Args: + filename: The name of the file to parse. + + vlevel: The level of errors to report. Every error of confidence + >= verbose_level will be reported. 0 is a good default. + + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + + _SetVerboseLevel(vlevel) + _BackupFilters() + old_errors = _cpplint_state.error_count + + if not ProcessConfigOverrides(filename): + _RestoreFilters() + return + + lf_lines = [] + crlf_lines = [] + try: + # Support the UNIX convention of using "-" for stdin. Note that + # we are not opening the file with universal newline support + # (which codecs doesn't support anyway), so the resulting lines do + # contain trailing '\r' characters if we are reading a file that + # has CRLF endings. + # If after the split a trailing '\r' is present, it is removed + # below. 
+ if filename == '-': + lines = codecs.StreamReaderWriter(sys.stdin, + codecs.getreader('utf8'), + codecs.getwriter('utf8'), + 'replace').read().split('\n') + else: + lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n') + + # Remove trailing '\r'. + # The -1 accounts for the extra trailing blank line we get from split() + for linenum in range(len(lines) - 1): + if lines[linenum].endswith('\r'): + lines[linenum] = lines[linenum].rstrip('\r') + crlf_lines.append(linenum + 1) + else: + lf_lines.append(linenum + 1) + + except IOError: + sys.stderr.write( + "Skipping input '%s': Can't open for reading\n" % filename) + _RestoreFilters() + return + + # Note, if no dot is found, this will give the entire filename as the ext. + file_extension = filename[filename.rfind('.') + 1:] + + # When reading from stdin, the extension is unknown, so no cpplint tests + # should rely on the extension. + if filename != '-' and file_extension not in _valid_extensions: + sys.stderr.write('Ignoring %s; not a valid file name ' + '(%s)\n' % (filename, ', '.join(_valid_extensions))) + else: + ProcessFileData(filename, file_extension, lines, Error, + extra_check_functions) + + # If end-of-line sequences are a mix of LF and CR-LF, issue + # warnings on the lines with CR. + # + # Don't issue any warnings if all lines are uniformly LF or CR-LF, + # since critique can handle these just fine, and the style guide + # doesn't dictate a particular end of line sequence. + # + # We can't depend on os.linesep to determine what the desired + # end-of-line sequence should be, since that will return the + # server-side end-of-line sequence. + if lf_lines and crlf_lines: + # Warn on every line with CR. An alternative approach might be to + # check whether the file is mostly CRLF or just LF, and warn on the + # minority, we bias toward LF here since most tools prefer LF. 
+ for linenum in crlf_lines: + Error(filename, linenum, 'whitespace/newline', 1, + 'Unexpected \\r (^M) found; better to use only \\n') + + # Suppress printing anything if --quiet was passed unless the error + # count has increased after processing this file. + if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count: + sys.stdout.write('Done processing %s\n' % filename) + _RestoreFilters() + + +def PrintUsage(message): + """Prints a brief usage string and exits, optionally with an error message. + + Args: + message: The optional error message. + """ + sys.stderr.write(_USAGE) + if message: + sys.exit('\nFATAL ERROR: ' + message) + else: + sys.exit(1) + + +def PrintCategories(): + """Prints a list of all the error-categories used by error messages. + + These are the categories used to filter messages via --filter. + """ + sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES)) + sys.exit(0) + + +def ParseArguments(args): + """Parses the command line arguments. + + This may set the output format and verbosity level as side-effects. + + Args: + args: The command line arguments: + + Returns: + The list of filenames to lint. 
+ """ + try: + (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=', + 'counting=', + 'filter=', + 'root=', + 'linelength=', + 'extensions=', + 'headers=', + 'quiet']) + except getopt.GetoptError: + PrintUsage('Invalid arguments.') + + verbosity = _VerboseLevel() + output_format = _OutputFormat() + filters = '' + quiet = _Quiet() + counting_style = '' + + for (opt, val) in opts: + if opt == '--help': + PrintUsage(None) + elif opt == '--output': + if val not in ('emacs', 'vs7', 'eclipse'): + PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.') + output_format = val + elif opt == '--quiet': + quiet = True + elif opt == '--verbose': + verbosity = int(val) + elif opt == '--filter': + filters = val + if not filters: + PrintCategories() + elif opt == '--counting': + if val not in ('total', 'toplevel', 'detailed'): + PrintUsage('Valid counting options are total, toplevel, and detailed') + counting_style = val + elif opt == '--root': + global _root + _root = val + elif opt == '--linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + PrintUsage('Line length must be digits.') + elif opt == '--extensions': + global _valid_extensions + try: + _valid_extensions = set(val.split(',')) + except ValueError: + PrintUsage('Extensions must be comma seperated list.') + elif opt == '--headers': + ProcessHppHeadersOption(val) + + if not filenames: + PrintUsage('No files were specified.') + + _SetOutputFormat(output_format) + _SetQuiet(quiet) + _SetVerboseLevel(verbosity) + _SetFilters(filters) + _SetCountingStyle(counting_style) + + return filenames + + +def main(): + filenames = ParseArguments(sys.argv[1:]) + + # Change stderr to write with replacement characters so we don't die + # if we try to print something containing non-ASCII characters. 
+ sys.stderr = codecs.StreamReaderWriter(sys.stderr, + codecs.getreader('utf8'), + codecs.getwriter('utf8'), + 'replace') + + _cpplint_state.ResetErrorCounts() + for filename in filenames: + ProcessFile(filename, _cpplint_state.verbose_level) + # If --quiet is passed, suppress printing error count unless there are errors. + if not _cpplint_state.quiet or _cpplint_state.error_count > 0: + _cpplint_state.PrintErrorCounts() + + sys.exit(_cpplint_state.error_count > 0) + + +if __name__ == '__main__': + main() diff --git a/eval/Makefile b/eval/Makefile new file mode 100644 index 0000000..1cae7a8 --- /dev/null +++ b/eval/Makefile @@ -0,0 +1,133 @@ +EVAL_DF_BINS=10,20,30,40,50,60,70,80,90,100 + +all: eval lighteval + +lighteval: vitoria.lighteval stuttgart.lighteval paris.lighteval switzerland.lighteval + +eval: vitoria.eval stuttgart.eval paris.eval switzerland.eval + +clean: + @rm -f *.eval + @rm -rf gtfs + @rm -rf osm + @rm -rf evalout + +osmconvert: + @echo `date +"[%F %T.%3N]"` "EVAL : Fetching osmconvert..." + @curl http://m.m.i24.cc/osmconvert.c | cc -x c - -lz -O3 -o osmconvert + +%.lighteval: osm/%.osm gtfs/%/stops.txt gtfs/%/stop_times.txt gtfs/%/trips.txt gtfs/%/routes.txt eval.cfg + @echo `date +"[%F %T.%3N]"` "EVAL : Running light (without stats) evaluation for '"$*"'..." + @mkdir -p gtfs/$*/shaped + @rm -f gtfs/$*/shaped/* + @../build/pfaedle -x $< -i gtfs/$* -c eval.cfg -o gtfs/$*/shaped -D -m all 2>&1 | tee $@ + +%.eval: osm/%.osm gtfs/%/stops.txt gtfs/%/stop_times.txt gtfs/%/trips.txt gtfs/%/routes.txt eval.cfg eval-wo-osm-line-rels.cfg + @echo `date +"[%F %T.%3N]"` "EVAL : Running evaluation for '"$*"'..." 
+ @mkdir -p gtfs/$*/shaped + @rm -f gtfs/$*/shaped/* + @mkdir -p evalout/ + @mkdir -p evalout/$*/ + @mkdir -p evalout/$*/hmm+osm + @../build/pfaedle -x $< -i gtfs/$* -c eval.cfg --eval-path evalout/$*/hmm+osm -o gtfs/$*/shaped -D -m all --eval --eval-df-bins $(EVAL_DF_BINS) 2>&1 | tee $@ + @find evalout/$*/hmm+osm/ -name "*.json" -print0 | xargs -0 rm + + @mkdir -p evalout/$*/greedy + @../build/pfaedle -x $< -i gtfs/$* -c eval.cfg --method greedy --eval-path evalout/$*/greedy -o gtfs/$*/shaped -D -m all --eval --eval-df-bins $(EVAL_DF_BINS) 2>&1 | tee $@ + @find evalout/$*/greedy/ -name "*.json" -print0 | xargs -0 rm + + @mkdir -p evalout/$*/greedy2 + @../build/pfaedle -x $< -i gtfs/$* -c eval.cfg --method greedy2 --eval-path evalout/$*/greedy2 -o gtfs/$*/shaped -D -m all --eval --eval-df-bins $(EVAL_DF_BINS) 2>&1 | tee $@ + @find evalout/$*/greedy2/ -name "*.json" -print0 | xargs -0 rm + + @mkdir -p evalout/$*/hmm + @../build/pfaedle -x $< -i gtfs/$* -c eval-wo-osm-line-rels.cfg --eval-path evalout/$*/hmm -o gtfs/$*/shaped -D -m all --eval --eval-df-bins $(EVAL_DF_BINS) 2>&1 | tee $@ + @find evalout/$*/hmm/ -name "*.json" -print0 | xargs -0 rm + +osm/spain-latest.osm.pbf: + @mkdir -p osm + @echo `date +"[%F %T.%3N]"` "EVAL : Downloading OSM data for Spain..." + @curl --progress-bar http://download.geofabrik.de/europe/spain-latest.osm.pbf > $@ + +osm/spain-latest.osm: osm/spain-latest.osm.pbf osmconvert + @# pre-filter to vitoria gasteiz + @echo `date +"[%F %T.%3N]"` "EVAL : Pre-filtering OSM data to Vitoria-Gasteiz..." + @osmconvert -b=-2.8661,42.7480,-2.4788,43.0237 $< > $@ + +osm/baden-wuerttemberg-latest.osm.pbf: + @mkdir -p osm + @echo `date +"[%F %T.%3N]"` "EVAL : Downloading OSM data for Baden-Württemberg..." 
+ @curl --progress-bar http://download.geofabrik.de/europe/germany/baden-wuerttemberg-latest.osm.pbf > $@ + +osm/baden-wuerttemberg-latest.osm: osm/baden-wuerttemberg-latest.osm.pbf osmconvert + @echo `date +"[%F %T.%3N]"` "EVAL : Extracting OSM data..." + @osmconvert $< > $@ + +osm/france-latest.osm.pbf: + @mkdir -p osm + @echo `date +"[%F %T.%3N]"` "EVAL : Downloading OSM data for France..." + @curl --progress-bar http://download.geofabrik.de/europe/france-latest.osm.pbf > $@ + +osm/paris-latest.osm: osm/france-latest.osm.pbf osmconvert + @# pre-filter to greater ile de france + @echo `date +"[%F %T.%3N]"` "EVAL : Pre-filtering OSM data to Île-de-France..." + @osmconvert -b=0.374,47.651,4.241,50.261 $< > $@ + +osm/europe-latest.osm.pbf: + @mkdir -p osm + @echo `date +"[%F %T.%3N]"` "EVAL : Downloading OSM data for Europe..." + @curl --progress-bar http://download.geofabrik.de/europe-latest.osm.pbf > $@ + +osm/switzerland-latest.osm: osm/europe-latest.osm.pbf osmconvert + @# pre-filter to greater switzerland + @echo `date +"[%F %T.%3N]"` "EVAL : Pre-filtering OSM data to Switzerland..." + @osmconvert -b=3.757,44.245,15.579,52.670 $< > $@ + +gtfs/vitoria/%.txt: + @echo `date +"[%F %T.%3N]"` "EVAL : Downloading GTFS data for Vitoria-Gasteiz..." + @mkdir -p gtfs + @mkdir -p gtfs/vitoria + @curl --progress-bar https://transitfeeds.com/p/tuvisa-euskotran/239/latest/download > gtfs/vitoria/gtfs.zip + @cd gtfs/vitoria && unzip -qq -o gtfs.zip + @rm gtfs/vitoria/gtfs.zip + +gtfs/stuttgart/%.txt: + @echo `date +"[%F %T.%3N]"` "EVAL : Downloading GTFS data for Stuttgart..." + @mkdir -p gtfs + @mkdir -p gtfs/stuttgart + @echo "******************************************************************" + @echo "* A password is required to access the VVS dataset. Send a mail *" + @echo "* to brosi@cs.informatik.uni-freiburg.de to receive the password. 
" + @echo "******************************************************************" + @curl --progress-bar http://www.vvs.de/download/opendata/VVS_GTFS.zip -su vvsopendata01 > gtfs/stuttgart/gtfs.zip + @cd gtfs/stuttgart && unzip -qq -o gtfs.zip + @rm gtfs/stuttgart/gtfs.zip + +gtfs/paris/%.txt: + @echo `date +"[%F %T.%3N]"` "EVAL : Downloading GTFS data for Paris..." + @mkdir -p gtfs + @mkdir -p gtfs/paris + @curl --progress-bar https://transitfeeds.com/p/stif/822/latest/download > gtfs/paris/gtfs.zip + @cd gtfs/paris && unzip -qq -o gtfs.zip + @rm gtfs/paris/gtfs.zip + +gtfs/switzerland/%.txt: + @echo `date +"[%F %T.%3N]"` "EVAL : Downloading GTFS data for Switzerland..." + @mkdir -p gtfs + @mkdir -p gtfs/switzerland + @curl --progress-bar http://gtfs.geops.ch/dl/gtfs_complete.zip > gtfs/switzerland/gtfs.zip + @cd gtfs/switzerland && unzip -qq -o gtfs.zip + @rm gtfs/switzerland/gtfs.zip + + +osm/vitoria.osm: osm/spain-latest.osm gtfs/vitoria/stops.txt gtfs/vitoria/trips.txt gtfs/vitoria/routes.txt gtfs/vitoria/stop_times.txt eval.cfg + @../build/pfaedle -x $< -i gtfs/vitoria/ -c eval.cfg -m all -X $@ + +osm/stuttgart.osm: osm/baden-wuerttemberg-latest.osm gtfs/stuttgart/stops.txt gtfs/stuttgart/trips.txt gtfs/stuttgart/routes.txt gtfs/stuttgart/stop_times.txt eval.cfg + @../build/pfaedle -x $< -i gtfs/stuttgart/ -c eval.cfg -m all -X $@ + +osm/paris.osm: osm/paris-latest.osm gtfs/paris/stops.txt gtfs/paris/trips.txt gtfs/paris/routes.txt gtfs/paris/stop_times.txt eval.cfg + @../build/pfaedle -x $< -i gtfs/paris/ -c eval.cfg -m all -X $@ + +osm/switzerland.osm: osm/switzerland-latest.osm gtfs/switzerland/stops.txt eval.cfg + @../build/pfaedle -x $< -i gtfs/switzerland/ -c eval.cfg -m all -X $@ diff --git a/eval/eval-wo-osm-line-rels.cfg b/eval/eval-wo-osm-line-rels.cfg new file mode 100644 index 0000000..e37f2b8 --- /dev/null +++ b/eval/eval-wo-osm-line-rels.cfg @@ -0,0 +1,987 @@ +# Copyright 2018, University of Freiburg +# Chair of Algorithms and Datastructures +# 
Authors: Patrick Brosi + +[rail] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_keep: + railway=rail + railway=light_rail + railway=narrow_gauge + route=rail + route=train + public_transport=stop_area|rel_flat + +osm_filter_lvl1: + usage=branch + +osm_filter_lvl2: + +osm_filter_lvl3: + service=crossover + service=siding + # we cannot completely drop service=yard, because it is often used + # incorrectly for crossovers + service=yard + +osm_filter_lvl4: + +osm_filter_lvl5: + usage=industrial + usage=military + usage=test + service=spur + railway:traffic_mode=freight + +# OSM entities to drop, as k=v. Applies to nodes, edges and +# relations. +# Nodes included in non-dropped ways are kept regardless of +# a matching drop filter. +# Ways included in non-dropped relations are kept regardless of +# a matching drop filter. + +osm_filter_drop: + railway=abandoned + railway=construction + railway=disused + railway=miniature + railway=signal + railway=razed + railway=proposed + metro=yes + area=yes + # access=no + type=multipolygon + railway=platform + public_transport=platform + building=yes + building=train_station + amenity=shelter + amenity=bus_station + building=roof + +# Nodes that should act as "no-hup" nodes. These are nodes +# that are contained in multiple ways, but cannot be used +# to switch from one way to another (for example, a +# track crossing in rail networks) + +osm_filter_nohup: + railway:switch=no + railway=railway_crossing + +# Edges that should act as one-way nodes. + +osm_filter_oneway: + oneway=yes + railway:preferred_direction=forward + +osm_filter_oneway_reverse: + railway:preferred_direction=backward + +# Edges that may explicitely be used in +# both directions. 
May be used to set exception +# to "osm_filter_oneway" + +osm_filter_undirected: + oneway=false + oneway=no + oneway=-1 + railway:preferred_direction=both + railway:bidirectional=regular + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. +osm_filter_station: + public_transport=stop_position + railway=stop + railway=halt + railway=station + #railway=tram_stop + railway=subway_stop + tram_stop=* + stop=* + +# Relation fields that should be used for catching the lines that +# occur on an edge. Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! 
+osm_station_group_attrs: + uic_ref=500 + wikidata=500 + [public_transport=stop_area]uic_ref=500 + [public_transport=stop_area]wikidata=500 + name=100 + [public_transport=stop_area]name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10, 100, 200 + +# max edge level to which station will be snapped +osm_max_snap_level: 2 + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + [public_transport=stop_area]name + uic_name + +# the track number tag in edges, first match is taken +osm_edge_track_number_tags: + railway:track_ref + local_ref + ref + +# the track number tag in stop nodes, first match is taken, +# overwrites osm_edge_track_number_tags +osm_track_number_tags: + local_ref + ref + +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.25 +routing_lvl2_fac: 1.5 +routing_lvl3_fac: 2 +routing_lvl4_fac: 2.5 +routing_lvl5_fac: 3.5 +routing_lvl6_fac: 5 +routing_lvl7_fac: 7 + +# Punishment (in meters) to add to the distance +# function if a vehicle performans a full turn +routing_full_turn_punish: 3000 + +routing_station_distance_punish_fac: 3.14 + +routing_non_osm_station_punish: 100 + +routing_platform_unmatched_punish: 2000 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 100 + +# Max angle in a route from a station to an already reachable neighbar +routing_snap_full_turn_angle: 100 + +# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 100 + +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 1 + +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +routing_line_unmatched_punish_fac: 1 + +# special line normalization for trains +line_normalize_chain: + , -> ' '; + - -> ' '; + _ -> ' '; + " -> 
''; + ' -> ''; + ` -> ''; + / -> ' '; + < -> ' '; + > -> ' '; + & -> '+'; + ä -> ae; + ö -> oe; + ü -> ue; + ß -> ss; + è -> e; + é -> e; + á -> a; + à -> a; + ó -> o; + ò -> o; + í -> i; + ú -> u; + ù -> u; + ë -> e; + ç -> c; + å -> ae; + â -> a; + ê -> e; + ï -> i; + œ -> oe; + ø -> oe; + ^line -> ''; + ^linie -> ''; + ^metro -> ''; + ^tram -> ''; + ^strassenbahn -> ''; + ^bus -> ''; + + # delete everything in brackets + \(.+\) -> ' '; + \[.+\] -> ' '; + + # whitespace + \s+ -> ' '; + ^\s -> ''; + \s$ -> ''; + + # line/number combs ALWAYS with whitespace (ICE101 -> ICE 101) + ^([a-zA-Z]+)([0-9]+)$ -> \1 \2; + + # if a character line number is present, delete the numeric part + ^([a-zA-Z]+) [0-9]+$ -> \1; + +[bus] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_keep: + # highways + highway=motorway + highway=trunk + highway=primary + highway=secondary + highway=tertiary + highway=residential + highway=living_street + highway=unclassified + + # highway links + highway=motorway_link + highway=trunk_link + highway=primary_link + highway=secondary_link + highway=tertiary_link + highway=residential_link + + way=primary + way=seconday + way=bus_guideway + highway=bus_guideway + busway=* + psv=yes + psv=designated + + trolley_wire=yes + trolleywire=yes + trolleybus=yes + trolley_bus=yes + + route=bus + route=trolleybus + bus=yes + bus=designated + minibus=designated + minibus=yes + + public_transport=stop_position + bus_stop=* + stop=* + highway=bus_stop + amenity=bus_station|no_match_ways|no_match_rels + + # relations for the restriction system + type=restriction + type=restriction:bus + type=restriction:motorcar + +osm_filter_lvl1: + highway=secondary + highway=secondary_link + bus=yes + bus=designated + minibus=yes + minibus=designated + psv=designated + psv=yes + access=psv + access=bus + trolley_wire=yes 
+ trolleywire=yes + trolleybus=yes + trolley_bus=yes + psv=designated + +osm_filter_lvl2: + highway=tertiary + highway=tertiary_link + +osm_filter_lvl3: + highway=unclassified + highway=residential + highway=road + +osm_filter_lvl4: + highway=living_street + highway=pedestrian + highway=service + psv=no + +osm_filter_lvl5: + bus=no + service=siding + access=permissive + access=private + access=no + service=parking_aisle + highway=footway + +# OSM entities to drop, as k=v. Applies to nodes, edges and +# relations. +# Nodes included in non-dropped ways are kept regardless of +# a matching drop filter. +# Ways included in non-dropped relations are kept regardless of +# a matching drop filter. + +osm_filter_drop: + area=yes + train=yes|no_match_ways + # access=no + public_transport=stop_area|no_match_nds|no_match_rels + type=multipolygon + railway=platform + railway=station + # service=parking_aisle + highway=proposed + highway=footway + highway=construction + building=yes + building=train_station + leisure=garden + leisure=park + +# Nodes that should act as "no-hup" nodes. 
These are nodes +# that are contained in multiple ways, but cannot be used +# to switch from one way to another (for example, a +# track crossing in rail networks) + +osm_filter_nohup: + +# Configuration of the OSM road restriction system +# We only support restriction with a single via node +# atm + +osm_node_negative_restriction: + restriction=no_right_turn + restriction=no_left_turn + restriction=no_u_turn + restriction=no_straight_on + restriction:bus=no_right_turn + restriction:bus=no_left_turn + restriction:bus=no_u_turn + restriction:bus=no_straight_on + +osm_node_positive_restriction: + restriction=only_left_turn + restriction=only_straight_on + restriction=only_right_turn + restriction:bus=only_left_turn + restriction:bus=only_straight_on + restriction:bus=only_right_turn + +osm_filter_no_restriction: + except=psv|mult_val_match + except=bus|mult_val_match + +# Edges that should act as one-way nodes. + +osm_filter_oneway: + junction=roundabout # oneway=yes is implied + highway=motorway # oneway=yes is implied + oneway=yes + oneway=1 + oneway=true + oneway:bus=yes + oneway:bus=1 + oneway:bus=true + oneway:psv=yes + oneway:psv=1 + oneway:psv=true + +osm_filter_oneway_reverse: + oneway=-1 + +# Edges that may explicitely be used in +# both directions. May be used to set exception +# to "osm_filter_oneway" + +osm_filter_undirected: + oneway=false + oneway=0 + oneway=alternating + oneway=reversible + oneway=no + oneway:bus=no + oneway:bus=0 + oneway:bus=false + oneway:psv=no + oneway:psv=0 + oneway:psv=false + busway=opposite_lane + busway=opposite + busway:left=opposite_lane + busway:right=opposite_lane + psv=opposite_lane + psv=opposite + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. +osm_filter_station: + public_transport=stop_position + bus_stop=* + stop=* + highway=bus_stop + amenity=bus_station + +# Relation fields that should be used for catching the lines that +# occur on an edge. 
Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! +osm_station_group_attrs: + uic_ref=500 + wikidata=500 + name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10 , 50, 100 + +osm_max_snap_level: 5 + +osm_max_osm_station_distance: 7.5 + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + uic_name + +# the track number tag in stop nodes, first one is taken +osm_track_number_tags: local_ref + +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.25 +routing_lvl2_fac: 1.5 +routing_lvl3_fac: 1.75 +routing_lvl4_fac: 2.25 +routing_lvl5_fac: 3 +routing_lvl6_fac: 4 +routing_lvl7_fac: 5 + +# Punishment (in meters) to add to the distance +# function if a vehicle performans a full turn +routing_full_turn_punish: 500 + +routing_station_distance_punish_fac: 2.5 + +routing_non_osm_station_punish: 500 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 20 + +# Max angle in a route from a station to an already reachable neighbor +routing_snap_full_turn_angle: 110 + +osm_max_node_block_distance: 10 + +# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 0 + +# Punishment factor for every meter a vehicle +# travels through a one-way 
edge +routing_one_way_meter_punish_fac: 4 + +routing_one_way_edge_punish: 5000 + +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +# routing_line_unmatched_punish_fac: 1.75 + +[tram, subway, funicular] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_keep: + route=tram + railway=subway + railway=light_rail + railway=tram + railway=funicular + railway=station + railway=halt + railway=tram_stop + route=subway + route=light_rail + subway=yes + tram=yes + +osm_filter_lv2: + service=siding + +osm_filter_lvl5: + service=crossover + service=yard + +# OSM entities to drop, as k=v. Applies to nodes, edges and +# relations. +# Nodes included in non-dropped ways are kept regardless of +# a matching drop filter. +# Ways included in non-dropped relations are kept regardless of +# a matching drop filter. + +osm_filter_drop: + area=yes + public_transport=stop_area + type=multipolygon + railway=platform + public_transport=platform + service=alley + +# Nodes that should act as "no-hup" nodes. These are nodes +# that are contained in multiple ways, but cannot be used +# to switch from one way to another (for example, a +# track crossing in rail networks) + +osm_filter_nohup: + railway:switch=no + railway=railway_crossing + +# Edges that should act as one-way nodes. + +osm_filter_oneway: + oneway=yes + +# Edges that may explicitely be used in +# both directions. May be used to set exception +# to "osm_filter_oneway" + +osm_filter_undirected: + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. 
+osm_filter_station: + public_transport=stop_position + station=subway + station=tram + railway=stop + railway=halt + railway=station + railway=tram_stop + railway=subway_stop + tram_stop=* + stop=* + +# Relation fields that should be used for catching the lines that +# occur on an edge. Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! +osm_station_group_attrs: + uic_ref=500 + wikidata=500 + name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10, 50, 100 + +osm_max_snap_level: 4 + + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + uic_name + +# the track number tag in stop nodes, first one is taken +osm_track_number_tags: local_ref + +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.5 +routing_lvl2_fac: 2 +routing_lvl3_fac: 2.5 +routing_lvl4_fac: 3.5 +routing_lvl5_fac: 5 +routing_lvl6_fac: 5 +routing_lvl7_fac: 5 + +# Punishment (in meters) to add to the distance +# function if a vehicle performans a full turn +routing_full_turn_punish: 2000 + +routing_station_distance_punish_fac: 3.14 + +routing_non_osm_station_punish: 235 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 80 + +# Max angle in a route from a station to an already reachable neighbar +routing_snap_full_turn_angle: 80 + +# Punishment (in meters) 
to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 100 + +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 1 + +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +routing_line_unmatched_punish_fac: 0.5 + +[ferry] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_keep: + route=ferry + waterway=river + motorboat=yes + ferry=yes + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. +osm_filter_station: + public_transport=stop_position + station=ferry + railway=stop + railway=halt + railway=station + stop=* + +# Relation fields that should be used for catching the lines that +# occur on an edge. Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! 
+osm_station_group_attrs: + uic_ref=500 + wikidata=500 + name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10, 100, 200 + +osm_max_snap_level: 4 + + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + uic_name + +# the track number tag in stop nodes, first one is taken +osm_track_number_tags: local_ref + +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.5 +routing_lvl2_fac: 2 +routing_lvl3_fac: 2.5 +routing_lvl4_fac: 3.5 +routing_lvl5_fac: 5 +routing_lvl6_fac: 5 +routing_lvl7_fac: 5 + +# Punishment (in meters) to add to the distance +# function if a vehicle performans a full turn +routing_full_turn_punish: 100 + +routing_station_distance_punish_fac: 3.14 + +routing_non_osm_station_punish: 50 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 45 + +# Max angle in a route from a station to an already reachable neighbar +routing_snap_full_turn_angle: 0 + +# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 0 + +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 1 + +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +routing_line_unmatched_punish_fac: 0.5 + +[tram, bus, subway, rail, gondola, funicular, ferry] +# Regular expressions and station comparision is +# always case insensitive! 
+station_normalize_chain: + , -> ' '; + - -> ' '; + — -> ' '; + _ -> ' '; + " -> ''; + ' -> ''; + ` -> ''; + \( -> ' '; + \) -> ' '; + \[ -> ' '; + \] -> ' '; + / -> ' '; + '\\' -> ' '; + < -> ' '; + > -> ' '; + & -> '+'; + ä -> ae; + ö -> oe; + ü -> ue; + ß -> ss; + è -> e; + é -> e; + á -> a; + à -> a; + ó -> o; + ò -> o; + ô -> o; + ç -> c; + í -> i; + ú -> u; + ù -> u; + ë -> e; + å -> ae; + â -> a; + ê -> e; + ï -> i; + œ -> oe; + ø -> oe; + str\. -> strasse; + av\. -> avenue; + + # always separate 'street', 'strasse' + '([a-zA-Z])strasse($| )' -> '\1 strasse\2'; + '([a-zA-Z])street($| )' -> '\1 street\2'; + + # always use "street" + '(^| )strasse($| )' -> '\1street\2'; + + # always use "avenue" + '(^| )avenida($| )' -> '\1avenue\2'; + '(^| )avenu($| )' -> '\1avenue\2'; + + # normalize every possible abbr. of german "Bahnhof", "Hauptbahnhof", "Busbahnhof" + '(^| )hauptbf\.($| )' -> '\1hauptbahnhof\2'; + '(^| )hauptbf($| )' -> '\1hauptbahnhof\2'; + '(^| )hauptbhf\.($| )' -> '\1hauptbahnhof\2'; + '(^| )hauptbhf($| )' -> '\1hauptbahnhof\2'; + '(^| )zentraler busbahnhof($| )$' -> \1busbahnhof\2; + '(^| )zentraler omnibusbahnhof($| )$' -> \1busbahnhof\2; + '(^| )omnibusbahnhof($| )' -> '\1busbahnhof\2'; + '(^| )omnibusbhf($| )' -> '\1busbahnhof\2'; + '(^| )busbf\.($| )' -> '\1busbahnhof\2'; + '(^| )busbf($| )' -> '\1busbahnhof\2'; + '(^| )bus bf\.($| )' -> '\1busbahnhof\2'; + '(^| )bus bf($| )' -> '\1busbahnhof\2'; + '(^| )busbhf\.($| )' -> '\1busbahnhof\2'; + '(^| )busbhf($| )' -> '\1busbahnhof\2'; + '(^| )bus bhf\.($| )' -> '\1busbahnhof\2'; + '(^| )bus bhf($| )' -> '\1busbahnhof\2'; + '(^| )zob($| )' -> '\1busbahnhof\2'; + '(^| )hbf\.($| )' -> '\1hauptbahnhof\2'; + '(^| )hbf($| )' -> '\1hauptbahnhof\2'; + '(^| )hb\.($| )' -> '\1hauptbahnhof\2'; + '(^| )hb($| )' -> '\1hauptbahnhof\2'; + '(^| )bf\.($| )' -> '\1bahnhof\2'; + '(^| )bf($| )' -> '\1bahnhof\2'; + '(^| )bhf\.($| )' -> '\1bahnhof\2'; + '(^| )bhf($| )' -> '\1bahnhof\2'; + '(^| )bhfeingang($| )' -> 
'\1bahnhofeingang\2'; + '(^| )gare de($| )' -> '\1gare\2'; + + + # if a stations starts with single station identifier + # always put it at the end (for example, "hauptbahnhof freiburg" becomes "freiburg hauptbahnhof") + '^hauptbahnhof (.+)$' -> \1 hauptbahnhof; + '^bahnhof (.+)$' -> \1 bahnhof; + '^busbahnhof (.+)$' -> \1 busbahnhof; + '^gare (.+)$' -> \1 gare; + '^station (.+)$' -> \1 station; + + '(^| )busbahnhof($| )' -> '\1bbahnhof\2'; + + # normalize line types in station names + '(^| )u bahn\.($| )' -> '\1ubahn\2'; + '(^| )metro\.($| )' -> '\1ubahn\2'; + '(^| )subway\.($| )' -> '\1ubahn\2'; + '(^| )underground\.($| )' -> '\1ubahn\2'; + '(^| )ubahn($| )' -> '\1u\2'; + '(^| )s bahn\.($| )' -> '\1sbahn\2'; + '(^| )sbahn($| )' -> '\1s\2'; + '(^| )tramway($| )' -> '\1tram\2'; + '(^| )stadtbahn($| )' -> '\1tram\2'; + '(^| )strassenbahn($| )' -> '\1tram\2'; + '(^| )streetcar($| )' -> '\1tram\2'; + '(^| )tram($| )' -> '\1t\2'; + + # delete track information from name + '(^| )kante [a-zA-Z0-9]{1,2}($| )' -> ' '; + '(^| )gleis [a-zA-Z0-9]{1,2}($| )' -> ' '; + '(^| )track [a-zA-Z0-9]{1,2}($| )' -> ' '; + '(^| )voie [a-zA-Z0-9]{1,2}($| )' -> ' '; + + # abbrv + '(^| )und($| )' -> '\1+\2'; + '(^| )and($| )' -> '\1+\2'; + '(^| )et($| )' -> '\1+\2'; + + # noise + '\sde\s' -> ' '; + '\sda\s' -> ' '; + '\sdi\s' -> ' '; + '\sdel\s' -> ' '; + '\sdal\s' -> ' '; + + # abbrv in most western languages + '(^| )saint ' -> '\1st. '; + '(^| )sankt ' -> '\1st. '; + '(^| )sanct ' -> '\1st. '; + + \. 
-> ' '; + + # whitespace + \s+ -> ' '; + ^\s -> ''; + \s$ -> ''; + +line_normalize_chain: + , -> ' '; + - -> ' '; + _ -> ' '; + " -> ''; + ' -> ''; + ` -> ''; + / -> ' '; + < -> ' '; + > -> ' '; + & -> '+'; + ä -> ae; + ö -> oe; + ü -> ue; + ß -> ss; + è -> e; + é -> e; + á -> a; + à -> a; + ó -> o; + ò -> o; + í -> i; + ú -> u; + ù -> u; + ë -> e; + å -> ae; + ç -> c; + â -> a; + ê -> e; + ï -> i; + œ -> oe; + ø -> oe; + ^line -> ''; + ^linie -> ''; + ^metro -> ''; + ^tram -> ''; + ^strassenbahn -> ''; + ^bus -> ''; + + # delete everything in brackets + \(.+\) -> ' '; + \[.+\] -> ' '; + + # whitespace + \s+ -> ' '; + ^\s -> ''; + \s$ -> ''; + + # line/number combs ALWAYS without whitespace (T 2 -> T2) + ^([a-zA-Z]+) ([0-9]+)$ -> \1\2; + +track_normalize_chain: + '(^| )gleis($| )' -> ''; + '(^| )gl\.($| )' -> ''; + '(^| )platform($| )' -> ''; + '(^| )track($| )' -> ''; + '(^| )rail($| )' -> ''; + # line/number combs ALWAYS without whitespace (1 A -> 1A) + ^([a-zA-Z]+) ([0-9]+)$ -> \1\2; + ^([0-9]+) ([a-zA-Z]+)$ -> \1\2; + + # delete track numbers greater than 999 + ^[0-9]{4,}$ -> ''; diff --git a/eval/eval.cfg b/eval/eval.cfg new file mode 100644 index 0000000..ab4189c --- /dev/null +++ b/eval/eval.cfg @@ -0,0 +1,1002 @@ +# Copyright 2018, University of Freiburg +# Chair of Algorithms and Datastructures +# Authors: Patrick Brosi + +[rail] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. 
+ +osm_filter_keep: + railway=rail + railway=light_rail + railway=narrow_gauge + route=rail + route=train + public_transport=stop_area|rel_flat + +osm_filter_lvl1: + usage=branch + +osm_filter_lvl2: + +osm_filter_lvl3: + service=crossover + service=siding + # we cannot completely drop service=yard, because it is often used + # incorrectly for crossovers + service=yard + +osm_filter_lvl4: + +osm_filter_lvl5: + usage=industrial + usage=military + usage=test + service=spur + railway:traffic_mode=freight + +# OSM entities to drop, as k=v. Applies to nodes, edges and +# relations. +# Nodes included in non-dropped ways are kept regardless of +# a matching drop filter. +# Ways included in non-dropped relations are kept regardless of +# a matching drop filter. + +osm_filter_drop: + railway=abandoned + railway=construction + railway=disused + railway=miniature + railway=signal + railway=razed + railway=proposed + metro=yes + area=yes + # access=no + type=multipolygon + railway=platform + public_transport=platform + building=yes + building=train_station + amenity=shelter + amenity=bus_station + building=roof + +# Nodes that should act as "no-hup" nodes. These are nodes +# that are contained in multiple ways, but cannot be used +# to switch from one way to another (for example, a +# track crossing in rail networks) + +osm_filter_nohup: + railway:switch=no + railway=railway_crossing + +# Edges that should act as one-way nodes. + +osm_filter_oneway: + oneway=yes + railway:preferred_direction=forward + +osm_filter_oneway_reverse: + railway:preferred_direction=backward + +# Edges that may explicitely be used in +# both directions. May be used to set exception +# to "osm_filter_oneway" + +osm_filter_undirected: + oneway=false + oneway=no + oneway=-1 + railway:preferred_direction=both + railway:bidirectional=regular + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. 
+osm_filter_station: + public_transport=stop_position + railway=stop + railway=halt + railway=station + #railway=tram_stop + railway=subway_stop + tram_stop=* + stop=* + +# Relation fields that should be used for catching the lines that +# occur on an edge. Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + line_name=ref,name # careful, no space after/before comma allowed! + from_name=from + to_name=to + +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! 
+osm_station_group_attrs: + uic_ref=500 + wikidata=500 + [public_transport=stop_area]uic_ref=500 + [public_transport=stop_area]wikidata=500 + name=100 + [public_transport=stop_area]name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10, 100, 200 + +# max edge level to which station will be snapped +osm_max_snap_level: 2 + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + [public_transport=stop_area]name + uic_name + +# the track number tag in edges, first match is taken +osm_edge_track_number_tags: + railway:track_ref + local_ref + ref + +# the track number tag in stop nodes, first match is taken, +# overwrites osm_edge_track_number_tags +osm_track_number_tags: + local_ref + ref + +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.25 +routing_lvl2_fac: 1.5 +routing_lvl3_fac: 2 +routing_lvl4_fac: 2.5 +routing_lvl5_fac: 3.5 +routing_lvl6_fac: 5 +routing_lvl7_fac: 7 + +# Punishment (in meters) to add to the distance +# function if a vehicle performans a full turn +routing_full_turn_punish: 3000 + +routing_station_distance_punish_fac: 3.14 + +routing_non_osm_station_punish: 100 + +routing_platform_unmatched_punish: 2000 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 100 + +# Max angle in a route from a station to an already reachable neighbar +routing_snap_full_turn_angle: 100 + +# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 100 + +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 1 + +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +routing_line_unmatched_punish_fac: 1 + +# special line normalization for trains +line_normalize_chain: + , -> ' '; + - -> ' '; + _ -> ' '; + " -> 
''; + ' -> ''; + ` -> ''; + / -> ' '; + < -> ' '; + > -> ' '; + & -> '+'; + ä -> ae; + ö -> oe; + ü -> ue; + ß -> ss; + è -> e; + é -> e; + á -> a; + à -> a; + ó -> o; + ò -> o; + í -> i; + ú -> u; + ù -> u; + ë -> e; + ç -> c; + å -> ae; + â -> a; + ê -> e; + ï -> i; + œ -> oe; + ø -> oe; + ^line -> ''; + ^linie -> ''; + ^metro -> ''; + ^tram -> ''; + ^strassenbahn -> ''; + ^bus -> ''; + + # delete everything in brackets + \(.+\) -> ' '; + \[.+\] -> ' '; + + # whitespace + \s+ -> ' '; + ^\s -> ''; + \s$ -> ''; + + # line/number combs ALWAYS with whitespace (ICE101 -> ICE 101) + ^([a-zA-Z]+)([0-9]+)$ -> \1 \2; + + # if a character line number is present, delete the numeric part + ^([a-zA-Z]+) [0-9]+$ -> \1; + +[bus] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_keep: + # highways + highway=motorway + highway=trunk + highway=primary + highway=secondary + highway=tertiary + highway=residential + highway=living_street + highway=unclassified + + # highway links + highway=motorway_link + highway=trunk_link + highway=primary_link + highway=secondary_link + highway=tertiary_link + highway=residential_link + + way=primary + way=seconday + way=bus_guideway + highway=bus_guideway + busway=* + psv=yes + psv=designated + + trolley_wire=yes + trolleywire=yes + trolleybus=yes + trolley_bus=yes + + route=bus + route=trolleybus + bus=yes + bus=designated + minibus=designated + minibus=yes + + public_transport=stop_position + bus_stop=* + stop=* + highway=bus_stop + amenity=bus_station|no_match_ways|no_match_rels + + # relations for the restriction system + type=restriction + type=restriction:bus + type=restriction:motorcar + +osm_filter_lvl1: + highway=secondary + highway=secondary_link + bus=yes + bus=designated + minibus=yes + minibus=designated + psv=designated + psv=yes + access=psv + access=bus + trolley_wire=yes 
+ trolleywire=yes + trolleybus=yes + trolley_bus=yes + psv=designated + +osm_filter_lvl2: + highway=tertiary + highway=tertiary_link + +osm_filter_lvl3: + highway=unclassified + highway=residential + highway=road + +osm_filter_lvl4: + highway=living_street + highway=pedestrian + highway=service + psv=no + +osm_filter_lvl5: + bus=no + service=siding + access=permissive + access=private + access=no + service=parking_aisle + highway=footway + +# OSM entities to drop, as k=v. Applies to nodes, edges and +# relations. +# Nodes included in non-dropped ways are kept regardless of +# a matching drop filter. +# Ways included in non-dropped relations are kept regardless of +# a matching drop filter. + +osm_filter_drop: + area=yes + train=yes|no_match_ways + # access=no + public_transport=stop_area|no_match_nds|no_match_rels + type=multipolygon + railway=platform + railway=station + # service=parking_aisle + highway=proposed + highway=footway + highway=construction + building=yes + building=train_station + leisure=garden + leisure=park + +# Nodes that should act as "no-hup" nodes. 
These are nodes +# that are contained in multiple ways, but cannot be used +# to switch from one way to another (for example, a +# track crossing in rail networks) + +osm_filter_nohup: + +# Configuration of the OSM road restriction system +# We only support restriction with a single via node +# atm + +osm_node_negative_restriction: + restriction=no_right_turn + restriction=no_left_turn + restriction=no_u_turn + restriction=no_straight_on + restriction:bus=no_right_turn + restriction:bus=no_left_turn + restriction:bus=no_u_turn + restriction:bus=no_straight_on + +osm_node_positive_restriction: + restriction=only_left_turn + restriction=only_straight_on + restriction=only_right_turn + restriction:bus=only_left_turn + restriction:bus=only_straight_on + restriction:bus=only_right_turn + +osm_filter_no_restriction: + except=psv|mult_val_match + except=bus|mult_val_match + +# Edges that should act as one-way nodes. + +osm_filter_oneway: + junction=roundabout # oneway=yes is implied + highway=motorway # oneway=yes is implied + oneway=yes + oneway=1 + oneway=true + oneway:bus=yes + oneway:bus=1 + oneway:bus=true + oneway:psv=yes + oneway:psv=1 + oneway:psv=true + +osm_filter_oneway_reverse: + oneway=-1 + +# Edges that may explicitely be used in +# both directions. May be used to set exception +# to "osm_filter_oneway" + +osm_filter_undirected: + oneway=false + oneway=0 + oneway=alternating + oneway=reversible + oneway=no + oneway:bus=no + oneway:bus=0 + oneway:bus=false + oneway:psv=no + oneway:psv=0 + oneway:psv=false + busway=opposite_lane + busway=opposite + busway:left=opposite_lane + busway:right=opposite_lane + psv=opposite_lane + psv=opposite + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. +osm_filter_station: + public_transport=stop_position + bus_stop=* + stop=* + highway=bus_stop + amenity=bus_station + +# Relation fields that should be used for catching the lines that +# occur on an edge. 
Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + line_name=ref,name # careful, no space after/before comma allowed! + from_name=from + to_name=to + + +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! +osm_station_group_attrs: + uic_ref=500 + wikidata=500 + name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10 , 50, 100 + +osm_max_snap_level: 5 + +osm_max_osm_station_distance: 7.5 + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + uic_name + +# the track number tag in stop nodes, first one is taken +osm_track_number_tags: local_ref + +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.25 +routing_lvl2_fac: 1.5 +routing_lvl3_fac: 1.75 +routing_lvl4_fac: 2.25 +routing_lvl5_fac: 3 +routing_lvl6_fac: 4 +routing_lvl7_fac: 5 + +# Punishment (in meters) to add to the distance +# function if a vehicle performans a full turn +routing_full_turn_punish: 500 + +routing_station_distance_punish_fac: 2.5 + +routing_non_osm_station_punish: 500 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 20 + +# Max angle in a route from a station to an already reachable neighbor +routing_snap_full_turn_angle: 110 + +osm_max_node_block_distance: 10 + +# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there 
+routing_pass_thru_station_punish: 0 + +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 4 + +routing_one_way_edge_punish: 5000 + +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +# routing_line_unmatched_punish_fac: 1.75 + +[tram, subway, funicular] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_keep: + route=tram + railway=subway + railway=light_rail + railway=tram + railway=funicular + railway=station + railway=halt + railway=tram_stop + route=subway + route=light_rail + subway=yes + tram=yes + +osm_filter_lv2: + service=siding + +osm_filter_lvl5: + service=crossover + service=yard + +# OSM entities to drop, as k=v. Applies to nodes, edges and +# relations. +# Nodes included in non-dropped ways are kept regardless of +# a matching drop filter. +# Ways included in non-dropped relations are kept regardless of +# a matching drop filter. + +osm_filter_drop: + area=yes + public_transport=stop_area + type=multipolygon + railway=platform + public_transport=platform + service=alley + +# Nodes that should act as "no-hup" nodes. These are nodes +# that are contained in multiple ways, but cannot be used +# to switch from one way to another (for example, a +# track crossing in rail networks) + +osm_filter_nohup: + railway:switch=no + railway=railway_crossing + +# Edges that should act as one-way nodes. + +osm_filter_oneway: + oneway=yes + +# Edges that may explicitely be used in +# both directions. May be used to set exception +# to "osm_filter_oneway" + +osm_filter_undirected: + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. 
+osm_filter_station: + public_transport=stop_position + station=subway + station=tram + railway=stop + railway=halt + railway=station + railway=tram_stop + railway=subway_stop + tram_stop=* + stop=* + +# Relation fields that should be used for catching the lines that +# occur on an edge. Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + line_name=ref,name # careful, no space after/before comma allowed! + from_name=from + to_name=to + + +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! 
+osm_station_group_attrs: + uic_ref=500 + wikidata=500 + name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10, 50, 100 + +osm_max_snap_level: 4 + + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + uic_name + +# the track number tag in stop nodes, first one is taken +osm_track_number_tags: local_ref + +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.5 +routing_lvl2_fac: 2 +routing_lvl3_fac: 2.5 +routing_lvl4_fac: 3.5 +routing_lvl5_fac: 5 +routing_lvl6_fac: 5 +routing_lvl7_fac: 5 + +# Punishment (in meters) to add to the distance +# function if a vehicle performans a full turn +routing_full_turn_punish: 2000 + +routing_station_distance_punish_fac: 3.14 + +routing_non_osm_station_punish: 235 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 80 + +# Max angle in a route from a station to an already reachable neighbar +routing_snap_full_turn_angle: 80 + +# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 100 + +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 1 + +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +routing_line_unmatched_punish_fac: 0.5 + +[ferry] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_keep: + route=ferry + waterway=river + motorboat=yes + ferry=yes + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. 
+osm_filter_station: + public_transport=stop_position + station=ferry + railway=stop + railway=halt + railway=station + stop=* + +# Relation fields that should be used for catching the lines that +# occur on an edge. Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + line_name=ref,name # careful, no space after/before comma allowed! + from_name=from + to_name=to + + +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! +osm_station_group_attrs: + uic_ref=500 + wikidata=500 + name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10, 100, 200 + +osm_max_snap_level: 4 + + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + uic_name + +# the track number tag in stop nodes, first one is taken +osm_track_number_tags: local_ref + +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.5 +routing_lvl2_fac: 2 +routing_lvl3_fac: 2.5 +routing_lvl4_fac: 3.5 +routing_lvl5_fac: 5 +routing_lvl6_fac: 5 +routing_lvl7_fac: 5 + +# Punishment (in meters) to add to the distance +# function if a vehicle performans a full turn +routing_full_turn_punish: 100 + +routing_station_distance_punish_fac: 3.14 + +routing_non_osm_station_punish: 50 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 45 + +# Max angle in a route from a station to an already reachable neighbar +routing_snap_full_turn_angle: 0 + 
+# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 0 + +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 1 + +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +routing_line_unmatched_punish_fac: 0.5 + +[tram, bus, subway, rail, gondola, funicular, ferry] +# Regular expressions and station comparision is +# always case insensitive! +station_normalize_chain: + , -> ' '; + - -> ' '; + — -> ' '; + _ -> ' '; + " -> ''; + ' -> ''; + ` -> ''; + \( -> ' '; + \) -> ' '; + \[ -> ' '; + \] -> ' '; + / -> ' '; + '\\' -> ' '; + < -> ' '; + > -> ' '; + & -> '+'; + ä -> ae; + ö -> oe; + ü -> ue; + ß -> ss; + è -> e; + é -> e; + á -> a; + à -> a; + ó -> o; + ò -> o; + ô -> o; + ç -> c; + í -> i; + ú -> u; + ù -> u; + ë -> e; + å -> ae; + â -> a; + ê -> e; + ï -> i; + œ -> oe; + ø -> oe; + str\. -> strasse; + av\. -> avenue; + + # always separate 'street', 'strasse' + '([a-zA-Z])strasse($| )' -> '\1 strasse\2'; + '([a-zA-Z])street($| )' -> '\1 street\2'; + + # always use "street" + '(^| )strasse($| )' -> '\1street\2'; + + # always use "avenue" + '(^| )avenida($| )' -> '\1avenue\2'; + '(^| )avenu($| )' -> '\1avenue\2'; + + # normalize every possible abbr. 
of german "Bahnhof", "Hauptbahnhof", "Busbahnhof" + '(^| )hauptbf\.($| )' -> '\1hauptbahnhof\2'; + '(^| )hauptbf($| )' -> '\1hauptbahnhof\2'; + '(^| )hauptbhf\.($| )' -> '\1hauptbahnhof\2'; + '(^| )hauptbhf($| )' -> '\1hauptbahnhof\2'; + '(^| )zentraler busbahnhof($| )$' -> \1busbahnhof\2; + '(^| )zentraler omnibusbahnhof($| )$' -> \1busbahnhof\2; + '(^| )omnibusbahnhof($| )' -> '\1busbahnhof\2'; + '(^| )omnibusbhf($| )' -> '\1busbahnhof\2'; + '(^| )busbf\.($| )' -> '\1busbahnhof\2'; + '(^| )busbf($| )' -> '\1busbahnhof\2'; + '(^| )bus bf\.($| )' -> '\1busbahnhof\2'; + '(^| )bus bf($| )' -> '\1busbahnhof\2'; + '(^| )busbhf\.($| )' -> '\1busbahnhof\2'; + '(^| )busbhf($| )' -> '\1busbahnhof\2'; + '(^| )bus bhf\.($| )' -> '\1busbahnhof\2'; + '(^| )bus bhf($| )' -> '\1busbahnhof\2'; + '(^| )zob($| )' -> '\1busbahnhof\2'; + '(^| )hbf\.($| )' -> '\1hauptbahnhof\2'; + '(^| )hbf($| )' -> '\1hauptbahnhof\2'; + '(^| )hb\.($| )' -> '\1hauptbahnhof\2'; + '(^| )hb($| )' -> '\1hauptbahnhof\2'; + '(^| )bf\.($| )' -> '\1bahnhof\2'; + '(^| )bf($| )' -> '\1bahnhof\2'; + '(^| )bhf\.($| )' -> '\1bahnhof\2'; + '(^| )bhf($| )' -> '\1bahnhof\2'; + '(^| )bhfeingang($| )' -> '\1bahnhofeingang\2'; + '(^| )gare de($| )' -> '\1gare\2'; + + + # if a stations starts with single station identifier + # always put it at the end (for example, "hauptbahnhof freiburg" becomes "freiburg hauptbahnhof") + '^hauptbahnhof (.+)$' -> \1 hauptbahnhof; + '^bahnhof (.+)$' -> \1 bahnhof; + '^busbahnhof (.+)$' -> \1 busbahnhof; + '^gare (.+)$' -> \1 gare; + '^station (.+)$' -> \1 station; + + '(^| )busbahnhof($| )' -> '\1bbahnhof\2'; + + # normalize line types in station names + '(^| )u bahn\.($| )' -> '\1ubahn\2'; + '(^| )metro\.($| )' -> '\1ubahn\2'; + '(^| )subway\.($| )' -> '\1ubahn\2'; + '(^| )underground\.($| )' -> '\1ubahn\2'; + '(^| )ubahn($| )' -> '\1u\2'; + '(^| )s bahn\.($| )' -> '\1sbahn\2'; + '(^| )sbahn($| )' -> '\1s\2'; + '(^| )tramway($| )' -> '\1tram\2'; + '(^| )stadtbahn($| )' -> '\1tram\2'; + 
'(^| )strassenbahn($| )' -> '\1tram\2'; + '(^| )streetcar($| )' -> '\1tram\2'; + '(^| )tram($| )' -> '\1t\2'; + + # delete track information from name + '(^| )kante [a-zA-Z0-9]{1,2}($| )' -> ' '; + '(^| )gleis [a-zA-Z0-9]{1,2}($| )' -> ' '; + '(^| )track [a-zA-Z0-9]{1,2}($| )' -> ' '; + '(^| )voie [a-zA-Z0-9]{1,2}($| )' -> ' '; + + # abbrv + '(^| )und($| )' -> '\1+\2'; + '(^| )and($| )' -> '\1+\2'; + '(^| )et($| )' -> '\1+\2'; + + # noise + '\sde\s' -> ' '; + '\sda\s' -> ' '; + '\sdi\s' -> ' '; + '\sdel\s' -> ' '; + '\sdal\s' -> ' '; + + # abbrv in most western languages + '(^| )saint ' -> '\1st. '; + '(^| )sankt ' -> '\1st. '; + '(^| )sanct ' -> '\1st. '; + + \. -> ' '; + + # whitespace + \s+ -> ' '; + ^\s -> ''; + \s$ -> ''; + +line_normalize_chain: + , -> ' '; + - -> ' '; + _ -> ' '; + " -> ''; + ' -> ''; + ` -> ''; + / -> ' '; + < -> ' '; + > -> ' '; + & -> '+'; + ä -> ae; + ö -> oe; + ü -> ue; + ß -> ss; + è -> e; + é -> e; + á -> a; + à -> a; + ó -> o; + ò -> o; + í -> i; + ú -> u; + ù -> u; + ë -> e; + å -> ae; + ç -> c; + â -> a; + ê -> e; + ï -> i; + œ -> oe; + ø -> oe; + ^line -> ''; + ^linie -> ''; + ^metro -> ''; + ^tram -> ''; + ^strassenbahn -> ''; + ^bus -> ''; + + # delete everything in brackets + \(.+\) -> ' '; + \[.+\] -> ' '; + + # whitespace + \s+ -> ' '; + ^\s -> ''; + \s$ -> ''; + + # line/number combs ALWAYS without whitespace (T 2 -> T2) + ^([a-zA-Z]+) ([0-9]+)$ -> \1\2; + +track_normalize_chain: + '(^| )gleis($| )' -> ''; + '(^| )gl\.($| )' -> ''; + '(^| )platform($| )' -> ''; + '(^| )track($| )' -> ''; + '(^| )rail($| )' -> ''; + # line/number combs ALWAYS without whitespace (1 A -> 1A) + ^([a-zA-Z]+) ([0-9]+)$ -> \1\2; + ^([0-9]+) ([a-zA-Z]+)$ -> \1\2; + + # delete track numbers greater than 999 + ^[0-9]{4,}$ -> ''; diff --git a/geo/pfaedle.qgs b/geo/pfaedle.qgs index aa197d3..2ec03bf 100644 --- a/geo/pfaedle.qgs +++ b/geo/pfaedle.qgs @@ -1,140 +1,176 @@ - + + - + + + + +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 
+y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs + 3857 + 3857 + EPSG:3857 + WGS 84 / Pseudo-Mercator + merc + WGS84 + false + + + - + - + - + - + - + - + - + - - + + - + - + - + - + + + + + + + - + - + + + OGRGeoJSON_Point20180203134333739 + OGRGeoJSON_LineString20180203134333975 + OGRGeoJSON_Point20180206114956218 + OGRGeoJSON_LineString20180206114956229 + path20180217155708341 + trgraph_trgraph_LineString20180508200527144 + trgraph_trgraph_Point20180508200527256 + OSM_Transportation20181215024818603 + OpenStreetMap_de20181215024846026 + + + + + + + + + + + + - + meters - 866081.24618305882904679 - 6076552.62015097495168447 - 870662.44395622855518013 - 6080842.96235341485589743 + -374853.74009754881262779 + 4605645.85081499628722668 + 2640376.65992686524987221 + 7553306.65061968378722668 0 - 1 - +proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs - 1353 - 3395 - EPSG:3395 - WGS 84 / World Mercator + +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs + 3857 + 3857 + EPSG:3857 + WGS 84 / Pseudo-Mercator merc WGS84 false 0 - - - - - - - - - - path20180217155708341 - OGRGeoJSON_Point20180203134333739 - OGRGeoJSON_LineString20180203134333975 - OGRGeoJSON_Point20180206114956218 - OGRGeoJSON_LineString20180206114956229 - OSM_Transportation20181215024818603 - OpenStreetMap_de20181215024846026 - - + - + + + + + + + + + - + - + 6.70734330570000026 47.04982400000000098 @@ -159,1127 +195,1730 @@ true + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + + + + + false + + + + + + + + + + + + ogr + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + 1 + 1 + 1 + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + 
+ + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + 
+ + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - + 0 0 - 0 - station_alt_names - - - - - - - + 1 + + + + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - . + + + + + . 0 . @@ -1302,35 +1941,55 @@ def my_form_open(dialog, layer, feature): ]]> 0 generatedlayout + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - station_name + - + 6.70734330570000026 47.05522500000000008 @@ -1355,129 +2014,480 @@ def my_form_open(dialog, layer, feature): true - - - - - + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + + + + + false + + + + + + + + + + + + ogr + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + 1 + 1 + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - . + + + + + . 0 . 
@@ -1500,35 +2510,53 @@ def my_form_open(dialog, layer, feature): ]]> 0 generatedlayout + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - station_name + - + 6.70734330570000026 47.04982400000000098 @@ -1553,326 +2581,615 @@ def my_form_open(dialog, layer, feature): true + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + + + + + false + + + + + + + + + + + + ogr + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + 1 + 1 + 1 + + - - - - - + + + + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + - + 0 0 - 0 - station_alt_names - - - - - - - + 1 + + + + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - . + + + + + . 0 . 
@@ -1895,39 +3212,59 @@ def my_form_open(dialog, layer, feature): ]]> 0 generatedlayout + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - station_name + - + 6.70734330570000026 47.05522500000000008 @@ -1952,208 +3289,432 @@ def my_form_open(dialog, layer, feature): true + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + + + + + false + + + + + + + + + + + + ogr + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + 1 + 1 + 1 + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - + - - + 0 0 - 0 - station_alt_names - - - - - - - + 1 + + + + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - . + + + + + . 0 . 
@@ -2176,34 +3737,53 @@ def my_form_open(dialog, layer, feature): ]]> 0 generatedlayout + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - station_name + - + -20037508.34278924390673637 -20037508.34278925508260727 @@ -2223,33 +3803,70 @@ def my_form_open(dialog, layer, feature): 3857 3857 EPSG:3857 - WGS 84 / Pseudo Mercator + WGS 84 / Pseudo-Mercator merc WGS84 false - - - + + + + + + + + + + + + + + 0 + 0 + + + + + false + + + + wms - + + + 1 + 1 + 1 + + + + - + + + None + WholeRaster + Estimated + 0.02 + 0.98 + 2 + - + 0 - + -20037508.34278924390673637 -20037508.34278925508260727 @@ -2269,33 +3886,70 @@ def my_form_open(dialog, layer, feature): 3857 3857 EPSG:3857 - WGS 84 / Pseudo Mercator + WGS 84 / Pseudo-Mercator merc WGS84 false - - - + + + + + + + + + + + + + + 0 + 0 + + + + + false + + + + wms - + + + 1 + 1 + 1 + + + + - + + + None + WholeRaster + Estimated + 0.02 + 0.98 + 2 + - + 0 - + 6.70738836720000009 47.05522500000000008 @@ -2320,147 +3974,193 @@ def my_form_open(dialog, layer, feature): true + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + + + + + false + + + + + + + + + + + + ogr + + - - + + + 1 + 1 + 1 + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - + - - + 0 0 - 0 - - - - - - - - + 0.7 + + + + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + - - + + + + + + - + - - . + + + + + . 0 . 
@@ -2483,96 +4183,1965 @@ def my_form_open(dialog, layer, feature): ]]> 0 generatedlayout + + + + ver + + + trgraph_trgraph_LineString20180508200527144 + ./trgraph.json|layerid=0|geometrytype=LineString + + + + trgraph trgraph LineString + + + +proj=longlat +datum=WGS84 +no_defs + 3452 + 4326 + EPSG:4326 + WGS 84 + longlat + WGS84 + true + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + + + + + false + + + + + + + + + + + + + ogr + + + + + + + + + + + 1 + 1 + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + . + + 0 + . + + 0 + generatedlayout + + + + id + + + trgraph_trgraph_Point20180508200527256 + ./trgraph.json|layerid=0|geometrytype=Point + + + + trgraph trgraph Point + + + +proj=longlat +datum=WGS84 +no_defs + 3452 + 4326 + EPSG:4326 + WGS 84 + longlat + WGS84 + true + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + + + + + false + + + + + + + + + + + + + ogr + + + + + + + + + + + 1 + 1 + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + + 0 + . 
+ + 0 + generatedlayout + + + + + id + + + + + + + + + + + - - - - - - - false - + + 255 + + + + + true + - - - false - - - - - - 2 - true - MU - - - false - - - - false - - WGS84 - - 8 - false - - - - - - - 0 - 255 - 255 - 255 - 255 - 255 - 255 - - - - - - - - None - - - false - - +proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs - EPSG:3395 - 1353 + +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs + EPSG:3857 + 3857 1 - - - - - true - 255 - - - conditions unknown - 90 meters m2 + + + + + false + + + + + + + + + + 30 + false + true + 0 + false + 0 + 16 + 50 + false + + false + + + 2 + D + true + + + + + conditions unknown + + false + + + + + false + + 255 + 255 + 240 + 0 + 240 + 255 + 240 + + 90 + + + + + + + 2 + 2 + 2 + 2 + 2 + 2 + 2 + + off + + OGRGeoJSON_LineString20180203134333975 + OGRGeoJSON_LineString20180206114956229 + OGRGeoJSON_Point20180203134333739 + OGRGeoJSON_Point20180206114956218 + path20180217155708341 + trgraph_trgraph_LineString20180508200527144 + trgraph_trgraph_Point20180508200527256 + + + + 0.000000 + 0.000000 + 0.000000 + 0.000000 + 0.000000 + 0.000000 + 0.000000 + + 0 + 2 + + to_vertex + to_vertex + to_vertex + to_vertex + to_vertex + to_vertex_and_segment + to_vertex_and_segment + + + enabled + enabled + enabled + enabled + enabled + disabled + disabled + + current_layer + + + WGS84 + + None + + + + false + + 8 + + false + + + + + + + + + + + + + + + diff --git a/pfaedle.cfg b/pfaedle.cfg index 07cc68c..8840534 100644 --- a/pfaedle.cfg +++ b/pfaedle.cfg @@ -3,12 +3,6 @@ # Authors: Patrick Brosi [tram, bus, coach, subway, rail, gondola, funicular, ferry] - -routing_transition_penalty_fac: 0.0083 -routing_station_move_penalty_fac: 0.0039 - -station_similarity_classification_method: jaccard-geodist - # Regular expressions and station comparision is # always case insensitive! 
station_normalize_chain: @@ -31,12 +25,6 @@ station_normalize_chain: Ä -> Ae; Ö -> Oe; Ü -> Ue; - À -> A; - Ò -> O; - Ù -> U; - Á -> A; - Ó -> O; - Ú -> U; ä -> ae; ö -> oe; ü -> ue; @@ -211,9 +199,6 @@ line_normalize_chain: # line/number combs ALWAYS without whitespace (T 2 -> T2) ^([a-zA-Z]+) ([0-9]+)$ -> \1\2; - # delete extra line specifier in data for Vitoria-Gasteiz (L2 -> 2) - ^l([0-9a-zA-Z]+)$ -> \1; - track_normalize_chain: '(^| )gleis($| )' -> ''; '(^| )gl\.($| )' -> ''; @@ -349,9 +334,22 @@ osm_line_relation_tags: from_name=from to_name=to -# max distance in meters between a snapped position on an -# edge and the input GTFS/OSM station -osm_max_snap_distance: 200 +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! +osm_station_group_attrs: + uic_ref=500 + wikidata=500 + [public_transport=stop_area]uic_ref=500 + [public_transport=stop_area]wikidata=500 + name=100 + [public_transport=stop_area]name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10, 100, 200 # max edge level to which station will be snapped osm_max_snap_level: 2 @@ -374,28 +372,24 @@ osm_track_number_tags: local_ref ref -# avg speed on segment levels, in km/h -osm_lvl0_avg_speed: 120 # default level -osm_lvl1_avg_speed: 90 -osm_lvl2_avg_speed: 65 -osm_lvl3_avg_speed: 50 -osm_lvl4_avg_speed: 30 -osm_lvl5_avg_speed: 20 -osm_lvl6_avg_speed: 10 -osm_lvl7_avg_speed: 5 +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.25 +routing_lvl2_fac: 1.5 +routing_lvl3_fac: 2 +routing_lvl4_fac: 2.5 +routing_lvl5_fac: 3.5 +routing_lvl6_fac: 5 +routing_lvl7_fac: 7 -# Punishment (in seconds) to add to the distance +# Punishment (in meters) to add to the distance # function if a vehicle performans a full turn -routing_full_turn_penalty: 180 # 3 minutes 
+routing_full_turn_punish: 3000 -# Penalty added to non-station placements -routing_non_station_penalty: 0.4 +routing_station_distance_punish_fac: 3.14 -# If the station name does not match, add this penalty -routing_station_unmatched_penalty: 0.4 +routing_non_osm_station_punish: 100 -# If the platform does not match, add this penalty -routing_platform_unmatched_penalty: 0.1 +routing_platform_unmatched_punish: 2000 # Max angle that should be counted as a full turn routing_full_turn_angle: 100 @@ -403,25 +397,24 @@ routing_full_turn_angle: 100 # Max angle in a route from a station to an already reachable neighbar routing_snap_full_turn_angle: 100 -# Factor by which the vehicle slows down in a one way street (factor 5 -# means it will take 5 times longer) -osm_one_way_speed_penalty_fac: 5 +# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 100 -# Additional one-time time penalty for entering a one-way segment -# in seconds -osm_one_way_entry_cost: 300 +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 1 -# If a segment has no matching line attributes, multiply the -# time needed to traverse it with the given factor (should -# be > 1 for a punishment, values < 1 will prefer unmatching segments) -routing_line_unmatched_time_penalty_fac: 1.2 -routing_line_station_to_unmatched_time_penalty: 1.1 -routing_line_station_from_unmatched_time_penalty: 1.05 +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +routing_line_unmatched_punish_fac: 1 -# If a segment has no line attributes at all, multiply the -# time needed to traverse it with the given factor (should -# be > 1 for a punishment, values < 1 will prefer unmatching segments) -# routing_no_lines_penalty_fac: 1 +# Punishment factor for every meter a vehicle +# travels through an edge without 
any line +# information when no specific line was requested +# routing_no_lines_punish_fac: 0 # special line normalization for trains line_normalize_chain: @@ -565,12 +558,6 @@ osm_filter_keep: type=restriction:motorcar osm_filter_lvl1: - highway=trunk - highway=trunk_link - highway=primary - highway=primary_link - -osm_filter_lvl2: highway=secondary highway=secondary_link bus=yes @@ -587,22 +574,22 @@ osm_filter_lvl2: trolley_bus=yes psv=designated -osm_filter_lvl3: +osm_filter_lvl2: highway=tertiary highway=tertiary_link -osm_filter_lvl4: +osm_filter_lvl3: highway=unclassified highway=residential highway=road -osm_filter_lvl5: +osm_filter_lvl4: highway=living_street highway=pedestrian highway=service psv=no -osm_filter_lvl6: +osm_filter_lvl5: bus=no service=siding access=permissive @@ -610,7 +597,6 @@ osm_filter_lvl6: access=no service=parking_aisle highway=footway - highway=track # OSM entities to drop, as k=v. Applies to nodes, edges and # relations. @@ -728,12 +714,6 @@ osm_filter_station: highway=bus_stop amenity=bus_station -osm_filter_turning_circle: - highway=turning_circle - highway=turning_loop - junction=roundabout - highway=mini_roundabout - # Relation fields that should be used for catching the lines that # occur on an edge. Only relations that have been kept during the # filtering above will be checked. The 'linename' will be normalized @@ -747,18 +727,28 @@ osm_line_relation_tags: line_name=ref,name # careful, no space after/before comma allowed! 
from_name=from to_name=to - line_color=colour,color -# max distance in meters between a OSM station candidate -# and the input GTFS station -osm_max_station_cand_distance: 200 -# max distance in meters between a snapped position on an -# edge and the input GTFS/OSM station -osm_max_snap_distance: 100 +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! +osm_station_group_attrs: + uic_ref=500 + wikidata=500 + name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10, 50, 100 + +osm_max_snap_fallback_distance: 300 osm_max_snap_level: 5 +osm_max_osm_station_distance: 8.0 + # sorted by priority, first found attr will be taken osm_station_name_attrs: name @@ -767,45 +757,22 @@ osm_station_name_attrs: # the track number tag in stop nodes, first one is taken osm_track_number_tags: local_ref -# avg speed on segment levels, in km/h -osm_lvl0_avg_speed: 85 # default level -osm_lvl1_avg_speed: 70 -osm_lvl2_avg_speed: 55 -osm_lvl3_avg_speed: 40 -osm_lvl4_avg_speed: 30 -osm_lvl5_avg_speed: 20 -osm_lvl6_avg_speed: 10 -osm_lvl7_avg_speed: 5 +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.25 +routing_lvl2_fac: 1.5 +routing_lvl3_fac: 1.75 +routing_lvl4_fac: 2.25 +routing_lvl5_fac: 3 +routing_lvl6_fac: 4 +routing_lvl7_fac: 5 -# Factor by which the vehicle slows down in a one way street (factor 5 -# means it will take 5 times longer) -osm_one_way_speed_penalty_fac: 5 - -# Additional one-time time penalty for entering a one-way segment -# in seconds -osm_one_way_entry_cost: 300 - -# If a segment has no matching line attributes, multiply the -# time needed to traverse it with the given factor (should -# be > 1 for a punishment, values < 1 will prefer unmatching segments) -routing_line_unmatched_time_penalty_fac: 1.2 
-routing_line_station_to_unmatched_time_penalty: 1.1 -routing_line_station_from_unmatched_time_penalty: 1.05 - -# If a segment has no line attributes at all, multiply the -# time needed to traverse it with the given factor (should -# be > 1 for a punishment, values < 1 will prefer unmatching segments) -# routing_no_lines_penalty_fac: 1 - -# If the station name does not match, add this penalty -routing_station_unmatched_penalty: 0.4 - -# Punishment (in seconds) to add to the distance +# Punishment (in meters) to add to the distance # function if a vehicle performans a full turn -routing_full_turn_penalty: 120 # 2 minutes +routing_full_turn_punish: 500 -# Penalty added to non-station placements -routing_non_station_penalty: 0.4 +routing_station_distance_punish_fac: 2.5 + +routing_non_osm_station_punish: 500 # Max angle that should be counted as a full turn routing_full_turn_angle: 20 @@ -815,6 +782,26 @@ routing_snap_full_turn_angle: 110 osm_max_node_block_distance: 10 +# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 0 + +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 4 + +routing_one_way_edge_punish: 5000 + +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +# routing_line_unmatched_punish_fac: 1.75 + +# Punishment factor for every meter a vehicle +# travels through an edge without any line +# information when no specific line was requested +# routing_no_lines_punish_fac: 0 [coach] @@ -863,14 +850,14 @@ osm_filter_lvl7: service=parking_aisle highway=footway -osm_lvl0_avg_speed: 120 # default level -osm_lvl1_avg_speed: 90 -osm_lvl2_avg_speed: 65 -osm_lvl3_avg_speed: 50 -osm_lvl4_avg_speed: 30 -osm_lvl5_avg_speed: 20 -osm_lvl6_avg_speed: 10 -osm_lvl7_avg_speed: 5 +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.15 
+routing_lvl2_fac: 1.5 +routing_lvl3_fac: 1.75 +routing_lvl4_fac: 2.25 +routing_lvl5_fac: 2.5 +routing_lvl6_fac: 3 +routing_lvl7_fac: 4 osm_max_snap_level: 5 @@ -971,9 +958,20 @@ osm_line_relation_tags: from_name=from to_name=to -# max distance in meters between a snapped position on an -# edge and the input GTFS/OSM station -osm_max_snap_distance: 100 + +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! +osm_station_group_attrs: + uic_ref=500 + wikidata=500 + name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10, 50, 100 osm_max_snap_level: 4 @@ -986,25 +984,22 @@ osm_station_name_attrs: # the track number tag in stop nodes, first one is taken osm_track_number_tags: local_ref -# avg speed on segment levels, in km/h -osm_lvl0_avg_speed: 85 # default level -osm_lvl1_avg_speed: 70 -osm_lvl2_avg_speed: 55 -osm_lvl3_avg_speed: 40 -osm_lvl4_avg_speed: 30 -osm_lvl5_avg_speed: 20 -osm_lvl6_avg_speed: 10 -osm_lvl7_avg_speed: 5 +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.5 +routing_lvl2_fac: 2 +routing_lvl3_fac: 2.5 +routing_lvl4_fac: 3.5 +routing_lvl5_fac: 5 +routing_lvl6_fac: 5 +routing_lvl7_fac: 5 -# Punishment (in seconds) to add to the distance +# Punishment (in meters) to add to the distance # function if a vehicle performans a full turn -routing_full_turn_penalty: 180 # 3 minutes +routing_full_turn_punish: 2000 -# Penalty added to non-station placements -routing_non_station_penalty: 0.4 +routing_station_distance_punish_fac: 3.14 -# If the station name does not match, add this penalty -routing_station_unmatched_penalty: 0.4 +routing_non_osm_station_punish: 235 # Max angle that should be counted as a full turn routing_full_turn_angle: 80 @@ -1012,21 +1007,24 @@ routing_full_turn_angle: 80 # Max angle in a route 
from a station to an already reachable neighbar routing_snap_full_turn_angle: 80 -# Factor by which the vehicle slows down in a one way street (factor 5 -# means it will take 5 times longer) -osm_one_way_speed_penalty_fac: 2 +# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 100 -# If a segment has no matching line attributes, multiply the -# time needed to traverse it with the given factor (should -# be > 1 for a punishment, values < 1 will prefer unmatching segments) -routing_line_unmatched_time_penalty_fac: 1.2 -routing_line_station_to_unmatched_time_penalty: 1.1 -routing_line_station_from_unmatched_time_penalty: 1.05 +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 1 -# If a segment has no line attributes at all, multiply the -# time needed to traverse it with the given factor (should -# be > 1 for a punishment, values < 1 will prefer unmatching segments) -# routing_no_lines_penalty_fac: 1 +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +routing_line_unmatched_punish_fac: 0.5 + +# Punishment factor for every meter a vehicle +# travels through an edge without any line +# information when no specific line was requested +# routing_no_lines_punish_fac: 0 [gondola] @@ -1106,9 +1104,20 @@ osm_line_relation_tags: from_name=from to_name=to -# max distance in meters between a snapped position on an -# edge and the input GTFS/OSM station -osm_max_snap_distance: 100 + +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! 
+osm_station_group_attrs: + uic_ref=500 + wikidata=500 + name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10, 50, 100 osm_max_snap_level: 4 @@ -1121,25 +1130,22 @@ osm_station_name_attrs: # the track number tag in stop nodes, first one is taken osm_track_number_tags: local_ref -# avg speed on segment levels, in km/h -osm_lvl0_avg_speed: 85 # default level -osm_lvl1_avg_speed: 70 -osm_lvl2_avg_speed: 55 -osm_lvl3_avg_speed: 40 -osm_lvl4_avg_speed: 30 -osm_lvl5_avg_speed: 20 -osm_lvl6_avg_speed: 10 -osm_lvl7_avg_speed: 5 +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.5 +routing_lvl2_fac: 2 +routing_lvl3_fac: 2.5 +routing_lvl4_fac: 3.5 +routing_lvl5_fac: 5 +routing_lvl6_fac: 5 +routing_lvl7_fac: 5 -# Punishment (in seconds) to add to the distance +# Punishment (in meters) to add to the distance # function if a vehicle performans a full turn -routing_full_turn_penalty: 120 # 2 minutes +routing_full_turn_punish: 2000 -# Penalty added to non-station placements -routing_non_station_penalty: 0.4 +routing_station_distance_punish_fac: 3.14 -# If the station name does not match, add this penalty -routing_station_unmatched_penalty: 0.4 +routing_non_osm_station_punish: 235 # Max angle that should be counted as a full turn routing_full_turn_angle: 80 @@ -1147,21 +1153,24 @@ routing_full_turn_angle: 80 # Max angle in a route from a station to an already reachable neighbar routing_snap_full_turn_angle: 80 -# Factor by which the vehicle slows down in a one way street (factor 5 -# means it will take 5 times longer) -osm_one_way_speed_penalty_fac: 2 +# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 100 -# If a segment has no matching line attributes, multiply the -# time needed to traverse it with the given factor (should -# be > 1 for a punishment, values < 1 will prefer unmatching 
segments) -routing_line_unmatched_time_penalty_fac: 1.2 -routing_line_station_to_unmatched_time_penalty: 1.1 -routing_line_station_from_unmatched_time_penalty: 1.05 +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 1 -# If a segment has no line attributes at all, multiply the -# time needed to traverse it with the given factor (should -# be > 1 for a punishment, values < 1 will prefer unmatching segments) -# routing_no_lines_penalty_fac: 1 +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +routing_line_unmatched_punish_fac: 0.5 + +# Punishment factor for every meter a vehicle +# travels through an edge without any line +# information when no specific line was requested +# routing_no_lines_punish_fac: 0 [funicular] @@ -1272,9 +1281,20 @@ osm_line_relation_tags: from_name=from to_name=to -# max distance in meters between a snapped position on an -# edge and the input GTFS/OSM station -osm_max_snap_distance: 100 + +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! 
+osm_station_group_attrs: + uic_ref=500 + wikidata=500 + name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10, 50, 100 osm_max_snap_level: 4 @@ -1287,25 +1307,22 @@ osm_station_name_attrs: # the track number tag in stop nodes, first one is taken osm_track_number_tags: local_ref -# avg speed on segment levels, in km/h -osm_lvl0_avg_speed: 85 # default level -osm_lvl1_avg_speed: 70 -osm_lvl2_avg_speed: 55 -osm_lvl3_avg_speed: 40 -osm_lvl4_avg_speed: 30 -osm_lvl5_avg_speed: 20 -osm_lvl6_avg_speed: 10 -osm_lvl7_avg_speed: 5 +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.5 +routing_lvl2_fac: 2 +routing_lvl3_fac: 2.5 +routing_lvl4_fac: 3.5 +routing_lvl5_fac: 5 +routing_lvl6_fac: 5 +routing_lvl7_fac: 5 -# Punishment (in seconds) to add to the distance +# Punishment (in meters) to add to the distance # function if a vehicle performans a full turn -routing_full_turn_penalty: 120 # 2 minutes +routing_full_turn_punish: 2000 -# Penalty added to non-station placements -routing_non_station_penalty: 0.4 +routing_station_distance_punish_fac: 3.14 -# If the station name does not match, add this penalty -routing_station_unmatched_penalty: 0.4 +routing_non_osm_station_punish: 235 # Max angle that should be counted as a full turn routing_full_turn_angle: 80 @@ -1313,21 +1330,24 @@ routing_full_turn_angle: 80 # Max angle in a route from a station to an already reachable neighbar routing_snap_full_turn_angle: 80 -# Factor by which the vehicle slows down in a one way street (factor 5 -# means it will take 5 times longer) -osm_one_way_speed_penalty_fac: 2 +# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 100 -# If a segment has no matching line attributes, multiply the -# time needed to traverse it with the given factor (should -# be > 1 for a punishment, values < 1 will prefer unmatching 
segments) -routing_line_unmatched_time_penalty_fac: 1.2 -routing_line_station_to_unmatched_time_penalty: 1.1 -routing_line_station_from_unmatched_time_penalty: 1.05 +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 1 -# If a segment has no line attributes at all, multiply the -# time needed to traverse it with the given factor (should -# be > 1 for a punishment, values < 1 will prefer unmatching segments) -# routing_no_lines_penalty_fac: 1 +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +routing_line_unmatched_punish_fac: 0.5 + +# Punishment factor for every meter a vehicle +# travels through an edge without any line +# information when no specific line was requested +# routing_no_lines_punish_fac: 0 [ferry] @@ -1341,18 +1361,12 @@ osm_filter_keep: waterway=river motorboat=yes ferry=yes - amenity=ferry_terminal - mooring=ferry - station=ferry # Nodes that are stations. # Only nodes that have been kept during the filtering above will be # checked. osm_filter_station: - ferry=yes public_transport=stop_position - amenity=ferry_terminal - mooring=ferry station=ferry railway=stop railway=halt @@ -1373,9 +1387,20 @@ osm_line_relation_tags: from_name=from to_name=to -# max distance in meters between a snapped position on an -# edge and the input GTFS/OSM station -osm_max_snap_distance: 500 + +# attr name together with the +# max distance in meters between any of the groups members and +# a potential new member +# first matching rule will be taken +# only applies to nodes that match osm_filter_station! 
+osm_station_group_attrs: + uic_ref=500 + wikidata=500 + name=100 + +# max distance in meters between a snapped station position and the +# original station position +osm_max_snap_distance: 10, 100, 200 osm_max_snap_level: 4 @@ -1388,25 +1413,22 @@ osm_station_name_attrs: # the track number tag in stop nodes, first one is taken osm_track_number_tags: local_ref -# avg speed on segment levels, in km/h -osm_lvl0_avg_speed: 70 # default level -osm_lvl1_avg_speed: 60 -osm_lvl2_avg_speed: 50 -osm_lvl3_avg_speed: 35 -osm_lvl4_avg_speed: 30 -osm_lvl5_avg_speed: 25 -osm_lvl6_avg_speed: 10 -osm_lvl7_avg_speed: 5 +routing_lvl0_fac: 1 # default level +routing_lvl1_fac: 1.5 +routing_lvl2_fac: 2 +routing_lvl3_fac: 2.5 +routing_lvl4_fac: 3.5 +routing_lvl5_fac: 5 +routing_lvl6_fac: 5 +routing_lvl7_fac: 5 -# Punishment (in seconds) to add to the distance +# Punishment (in meters) to add to the distance # function if a vehicle performans a full turn -routing_full_turn_penalty: 120 # 2 minutes +routing_full_turn_punish: 100 -# Penalty added to non-station placements -routing_non_station_penalty: 0.4 +routing_station_distance_punish_fac: 3.14 -# If the station name does not match, add this penalty -routing_station_unmatched_penalty: 0.4 +routing_non_osm_station_punish: 50 # Max angle that should be counted as a full turn routing_full_turn_angle: 45 @@ -1414,18 +1436,22 @@ routing_full_turn_angle: 45 # Max angle in a route from a station to an already reachable neighbar routing_snap_full_turn_angle: 0 -# Factor by which the vehicle slows down in a one way street (factor 5 -# means it will take 5 times longer) -osm_one_way_speed_penalty_fac: 2 +# Punishment (in meters) to add to the distance +# function if a vehicle passes a station node without +# stopping there +routing_pass_thru_station_punish: 0 -# If a segment has no matching line attributes, multiply the -# time needed to traverse it with the given factor (should -# be > 1 for a punishment, values < 1 will prefer unmatching 
segments) -routing_line_unmatched_time_penalty_fac: 1.2 -routing_line_station_to_unmatched_time_penalty: 1.15 -routing_line_station_from_unmatched_time_penalty: 1.1 +# Punishment factor for every meter a vehicle +# travels through a one-way edge +routing_one_way_meter_punish_fac: 1 + +# Punishment factor for every meter a vehicle +# travels through an edge without any matching line +# information +routing_line_unmatched_punish_fac: 0.5 + +# Punishment factor for every meter a vehicle +# travels through an edge without any line +# information when no specific line was requested +# routing_no_lines_punish_fac: 0 -# If a segment has no line attributes at all, multiply the -# time needed to traverse it with the given factor (should -# be > 1 for a punishment, values < 1 will prefer unmatching segments) -# routing_no_lines_penalty_fac: 1 diff --git a/push-docker-image.sh b/push-docker-image.sh new file mode 100644 index 0000000..af6f3fb --- /dev/null +++ b/push-docker-image.sh @@ -0,0 +1,6 @@ +#!/bin/sh +set -ex +set -o pipefail + +echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin +docker push adfreiburg/pfaedle diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d60a569..7d54350 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,25 +1,14 @@ set(PFAEDLE_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR} ${CMAKE_CURRENT_BINARY_DIR}) +if (COMMAND cpplint_add_subdirectory) + cpplint_add_subdirectory(pfaedle) +endif() + include_directories( ${PFAEDLE_INCLUDE_DIR} ) -if (ZLIB_FOUND) - include_directories( ${ZLIB_INCLUDE_DIRS} ) -endif( ZLIB_FOUND ) - -if (LIBZIP_FOUND) - include_directories( SYSTEM ${LIBZIP_INCLUDE_DIR} ) - include_directories( SYSTEM ${LIBZIP_CONF_INCLUDE_DIR} ) -endif( LIBZIP_FOUND ) - -if (BZIP2_FOUND) - include_directories( SYSTEM ${BZIP2_INCLUDE_DIR} ) -endif( BZIP2_FOUND ) - add_subdirectory(util) add_subdirectory(pfaedle) add_subdirectory(cppgtfs) add_subdirectory(configparser) -add_subdirectory(shapevl) 
-add_subdirectory(xml) diff --git a/src/configparser b/src/configparser index ca166b3..b2d0c99 160000 --- a/src/configparser +++ b/src/configparser @@ -1 +1 @@ -Subproject commit ca166b3446d5bb8b5fb8c6f637ca3f9cb0a8ff3b +Subproject commit b2d0c99b9c84f62f5f7b259524e0f4b1c9d38318 diff --git a/src/cppgtfs b/src/cppgtfs index d26d579..c704a61 160000 --- a/src/cppgtfs +++ b/src/cppgtfs @@ -1 +1 @@ -Subproject commit d26d5794d396141905d71ecb8cd4f45e0120cba7 +Subproject commit c704a610d91ec2be575c42a760cc6157c8fdf932 diff --git a/src/pfaedle/CMakeLists.txt b/src/pfaedle/CMakeLists.txt index 7214555..22b083a 100644 --- a/src/pfaedle/CMakeLists.txt +++ b/src/pfaedle/CMakeLists.txt @@ -16,16 +16,5 @@ configure_file ( add_executable(pfaedle ${pfaedle_main}) add_library(pfaedle_dep ${pfaedle_SRC}) -include_directories(pfaedle_dep PUBLIC ${PROJECT_SOURCE_DIR}/src/xml/include/ ${PROJECT_SOURCE_DIR}/src/cppgtfs/src) - +include_directories(pfaedle_dep PUBLIC ${PROJECT_SOURCE_DIR}/src/cppgtfs/src) target_link_libraries(pfaedle pfaedle_dep util configparser ad_cppgtfs -lpthread) - -if (LIBZIP_FOUND) - target_link_libraries(pfaedle ${LIBZIP_LIBRARY}) -endif( LIBZIP_FOUND ) - -if (BZIP2_FOUND) - target_link_libraries(pfaedle ${BZIP2_LIBRARY}) -endif( BZIP2_FOUND ) - -add_subdirectory(tests) diff --git a/src/pfaedle/Def.h b/src/pfaedle/Def.h index 397dc9b..7b8e083 100644 --- a/src/pfaedle/Def.h +++ b/src/pfaedle/Def.h @@ -17,18 +17,47 @@ #define __str_c(s) s ## 1 #define __str_d(s) __str_c(s) -#if !defined(PFDL_PREC) || (__str_d(PFDL_PREC) == 1) -#undef PFDL_PREC -#define PFDL_PREC double +#if !defined(PFAEDLE_PRECISION) || (__str_d(PFAEDLE_PRECISION) == 1) +#undef PFAEDLE_PRECISION +#define PFAEDLE_PRECISION double #endif -#define PFDL_PREC_STR __str_a(PFDL_PREC) +#define PFAEDLE_PRECISION_STR __str_a(PFAEDLE_PRECISION) -#define POINT util::geo::Point -#define LINE util::geo::Line -#define BOX util::geo::Box -#define POLYLINE util::geo::PolyLine +#define POINT util::geo::Point 
+#define LINE util::geo::Line +#define BOX util::geo::Box +#define POLYLINE util::geo::PolyLine #define BOX_PADDING 2500 +namespace pfaedle { + +// _____________________________________________________________________________ +inline std::string getTmpFName(std::string dir, std::string postf) { + if (postf.size()) postf = "-" + postf; + if (!dir.size()) dir = util::getTmpDir(); + if (dir.size() && dir.back() != '/') dir = dir + "/"; + + std::string f = dir + ".pfaedle-tmp" + postf; + + size_t c = 0; + + while (access(f.c_str(), F_OK) != -1) { + c++; + if (c > 10000) { + // giving up... + LOG(ERROR) << "Could not find temporary file name!"; + exit(1); + } + std::stringstream ss; + ss << dir << ".pfaedle-tmp" << postf << "-" << std::rand(); + f = ss.str().c_str(); + } + + return f; +} + +} // namespace pfaedle + #endif // PFAEDLE_DEF_H_ diff --git a/src/pfaedle/PfaedleMain.cpp b/src/pfaedle/PfaedleMain.cpp index 0db163e..55bea77 100644 --- a/src/pfaedle/PfaedleMain.cpp +++ b/src/pfaedle/PfaedleMain.cpp @@ -9,31 +9,28 @@ #include #include #include - -#include #include #include #include #include - #include "ad/cppgtfs/Parser.h" #include "ad/cppgtfs/Writer.h" #include "pfaedle/config/ConfigReader.h" #include "pfaedle/config/MotConfig.h" #include "pfaedle/config/MotConfigReader.h" +#include "pfaedle/eval/Collector.h" #include "pfaedle/gtfs/Feed.h" #include "pfaedle/gtfs/Writer.h" #include "pfaedle/netgraph/Graph.h" #include "pfaedle/osm/OsmIdSet.h" #include "pfaedle/router/ShapeBuilder.h" -#include "pfaedle/router/Stats.h" -#include "pfaedle/statsimi-classifier/StatsimiClassifier.h" #include "pfaedle/trgraph/Graph.h" -#include "util/Misc.h" +#include "pfaedle/trgraph/StatGroup.h" #include "util/geo/output/GeoGraphJsonOutput.h" #include "util/geo/output/GeoJsonOutput.h" #include "util/json/Writer.h" #include "util/log/Log.h" +#include "util/Misc.h" #ifndef CFG_HOME_SUFFIX #define CFG_HOME_SUFFIX "/.config" @@ -45,30 +42,16 @@ #define CFG_FILE_NAME "pfaedle.cfg" #endif 
-using configparser::ParseFileExc; -using pfaedle::config::Config; -using pfaedle::config::ConfigReader; -using pfaedle::config::MotConfig; -using pfaedle::config::MotConfigReader; +using pfaedle::router::MOTs; using pfaedle::osm::BBoxIdx; using pfaedle::osm::OsmBuilder; -using pfaedle::router::DistDiffTransWeight; -using pfaedle::router::DistDiffTransWeightNoHeur; -using pfaedle::router::ExpoTransWeight; -using pfaedle::router::ExpoTransWeightNoHeur; -using pfaedle::router::MOTs; -using pfaedle::router::NormDistrTransWeight; -using pfaedle::router::NormDistrTransWeightNoHeur; -using pfaedle::router::Router; -using pfaedle::router::RouterImpl; +using pfaedle::config::MotConfig; +using pfaedle::config::Config; using pfaedle::router::ShapeBuilder; -using pfaedle::router::Stats; -using pfaedle::statsimiclassifier::BTSClassifier; -using pfaedle::statsimiclassifier::EDClassifier; -using pfaedle::statsimiclassifier::JaccardClassifier; -using pfaedle::statsimiclassifier::JaccardGeodistClassifier; -using pfaedle::statsimiclassifier::PEDClassifier; -using pfaedle::statsimiclassifier::StatsimiClassifier; +using configparser::ParseFileExc; +using pfaedle::config::MotConfigReader; +using pfaedle::config::ConfigReader; +using pfaedle::eval::Collector; enum class RetCode { SUCCESS = 0, @@ -86,9 +69,6 @@ enum class RetCode { std::string getFileNameMotStr(const MOTs& mots); std::vector getCfgPaths(const Config& cfg); -// _____________________________________________________________________________ -void gtfsWarnCb(std::string msg) { LOG(WARN) << msg; } - // _____________________________________________________________________________ int main(int argc, char** argv) { // disable output buffering for standard output @@ -97,11 +77,6 @@ int main(int argc, char** argv) { // initialize randomness srand(time(NULL) + rand()); // NOLINT - // use utf8 locale - std::setlocale(LC_ALL, "en_US.utf8"); - - T_START(total); - Config cfg; MotConfigReader motCfgReader; @@ -109,18 +84,20 @@ int 
main(int argc, char** argv) { cr.read(&cfg, argc, argv); std::vector gtfs(cfg.feedPaths.size()); + // feed containing the shapes in memory for evaluation + ad::cppgtfs::gtfs::Feed evalFeed; std::vector cfgPaths = getCfgPaths(cfg); try { - motCfgReader.parse(cfgPaths, cfg.motCfgParam); + motCfgReader.parse(cfgPaths); } catch (const configparser::ParseExc& ex) { LOG(ERROR) << "Could not parse MOT configurations, reason was:"; std::cerr << ex.what() << std::endl; exit(static_cast(RetCode::MOT_CFG_PARSE_ERR)); } - if (cfg.osmPath.empty() && !cfg.writeOverpass && !cfg.writeOsmfilter) { + if (cfg.osmPath.empty() && !cfg.writeOverpass) { std::cerr << "No OSM input file specified (-x), see --help." << std::endl; exit(static_cast(RetCode::NO_OSM_INPUT)); } @@ -131,42 +108,42 @@ int main(int argc, char** argv) { exit(static_cast(RetCode::NO_MOT_CFG)); } - T_START(gtfsBuild); - if (cfg.feedPaths.size() == 1) { if (cfg.inPlace) cfg.outputPath = cfg.feedPaths[0]; - if (!cfg.writeOverpass && !cfg.writeOsmfilter) - LOG(INFO) << "Reading GTFS feed " << cfg.feedPaths[0] << " ..."; + if (!cfg.writeOverpass) + LOG(INFO) << "Reading " << cfg.feedPaths[0] << " ..."; try { - ad::cppgtfs::Parser p(cfg.feedPaths[0], false, - cfg.parseAdditionalGTFSFields, - cfg.verbosity ? 
gtfsWarnCb : 0); - p.parse(>fs[0]); + ad::cppgtfs::Parser p; + p.parse(>fs[0], cfg.feedPaths[0]); + if (cfg.evaluate) { + // read the shapes and store them in memory + p.parseShapes(&evalFeed, cfg.feedPaths[0]); + } } catch (const ad::cppgtfs::ParserException& ex) { LOG(ERROR) << "Could not parse input GTFS feed, reason was:"; std::cerr << ex.what() << std::endl; exit(static_cast(RetCode::GTFS_PARSE_ERR)); } + if (!cfg.writeOverpass) LOG(INFO) << "Done."; } else if (cfg.writeOsm.size() || cfg.writeOverpass) { for (size_t i = 0; i < cfg.feedPaths.size(); i++) { - if (!cfg.writeOverpass && !cfg.writeOsmfilter) - LOG(INFO) << "Reading GTFS feed " << cfg.feedPaths[i] << " ..."; + if (!cfg.writeOverpass) + LOG(INFO) << "Reading " << cfg.feedPaths[i] << " ..."; + ad::cppgtfs::Parser p; try { - ad::cppgtfs::Parser p(cfg.feedPaths[i]); - p.parse(>fs[i]); + p.parse(>fs[i], cfg.feedPaths[i]); } catch (const ad::cppgtfs::ParserException& ex) { LOG(ERROR) << "Could not parse input GTFS feed, reason was:"; std::cerr << ex.what() << std::endl; exit(static_cast(RetCode::GTFS_PARSE_ERR)); } + if (!cfg.writeOverpass) LOG(INFO) << "Done."; } } else if (cfg.feedPaths.size() > 1) { std::cerr << "Multiple feeds only allowed in filter mode." 
<< std::endl; exit(static_cast(RetCode::MULT_FEEDS_NOT_ALWD)); } - auto tGtfsBuild = T_STOP(gtfsBuild); - LOG(DEBUG) << "Read " << motCfgReader.getConfigs().size() << " unique MOT configs."; MOTs cmdCfgMots = cfg.mots; @@ -184,20 +161,12 @@ int main(int argc, char** argv) { } } - double maxSpeed = 0; - for (const auto& c : motCfgReader.getConfigs()) { - if (c.osmBuildOpts.maxSpeed > maxSpeed) { - maxSpeed = c.osmBuildOpts.maxSpeed; - } - } - if (cfg.writeOsm.size()) { LOG(INFO) << "Writing filtered XML to " << cfg.writeOsm << " ..."; BBoxIdx box(BOX_PADDING); - for (size_t i = 0; i < cfg.feedPaths.size(); i++) { ShapeBuilder::getGtfsBox(>fs[i], cmdCfgMots, cfg.shapeTripId, true, - &box, maxSpeed, 0, cfg.verbosity); + &box); } OsmBuilder osmBuilder; std::vector opts; @@ -219,7 +188,7 @@ int main(int argc, char** argv) { BBoxIdx box(BOX_PADDING); for (size_t i = 0; i < cfg.feedPaths.size(); i++) { ShapeBuilder::getGtfsBox(>fs[i], cmdCfgMots, cfg.shapeTripId, true, - &box, maxSpeed, 0, cfg.verbosity); + &box); } OsmBuilder osmBuilder; std::vector opts; @@ -231,27 +200,15 @@ int main(int argc, char** argv) { } osmBuilder.overpassQryWrite(&std::cout, opts, box); exit(static_cast(RetCode::SUCCESS)); - } else if (cfg.writeOsmfilter) { - BBoxIdx box(BOX_PADDING); - OsmBuilder osmBuilder; - std::vector opts; - for (const auto& o : motCfgReader.getConfigs()) { - if (std::find_first_of(o.mots.begin(), o.mots.end(), cmdCfgMots.begin(), - cmdCfgMots.end()) != o.mots.end()) { - opts.push_back(o.osmBuildOpts); - } - } - osmBuilder.osmfilterRuleWrite(&std::cout, opts, box); - exit(static_cast(RetCode::SUCCESS)); } else if (!cfg.feedPaths.size()) { std::cout << "No input feed specified, see --help" << std::endl; exit(static_cast(RetCode::NO_INPUT_FEED)); } - Stats stats; - double tOsmBuild = 0; - std::map> graphDimensions; - std::vector hopDists; + std::vector dfBins; + auto dfBinStrings = util::split(std::string(cfg.evalDfBins), ','); + for (auto st : dfBinStrings) 
dfBins.push_back(atof(st.c_str())); + Collector ecoll(cfg.evalPath, dfBins); for (const auto& motCfg : motCfgReader.getConfigs()) { std::string filePost; @@ -263,7 +220,7 @@ int main(int argc, char** argv) { filePost = getFileNameMotStr(usedMots); std::string motStr = pfaedle::router::getMotStr(usedMots); - LOG(INFO) << "Matching shapes for mots " << motStr; + LOG(INFO) << "Calculating shapes for mots " << motStr; try { pfaedle::router::FeedStops fStops = @@ -274,108 +231,62 @@ int main(int argc, char** argv) { pfaedle::osm::OsmBuilder osmBuilder; pfaedle::osm::BBoxIdx box(BOX_PADDING); - ShapeBuilder::getGtfsBox( - >fs[0], usedMots, cfg.shapeTripId, cfg.dropShapes, &box, - motCfg.osmBuildOpts.maxSpeed, &hopDists, cfg.verbosity); - - T_START(osmBuild); + ShapeBuilder::getGtfsBox(>fs[0], cmdCfgMots, cfg.shapeTripId, + cfg.dropShapes, &box); if (fStops.size()) osmBuilder.read(cfg.osmPath, motCfg.osmBuildOpts, &graph, box, - cfg.gridSize, &restr); + cfg.gridSize, &fStops, &restr); - tOsmBuild += T_STOP(osmBuild); - graphDimensions[filePost].first = graph.getNds().size(); - - for (const auto& nd : graph.getNds()) { - graphDimensions[filePost].second += nd->getAdjListOut().size(); + // TODO(patrick): move this somewhere else + for (auto& feedStop : fStops) { + if (feedStop.second) { + feedStop.second->pl().getSI()->getGroup()->writePens( + motCfg.osmBuildOpts.trackNormzer, + motCfg.routingOpts.platformUnmatchedPen, + motCfg.routingOpts.stationDistPenFactor, + motCfg.routingOpts.nonOsmPen); + } } - StatsimiClassifier* statsimiClassifier; - - if (motCfg.routingOpts.statsimiMethod == "bts") { - statsimiClassifier = new BTSClassifier(); - } else if (motCfg.routingOpts.statsimiMethod == "jaccard") { - statsimiClassifier = new JaccardClassifier(); - } else if (motCfg.routingOpts.statsimiMethod == "jaccard-geodist") { - statsimiClassifier = new JaccardGeodistClassifier(); - } else if (motCfg.routingOpts.statsimiMethod == "ed") { - statsimiClassifier = new EDClassifier(); - } 
else if (motCfg.routingOpts.statsimiMethod == "ped") { - statsimiClassifier = new PEDClassifier(); - } else { - LOG(ERROR) << "Unknown station similarity classifier " - << motCfg.routingOpts.statsimiMethod; - exit(1); - } - - Router* router = 0; - - if (motCfg.routingOpts.transPenMethod == "exp") { - if (cfg.noAStar) - router = new RouterImpl(); - else - router = new RouterImpl(); - } else if (motCfg.routingOpts.transPenMethod == "distdiff") { - if (cfg.noAStar) - router = new RouterImpl(); - else - router = new RouterImpl(); - } else if (motCfg.routingOpts.transPenMethod == "timenorm") { - if (cfg.noAStar) - router = new RouterImpl(); - else - router = new RouterImpl(); - } else { - LOG(ERROR) << "Unknown routing method " - << motCfg.routingOpts.transPenMethod; - exit(1); - } - - ShapeBuilder shapeBuilder(>fs[0], usedMots, motCfg, &graph, &fStops, - &restr, statsimiClassifier, router, cfg); - - pfaedle::netgraph::Graph ng; - - if (singleTrip) { - mkdir(cfg.dbgOutputPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); - std::ofstream pstr(cfg.dbgOutputPath + "/path.json"); - util::geo::output::GeoJsonOutput o(pstr); - - auto l = shapeBuilder.shapeL(singleTrip); - stats += l.second; - - LOG(INFO) << "Outputting path.json..."; - // reproject to WGS84 to match RFC 7946 - o.print(l.first, {}); - - o.flush(); - pstr.close(); - } else { - stats += shapeBuilder.shapeify(&ng); - } - - if (router) delete router; - if (statsimiClassifier) delete statsimiClassifier; + ShapeBuilder shapeBuilder(>fs[0], &evalFeed, cmdCfgMots, motCfg, &ecoll, + &graph, &fStops, &restr, cfg); if (cfg.writeGraph) { LOG(INFO) << "Outputting graph.json..."; util::geo::output::GeoGraphJsonOutput out; mkdir(cfg.dbgOutputPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); std::ofstream fstr(cfg.dbgOutputPath + "/graph.json"); - out.print(*shapeBuilder.getGraph(), fstr); + out.printLatLng(*shapeBuilder.getGraph(), fstr); fstr.close(); } - if (singleTrip) exit(static_cast(RetCode::SUCCESS)); + if 
(singleTrip) { + LOG(INFO) << "Outputting path.json..."; + mkdir(cfg.dbgOutputPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); + std::ofstream pstr(cfg.dbgOutputPath + "/path.json"); + util::geo::output::GeoJsonOutput o(pstr); + + auto l = shapeBuilder.shapeL(singleTrip); + + // reproject to WGS84 to match RFC 7946 + o.printLatLng(l, {}); + + o.flush(); + pstr.close(); + + exit(static_cast(RetCode::SUCCESS)); + } + + pfaedle::netgraph::Graph ng; + shapeBuilder.shape(&ng); if (cfg.buildTransitGraph) { util::geo::output::GeoGraphJsonOutput out; - LOG(INFO) << "Outputting trgraph-" + filePost + ".json..."; + LOG(INFO) << "Outputting trgraph" + filePost + ".json..."; mkdir(cfg.dbgOutputPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); - std::ofstream fstr(cfg.dbgOutputPath + "/trgraph-" + filePost + - ".json"); - out.print(ng, fstr); + std::ofstream fstr(cfg.dbgOutputPath + "/trgraph" + filePost + ".json"); + out.printLatLng(ng, fstr); fstr.close(); } } catch (const pfxml::parse_exc& ex) { @@ -385,58 +296,16 @@ int main(int argc, char** argv) { } } - // outputting stats - if (cfg.writeStats) { - util::json::Dict graphSizes; - - double numNodesTot = 0; - double numEdgesTot = 0; - - for (const auto& gd : graphDimensions) { - util::json::Dict a; - a["num_nodes"] = gd.second.first; - a["num_edges"] = gd.second.second; - numNodesTot += gd.second.first; - numEdgesTot += gd.second.second; - graphSizes[gd.first] = a; - } - - double hopDistSum = 0; - for (auto d : hopDists) hopDistSum += d; - - util::json::Dict jsonStats = { - {"statistics", - util::json::Dict{ - {"gtfs_num_stations", gtfs[0].getStops().size()}, - {"gtfs_num_trips", gtfs[0].getTrips().size()}, - {"gtfs_avg_hop_dist", hopDistSum / (hopDists.size() * 1.0)}, - {"graph_dimension", graphSizes}, - {"num_nodes_tot", numNodesTot}, - {"num_edges_tot", numEdgesTot}, - {"num_tries", stats.numTries}, - {"num_trie_leafs", stats.numTrieLeafs}, - {"dijkstra_iters", stats.dijkstraIters}, - {"time_solve", 
stats.solveTime}, - {"time_read_osm", tOsmBuild}, - {"time_read_gtfs", tGtfsBuild}, - {"time_tot", T_STOP(total)}, - {"peak-memory", util::readableSize(util::getPeakRSS())}, - {"peak-memory-bytes", util::getPeakRSS()}}}}; - - std::ofstream ofs; - ofs.open(cfg.dbgOutputPath + "/stats.json"); - util::json::Writer wr(&ofs, 10, true); - wr.val(jsonStats); - wr.closeAll(); - } + if (cfg.evaluate) ecoll.printStats(&std::cout); if (cfg.feedPaths.size()) { try { + mkdir(cfg.outputPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); LOG(INFO) << "Writing output GTFS to " << cfg.outputPath << " ..."; pfaedle::gtfs::Writer w; w.write(>fs[0], cfg.outputPath); } catch (const ad::cppgtfs::WriterException& ex) { - LOG(ERROR) << "Could not write output GTFS feed, reason was:"; + LOG(ERROR) << "Could not write final GTFS feed, reason was:"; std::cerr << ex.what() << std::endl; exit(static_cast(RetCode::GTFS_WRITE_ERR)); } @@ -447,27 +316,9 @@ int main(int argc, char** argv) { // _____________________________________________________________________________ std::string getFileNameMotStr(const MOTs& mots) { - MOTs tmp = mots; std::string motStr; - - std::string names[11] = {"tram", "subway", "rail", "bus", - "ferry", "cablecar", "gondola", "funicular", - "coach", "trolleybus", "monorail"}; - - for (const auto& n : names) { - const auto& types = ad::cppgtfs::gtfs::flat::Route::getTypesFromString(n); - const auto& isect = pfaedle::router::motISect(tmp, types); - - if (isect.size() == types.size()) { - if (motStr.size()) motStr += "-"; - motStr += n; - for (const auto& mot : isect) tmp.erase(mot); - } - } - - for (const auto& mot : tmp) { - if (motStr.size()) motStr += "-"; - motStr += ad::cppgtfs::gtfs::flat::Route::getTypeString(mot); + for (const auto& mot : mots) { + motStr += "-" + ad::cppgtfs::gtfs::flat::Route::getTypeString(mot); } return motStr; @@ -478,10 +329,12 @@ std::vector getCfgPaths(const Config& cfg) { if (cfg.configPaths.size()) return cfg.configPaths; std::vector 
ret; + // install prefix global configuration path, if available { - auto path = std::string(INSTALL_PREFIX) + std::string(CFG_DIR) + "/" + - "pfaedle" + "/" + CFG_FILE_NAME; + auto path = std::string(INSTALL_PREFIX) + + std::string(CFG_DIR) + "/" + "pfaedle" + "/" + + CFG_FILE_NAME; std::ifstream is(path); LOG(DEBUG) << "Testing for config file at " << path; @@ -493,8 +346,8 @@ std::vector getCfgPaths(const Config& cfg) { // local user configuration path, if available { - auto path = util::getHomeDir() + CFG_HOME_SUFFIX + "/" + "pfaedle" + "/" + - CFG_FILE_NAME; + auto path = util::getHomeDir() + CFG_HOME_SUFFIX + "/" + + "pfaedle" + "/" + CFG_FILE_NAME; std::ifstream is(path); LOG(DEBUG) << "Testing for config file at " << path; diff --git a/src/pfaedle/config/ConfigReader.cpp b/src/pfaedle/config/ConfigReader.cpp index 9e62a96..c9ef614 100644 --- a/src/pfaedle/config/ConfigReader.cpp +++ b/src/pfaedle/config/ConfigReader.cpp @@ -10,18 +10,16 @@ #include "pfaedle/Def.h" #include "pfaedle/_config.h" #include "pfaedle/config/ConfigReader.h" -#include "pfaedle/config/PfaedleConfig.h" #include "util/String.h" -#include "util/geo/Geo.h" #include "util/log/Log.h" using pfaedle::config::ConfigReader; -using std::exception; using std::string; +using std::exception; using std::vector; -static const char* YEAR = &__DATE__[7]; +static const char* YEAR = __DATE__ + 7; static const char* COPY = "University of Freiburg - Chair of Algorithms and Data Structures"; static const char* AUTHORS = "Patrick Brosi "; @@ -30,10 +28,11 @@ static const char* AUTHORS = "Patrick Brosi "; void ConfigReader::help(const char* bin) { std::cout << std::setfill(' ') << std::left << "pfaedle GTFS map matcher " << VERSION_FULL << "\n(built " << __DATE__ << " " << __TIME__ - << " with geometry precision <" << PFDL_PREC_STR << ">)\n\n" + << " with geometry precision <" << PFAEDLE_PRECISION_STR << ">)\n\n" << "(C) " << YEAR << " " << COPY << "\n" << "Authors: " << AUTHORS << "\n\n" - << "Usage: " << 
bin << " -x \n\n" + << "Usage: " << bin + << " -x \n\n" << "Allowed options:\n\n" << "General:\n" << std::setw(35) << " -v [ --version ]" @@ -44,8 +43,6 @@ void ConfigReader::help(const char* bin) { << "drop shapes already present in the feed and\n" << std::setw(35) << " " << " recalculate them\n" - << std::setw(35) << " --write-colors" - << "write matched route line colors, where missing\n" << "\nInput:\n" << std::setw(35) << " -c [ --config ] arg" << "pfaedle config file\n" @@ -65,11 +62,7 @@ void ConfigReader::help(const char* bin) { << std::setw(35) << " " << " ferry | boat | ship, cablecar, gondola,\n" << std::setw(35) << " " - << " funicular, coach, mono-rail | monorail,\n" - << std::setw(35) << " " - << " trolley | trolleybus | trolley-bus} or\n" - << std::setw(35) << " " - << " as GTFS mot codes\n" + << " funicular, coach} or as GTFS mot codes\n" << "\nOutput:\n" << std::setw(35) << " -o [ --output ] arg (=gtfs-out)" << "GTFS output path\n" @@ -90,6 +83,26 @@ void ConfigReader::help(const char* bin) { << "write routing graph as GeoJSON to\n" << std::setw(35) << " " << " /graph.json\n" + << std::setw(35) << " --write-cgraph" + << "if -T is set, write combination graph as\n" + << std::setw(35) << " " + << " GeoJSON to " + "/combgraph.json\n" + << std::setw(35) << " --method arg (=global)" + << "matching method to use, either 'global'\n" + << std::setw(35) << " " + << " (based on HMM), 'greedy' or " + "'greedy2'\n" + << std::setw(35) << " --eval" + << "evaluate existing shapes against matched\n" + << std::setw(35) << " " + << " shapes and print results\n" + << std::setw(35) << " --eval-path arg (=.)" + << "path for eval file output\n" + << std::setw(35) << " --eval-df-bins arg (= )" + << "bins to use for d_f histogram, comma sep.\n" + << std::setw(35) << " " + << " (e.g. 
10,20,30,40)\n" << "\nMisc:\n" << std::setw(35) << " -T [ --trip-id ] arg" << "Do routing only for trip , write result \n" @@ -97,24 +110,12 @@ void ConfigReader::help(const char* bin) { << " to /path.json\n" << std::setw(35) << " --overpass" << "Output overpass query for matching OSM data\n" - << std::setw(35) << " --osmfilter" - << "Output osmfilter filter rules for matching OSM data\n" << std::setw(35) << " --grid-size arg (=2000)" - << "Approx. grid cell size in meters\n" - << std::setw(35) << " --no-fast-hops" - << "Disable fast hops technique\n" - << std::setw(35) << " --no-a-star" - << "Disable A* heuristic \n" - << std::setw(35) << " --no-trie" - << "Disable trip tries \n" - << std::setw(35) << " --no-hop-cache" - << "Disable hop cache \n" - << std::setw(35) << " --stats" - << "write stats to stats.json\n" - << std::setw(35) << " -W [ --warn ]" - << "enable verbose warning messages\n" - << std::setw(35) << " -P" - << "additional parameter string (in cfg file format)\n"; + << "Grid cell size\n" + << std::setw(35) << " --use-route-cache" + << "(experimental) cache intermediate routing\n" + << std::setw(35) << " " + << " results\n"; } // _____________________________________________________________________________ @@ -130,44 +131,49 @@ void ConfigReader::read(Config* cfg, int argc, char** argv) { {"mots", required_argument, NULL, 'm'}, {"grid-size", required_argument, 0, 'g'}, {"overpass", no_argument, 0, 'a'}, - {"osmfilter", no_argument, 0, 'f'}, {"osm-out", required_argument, 0, 'X'}, {"trip-id", required_argument, 0, 'T'}, {"write-graph", no_argument, 0, 1}, + {"write-cgraph", no_argument, 0, 2}, {"write-trgraph", no_argument, 0, 4}, + {"method", required_argument, 0, 5}, + {"eval", no_argument, 0, 3}, + {"eval-path", required_argument, 0, 6}, + {"eval-df-bins", required_argument, 0, 7}, {"dbg-path", required_argument, 0, 'd'}, {"version", no_argument, 0, 'v'}, {"help", no_argument, 0, 'h'}, {"inplace", no_argument, 0, 9}, - {"no-fast-hops", no_argument, 
0, 10}, - {"no-a-star", no_argument, 0, 11}, - {"no-trie", no_argument, 0, 12}, - {"write-colors", no_argument, 0, 13}, - {"stats", no_argument, 0, 14}, - {"no-hop-cache", no_argument, 0, 15}, - {"gaussian-noise", required_argument, 0, 16}, - {"warn", no_argument, 0, 'W'}, - {"keep-additional-gtfs-fields", no_argument, 0, 'F'}, + {"use-route-cache", no_argument, 0, 8}, {0, 0, 0, 0}}; - int c; - while ((c = getopt_long(argc, argv, ":o:hvi:c:x:Dm:g:X:T:d:pP:FW", ops, 0)) != + char c; + while ((c = getopt_long(argc, argv, ":o:hvi:c:x:Dm:g:X:T:d:p", ops, 0)) != -1) { switch (c) { case 1: cfg->writeGraph = true; break; + case 2: + cfg->writeCombGraph = true; + break; + case 3: + cfg->evaluate = true; + break; case 4: cfg->buildTransitGraph = true; break; - case 10: - cfg->noFastHops = true; + case 5: + cfg->solveMethod = optarg; break; - case 11: - cfg->noAStar = true; + case 6: + cfg->evalPath = optarg; break; - case 12: - cfg->noTrie = true; + case 7: + cfg->evalDfBins = optarg; + break; + case 8: + cfg->useCaching = true; break; case 'o': cfg->outputPath = optarg; @@ -188,7 +194,7 @@ void ConfigReader::read(Config* cfg, int argc, char** argv) { motStr = optarg; break; case 'g': - cfg->gridSize = atof(optarg) / util::geo::M_PER_DEG; + cfg->gridSize = atof(optarg); break; case 'X': cfg->writeOsm = optarg; @@ -196,41 +202,22 @@ void ConfigReader::read(Config* cfg, int argc, char** argv) { case 'T': cfg->shapeTripId = optarg; break; - case 'P': - cfg->motCfgParam += std::string("\n") + optarg; - break; case 'd': cfg->dbgOutputPath = optarg; break; case 'a': cfg->writeOverpass = true; break; - case 'f': - cfg->writeOsmfilter = true; - break; case 9: cfg->inPlace = true; break; - case 13: - cfg->writeColors = true; - break; - case 14: - cfg->writeStats = true; - break; - case 15: - cfg->noHopCache = true; - break; - case 16: - cfg->gaussianNoise = atof(optarg); - break; - case 'W': - cfg->verbosity = 1; - break; - case 'F': - cfg->parseAdditionalGTFSFields = true; - break; 
case 'v': - std::cout << "pfaedle " << VERSION_FULL << std::endl; + std::cout << "pfaedle " << VERSION_FULL << " (built " << __DATE__ << " " + << __TIME__ << " with geometry precision <" + << PFAEDLE_PRECISION_STR << ">)\n" + << "(C) " << YEAR << " " << COPY << "\n" + << "Authors: " << AUTHORS << "\nGNU General Public " + "License v3.0\n"; exit(0); case 'p': printOpts = true; diff --git a/src/pfaedle/config/MotConfig.h b/src/pfaedle/config/MotConfig.h index f99f4eb..3b7ab48 100644 --- a/src/pfaedle/config/MotConfig.h +++ b/src/pfaedle/config/MotConfig.h @@ -17,11 +17,20 @@ struct MotConfig { router::MOTs mots; osm::OsmReadOpts osmBuildOpts; router::RoutingOpts routingOpts; - std::string transWeight; + std::map unproced; }; inline bool operator==(const MotConfig& a, const MotConfig& b) { - return a.osmBuildOpts == b.osmBuildOpts && a.routingOpts == b.routingOpts; + bool unprocedEq = a.unproced.size() == b.unproced.size(); + for (const auto& kv : a.unproced) { + if (!b.unproced.count(kv.first) || + b.unproced.find(kv.first)->second != kv.second) { + unprocedEq = false; + break; + } + } + return a.osmBuildOpts == b.osmBuildOpts && a.routingOpts == b.routingOpts && + unprocedEq; } } // namespace config diff --git a/src/pfaedle/config/MotConfigReader.cpp b/src/pfaedle/config/MotConfigReader.cpp index 52b8218..96fce0e 100644 --- a/src/pfaedle/config/MotConfigReader.cpp +++ b/src/pfaedle/config/MotConfigReader.cpp @@ -2,33 +2,28 @@ // Chair of Algorithms and Data Structures. 
// Authors: Patrick Brosi -#include #include #include #include "pfaedle/config/MotConfigReader.h" -#include "pfaedle/osm/OsmReadOpts.h" #include "util/Misc.h" #include "util/String.h" #include "util/log/Log.h" -using ad::cppgtfs::gtfs::Route; -using configparser::ConfigFileParser; -using configparser::ParseExc; -using pfaedle::config::MotConfig; using pfaedle::config::MotConfigReader; -using pfaedle::osm::DeepAttrRule; +using pfaedle::config::MotConfig; using pfaedle::osm::FilterRule; using pfaedle::osm::KeyVal; +using configparser::ConfigFileParser; +using configparser::ParseExc; +using pfaedle::osm::DeepAttrRule; using pfaedle::trgraph::ReplRules; - -double DEF_TRANS_PEN = 0.0083; +using ad::cppgtfs::gtfs::Route; // _____________________________________________________________________________ MotConfigReader::MotConfigReader() {} // _____________________________________________________________________________ -void MotConfigReader::parse(const std::vector& paths, - const std::string& literal) { +void MotConfigReader::parse(const std::vector& paths) { ConfigFileParser p; // parse explicitely given paths @@ -37,47 +32,17 @@ void MotConfigReader::parse(const std::vector& paths, p.parse(s); } - if (literal.size()) p.parseStr(literal); - for (const auto& sec : p.getSecs()) { - MotConfig cfg; - - cfg.transWeight = "expo"; - + MotConfig curCfg; std::string secStr = sec.first; if (secStr.empty()) continue; - - if (p.hasKey(secStr, "routing_emission_method")) { - cfg.routingOpts.emPenMethod = - p.getStr(secStr, "routing_emission_method"); - } else { - cfg.routingOpts.emPenMethod = "exp"; - } - - if (p.hasKey(secStr, "routing_transition_method")) { - cfg.routingOpts.transPenMethod = - p.getStr(secStr, "routing_transition_method"); - } else { - cfg.routingOpts.transPenMethod = "exp"; - } - - if (p.hasKey(secStr, "station_similarity_classification_method")) { - cfg.routingOpts.statsimiMethod = - p.getStr(secStr, "station_similarity_classification_method"); - } else { - 
cfg.routingOpts.statsimiMethod = "jaccard-geodist"; - } - - if (p.hasKey(secStr, "routing_use_stations")) { - cfg.routingOpts.useStations = p.getBool(secStr, "routing_use_stations"); - } else { - cfg.routingOpts.useStations = true; - } + std::set procedKeys; if (p.hasKey(secStr, "osm_filter_keep")) { + procedKeys.insert("osm_filter_keep"); for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_keep", ' ')) { auto fRule = getFRule(kvs); - cfg.osmBuildOpts.keepFilter[fRule.kv.first].insert( + curCfg.osmBuildOpts.keepFilter[fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } @@ -85,468 +50,321 @@ void MotConfigReader::parse(const std::vector& paths, for (uint8_t i = 0; i < 8; i++) { std::string name = std::string("osm_filter_lvl") + std::to_string(i); if (p.hasKey(secStr, name)) { + procedKeys.insert(name); for (const auto& kvs : p.getStrArr(sec.first, name, ' ')) { auto fRule = getFRule(kvs); - cfg.osmBuildOpts.levelFilters[i][fRule.kv.first].insert( + curCfg.osmBuildOpts.levelFilters[i][fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } } if (p.hasKey(secStr, "osm_filter_drop")) { + procedKeys.insert("osm_filter_drop"); for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_drop", ' ')) { auto fRule = getFRule(kvs); - cfg.osmBuildOpts.dropFilter[fRule.kv.first].insert( + curCfg.osmBuildOpts.dropFilter[fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } if (p.hasKey(secStr, "osm_max_snap_level")) { - cfg.osmBuildOpts.maxSnapLevel = p.getInt(sec.first, "osm_max_snap_level"); + procedKeys.insert("osm_max_snap_level"); + curCfg.osmBuildOpts.maxSnapLevel = + p.getInt(sec.first, "osm_max_snap_level"); } else { - cfg.osmBuildOpts.maxSnapLevel = 7; + curCfg.osmBuildOpts.maxSnapLevel = 7; } if (p.hasKey(secStr, "osm_filter_nohup")) { + procedKeys.insert("osm_filter_nohup"); for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_nohup", ' ')) { auto fRule = 
getFRule(kvs); - cfg.osmBuildOpts.noHupFilter[fRule.kv.first].insert( + curCfg.osmBuildOpts.noHupFilter[fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } if (p.hasKey(secStr, "osm_filter_oneway")) { + procedKeys.insert("osm_filter_oneway"); for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_oneway", ' ')) { auto fRule = getFRule(kvs); - cfg.osmBuildOpts.oneWayFilter[fRule.kv.first].insert( + curCfg.osmBuildOpts.oneWayFilter[fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } if (p.hasKey(secStr, "osm_filter_oneway_reverse")) { + procedKeys.insert("osm_filter_oneway_reverse"); for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_oneway_reverse", ' ')) { auto fRule = getFRule(kvs); - cfg.osmBuildOpts.oneWayFilterRev[fRule.kv.first].insert( + curCfg.osmBuildOpts.oneWayFilterRev[fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } if (p.hasKey(secStr, "osm_filter_undirected")) { + procedKeys.insert("osm_filter_undirected"); for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_undirected", ' ')) { auto fRule = getFRule(kvs); - cfg.osmBuildOpts.twoWayFilter[fRule.kv.first].insert( + curCfg.osmBuildOpts.twoWayFilter[fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } if (p.hasKey(secStr, "osm_filter_station")) { + procedKeys.insert("osm_filter_station"); for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_station", ' ')) { auto fRule = getFRule(kvs); - cfg.osmBuildOpts.stationFilter[fRule.kv.first].insert( + curCfg.osmBuildOpts.stationFilter[fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } if (p.hasKey(secStr, "osm_filter_station_blocker")) { + procedKeys.insert("osm_filter_station_blocker"); for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_station_blocker", ' ')) { auto fRule = getFRule(kvs); - cfg.osmBuildOpts.stationBlockerFilter[fRule.kv.first].insert( - 
osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); - } - } - - if (p.hasKey(secStr, "osm_filter_turning_circle")) { - for (const auto& kvs : - p.getStrArr(sec.first, "osm_filter_turning_circle", ' ')) { - auto fRule = getFRule(kvs); - cfg.osmBuildOpts.turnCycleFilter[fRule.kv.first].insert( + curCfg.osmBuildOpts.stationBlockerFilter[fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } if (p.hasKey(secStr, "osm_node_positive_restriction")) { + procedKeys.insert("osm_node_positive_restriction"); for (const auto& kvs : p.getStrArr(sec.first, "osm_node_positive_restriction", ' ')) { auto fRule = getFRule(kvs); - cfg.osmBuildOpts.restrPosRestr[fRule.kv.first].insert( + curCfg.osmBuildOpts.restrPosRestr[fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } if (p.hasKey(secStr, "osm_node_negative_restriction")) { + procedKeys.insert("osm_node_negative_restriction"); for (const auto& kvs : p.getStrArr(sec.first, "osm_node_negative_restriction", ' ')) { auto fRule = getFRule(kvs); - cfg.osmBuildOpts.restrNegRestr[fRule.kv.first].insert( + curCfg.osmBuildOpts.restrNegRestr[fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } if (p.hasKey(secStr, "osm_filter_no_restriction")) { + procedKeys.insert("osm_filter_no_restriction"); for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_no_restriction", ' ')) { auto fRule = getFRule(kvs); - cfg.osmBuildOpts.noRestrFilter[fRule.kv.first].insert( + curCfg.osmBuildOpts.noRestrFilter[fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } if (p.hasKey(secStr, "osm_station_name_attrs")) { + procedKeys.insert("osm_station_name_attrs"); for (const std::string& r : p.getStrArr(sec.first, "osm_station_name_attrs", ' ')) { - cfg.osmBuildOpts.statAttrRules.nameRule.push_back(getDeepAttrRule(r)); + curCfg.osmBuildOpts.statAttrRules.nameRule.push_back( + getDeepAttrRule(r)); } } if (p.hasKey(secStr, 
"osm_track_number_tags")) { + procedKeys.insert("osm_track_number_tags"); for (const std::string& r : p.getStrArr(sec.first, "osm_track_number_tags", ' ')) { - cfg.osmBuildOpts.statAttrRules.platformRule.push_back( + curCfg.osmBuildOpts.statAttrRules.platformRule.push_back( getDeepAttrRule(r)); } } if (p.hasKey(secStr, "osm_station_id_attrs")) { + procedKeys.insert("osm_station_id_attrs"); for (const std::string& r : p.getStrArr(sec.first, "osm_station_id_attrs", ' ')) { - cfg.osmBuildOpts.statAttrRules.idRule.push_back(getDeepAttrRule(r)); + curCfg.osmBuildOpts.statAttrRules.idRule.push_back(getDeepAttrRule(r)); } } if (p.hasKey(secStr, "osm_edge_track_number_tags")) { + procedKeys.insert("osm_edge_track_number_tags"); for (const std::string& r : p.getStrArr(sec.first, "osm_edge_track_number_tags", ' ')) { - cfg.osmBuildOpts.edgePlatformRules.push_back(getDeepAttrRule(r)); + curCfg.osmBuildOpts.edgePlatformRules.push_back(getDeepAttrRule(r)); } } if (p.hasKey(secStr, "osm_station_group_attrs")) { - LOG(WARN) << "Option osm_station_group_attrs has been removed."; + procedKeys.insert("osm_station_group_attrs"); + auto arr = p.getStrArr(secStr, "osm_station_group_attrs", ' '); + + for (const auto& ruleStr : arr) { + auto deep = getDeepAttrRule(ruleStr); + // TODO(patrick): getKv is misused here as a a=b parser + auto attrD = getKv(deep.attr); + deep.attr = attrD.first; + double dist = atof(attrD.second.c_str()); + curCfg.osmBuildOpts.statGroupNAttrRules.push_back({deep, dist}); + } } - // default value, to enable color writing on old configs - cfg.osmBuildOpts.relLinerules.colorRule = {"colour", "color"}; - if (p.hasKey(secStr, "osm_line_relation_tags")) { + procedKeys.insert("osm_line_relation_tags"); auto arr = p.getStrArr(secStr, "osm_line_relation_tags", ' '); for (const auto& ruleStr : arr) { auto rule = getKv(ruleStr); auto tags = util::split(rule.second, ','); if (rule.first == "from_name") - cfg.osmBuildOpts.relLinerules.fromNameRule = tags; + 
curCfg.osmBuildOpts.relLinerules.fromNameRule = tags; else if (rule.first == "to_name") - cfg.osmBuildOpts.relLinerules.toNameRule = tags; + curCfg.osmBuildOpts.relLinerules.toNameRule = tags; else if (rule.first == "line_name") - cfg.osmBuildOpts.relLinerules.sNameRule = tags; - else if (rule.first == "line_color") - cfg.osmBuildOpts.relLinerules.colorRule = tags; + curCfg.osmBuildOpts.relLinerules.sNameRule = tags; } } - cfg.osmBuildOpts.maxSnapDistance = 50; if (p.hasKey(secStr, "osm_max_snap_distance")) { - auto v = p.getDoubleArr(secStr, "osm_max_snap_distance", ','); - if (v.size()) cfg.osmBuildOpts.maxSnapDistance = v.back(); - } - - cfg.osmBuildOpts.maxStationCandDistance = - cfg.osmBuildOpts.maxSnapDistance * 2; - if (p.hasKey(secStr, "osm_max_station_cand_distance")) { - auto v = p.getDouble(secStr, "osm_max_station_cand_distance"); - cfg.osmBuildOpts.maxStationCandDistance = v; + procedKeys.insert("osm_max_snap_distance"); + curCfg.osmBuildOpts.maxSnapDistances = + p.getDoubleArr(secStr, "osm_max_snap_distance", ','); + } else { + curCfg.osmBuildOpts.maxSnapDistances.push_back(50); } if (p.hasKey(secStr, "osm_max_snap_fallback_distance")) { - LOG(WARN) << "Option osm_max_snap_fallback_distance has been removed."; + procedKeys.insert("osm_max_snap_fallback_distance"); + curCfg.osmBuildOpts.maxSnapFallbackHeurDistance = + p.getDouble(secStr, "osm_max_snap_fallback_distance"); + } else { + curCfg.osmBuildOpts.maxSnapFallbackHeurDistance = + *std::max_element(curCfg.osmBuildOpts.maxSnapDistances.begin(), + curCfg.osmBuildOpts.maxSnapDistances.end()) * + 2; } if (p.hasKey(secStr, "osm_max_osm_station_distance")) { - double ref = p.getDouble(secStr, "osm_max_osm_station_distance"); - cfg.osmBuildOpts.maxOsmStationDistances.push_back(ref); + procedKeys.insert("osm_max_osm_station_distance"); + curCfg.osmBuildOpts.maxOsmStationDistance = + p.getDouble(secStr, "osm_max_osm_station_distance"); } else { - cfg.osmBuildOpts.maxOsmStationDistances.push_back(15); + 
curCfg.osmBuildOpts.maxOsmStationDistance = 5; } if (p.hasKey(secStr, "osm_max_node_block_distance")) { - cfg.osmBuildOpts.maxBlockDistance = + procedKeys.insert("osm_max_node_block_distance"); + curCfg.osmBuildOpts.maxBlockDistance = p.getDouble(secStr, "osm_max_node_block_distance"); } else { - cfg.osmBuildOpts.maxBlockDistance = - *std::max_element(cfg.osmBuildOpts.maxOsmStationDistances.begin(), - cfg.osmBuildOpts.maxOsmStationDistances.end()) / + curCfg.osmBuildOpts.maxBlockDistance = + *std::max_element(curCfg.osmBuildOpts.maxSnapDistances.begin(), + curCfg.osmBuildOpts.maxSnapDistances.end()) / 8; } - double DEF_SPEED = 85; for (uint8_t i = 0; i < 8; i++) { std::string name = std::string("routing_lvl") + std::to_string(i) + "_fac"; if (p.hasKey(secStr, name)) { - double f = p.getPosDouble(sec.first, name); - LOG(WARN) << "Option " << name << " is deprecated, use osm_lvl" - << std::to_string(i) << "_avg_speed instead."; - double v = DEF_SPEED / f; - LOG(DEBUG) << " (using osm_lvl" << std::to_string(i) << "_avg_speed of " - << v << " instead)"; - cfg.osmBuildOpts.levelDefSpeed[i] = v * 0.2777; // store in m/s + procedKeys.insert(name); + double v = p.getDouble(sec.first, name); + curCfg.routingOpts.levelPunish[i] = v; + } else { + curCfg.routingOpts.levelPunish[i] = 1; } } - for (uint8_t i = 0; i < 8; i++) { - std::string name = - std::string("osm_lvl") + std::to_string(i) + "_avg_speed"; - if (p.hasKey(secStr, name)) { - double v = p.getPosDouble(sec.first, name); - cfg.osmBuildOpts.levelDefSpeed[i] = v * 0.2777; // store in m/s - } - } - - if (p.hasKey(secStr, "routing_one_way_meter_punish_fac")) { - LOG(WARN) << "Option routing_one_way_meter_punish_fac is deprecated, use " - "osm_one_way_speed_penalty_fac instead."; - cfg.osmBuildOpts.oneWaySpeedPen = - 1 + p.getPosDouble(secStr, "routing_one_way_meter_punish_fac"); - LOG(DEBUG) << " (using osm_one_way_speed_penalty_fac of " - << cfg.osmBuildOpts.oneWaySpeedPen << " instead)"; - } else { - 
cfg.osmBuildOpts.oneWaySpeedPen = 1; - } - - if (p.hasKey(secStr, "osm_one_way_speed_penalty_fac")) { - cfg.osmBuildOpts.oneWaySpeedPen = - p.getPosDouble(secStr, "osm_one_way_speed_penalty_fac"); - } else { - // def already set above - } - - if (p.hasKey(secStr, "osm_one_way_entry_cost")) { - cfg.osmBuildOpts.oneWayEntryCost = - p.getPosDouble(secStr, "osm_one_way_entry_cost"); - - } else { - cfg.osmBuildOpts.oneWayEntryCost = 0; - } - - // take the same cost for taking restricted turns to keep - // configuration simple - double val = cfg.osmBuildOpts.oneWayEntryCost * 10.0; - if (val > std::numeric_limits::max()) { - val = std::numeric_limits::max(); - } - - cfg.routingOpts.turnRestrCost = val; - if (p.hasKey(secStr, "routing_full_turn_punish")) { - double val = p.getPosDouble(secStr, "routing_full_turn_punish"); - - LOG(WARN) << "Option routing_full_turn_punish is deprecated, use " - "routing_full_turn_penalty instead."; - - val /= cfg.osmBuildOpts.levelDefSpeed[0]; - - LOG(DEBUG) << " (using routing_full_turn_penalty of " << val - << " instead)"; - - val *= 10.0; - - if (val > std::numeric_limits::max()) { - val = std::numeric_limits::max(); - } - - cfg.routingOpts.fullTurnPunishFac = val; - } - - if (p.hasKey(secStr, "routing_full_turn_penalty")) { - double val = p.getPosDouble(secStr, "routing_full_turn_penalty") * 10.0; - - if (val > std::numeric_limits::max()) { - val = std::numeric_limits::max(); - } - - cfg.routingOpts.fullTurnPunishFac = val; + procedKeys.insert("routing_full_turn_punish"); + curCfg.routingOpts.fullTurnPunishFac = + p.getDouble(secStr, "routing_full_turn_punish"); } if (p.hasKey(secStr, "routing_no_self_hops")) { - cfg.routingOpts.noSelfHops = p.getBool(secStr, "routing_no_self_hops"); + procedKeys.insert("routing_no_self_hops"); + curCfg.routingOpts.noSelfHops = p.getBool(secStr, "routing_no_self_hops"); } if (p.hasKey(secStr, "routing_full_turn_angle")) { - double ang = p.getPosDouble(secStr, "routing_full_turn_angle"); - 
cfg.routingOpts.fullTurnAngle = ang; - cfg.osmBuildOpts.fullTurnAngle = ang; + procedKeys.insert("routing_full_turn_angle"); + double ang = p.getDouble(secStr, "routing_full_turn_angle"); + curCfg.routingOpts.fullTurnAngle = ang; + curCfg.osmBuildOpts.fullTurnAngle = ang; } else { - cfg.routingOpts.fullTurnAngle = 5; - cfg.osmBuildOpts.fullTurnAngle = 5; + curCfg.routingOpts.fullTurnAngle = 5; + curCfg.osmBuildOpts.fullTurnAngle = 5; } if (p.hasKey(secStr, "routing_snap_full_turn_angle")) { - double ang = p.getPosDouble(secStr, "routing_snap_full_turn_angle"); - cfg.osmBuildOpts.maxAngleSnapReach = ang; + procedKeys.insert("routing_snap_full_turn_angle"); + double ang = p.getDouble(secStr, "routing_snap_full_turn_angle"); + curCfg.osmBuildOpts.maxAngleSnapReach = ang; } else { - cfg.osmBuildOpts.maxAngleSnapReach = cfg.routingOpts.fullTurnAngle; + curCfg.osmBuildOpts.maxAngleSnapReach = curCfg.routingOpts.fullTurnAngle; } if (p.hasKey(secStr, "routing_pass_thru_station_punish")) { - LOG(WARN) << "Option routing_pass_thru_station_punish has been removed."; + procedKeys.insert("routing_pass_thru_station_punish"); + curCfg.routingOpts.passThruStationsPunish = + p.getDouble(secStr, "routing_pass_thru_station_punish"); } - cfg.routingOpts.turnRestrCost *= 10.0; - - if (p.hasKey(secStr, "routing_no_lines_punish_fac")) { - LOG(WARN) << "Option routing_no_lines_punish_fac is deprecated, use " - "routing_no_lines_penalty_fac instead."; - - cfg.routingOpts.noLinesPunishFact = - 1 + p.getPosDouble(secStr, "routing_no_lines_punish_fac"); - - LOG(DEBUG) << " (using routing_no_lines_penalty_fac of " - << cfg.routingOpts.noLinesPunishFact << " instead)"; - } else { - cfg.routingOpts.noLinesPunishFact = 1; + if (p.hasKey(secStr, "routing_one_way_meter_punish_fac")) { + procedKeys.insert("routing_one_way_meter_punish_fac"); + curCfg.routingOpts.oneWayPunishFac = + p.getDouble(secStr, "routing_one_way_meter_punish_fac"); } - if (p.hasKey(secStr, "routing_no_lines_penalty_fac")) { - 
cfg.routingOpts.noLinesPunishFact = - p.getPosDouble(secStr, "routing_no_lines_penalty_fac"); - } else { - // default already set above + if (p.hasKey(secStr, "routing_one_way_edge_punish")) { + procedKeys.insert("routing_one_way_edge_punish"); + curCfg.routingOpts.oneWayEdgePunish = + p.getDouble(secStr, "routing_one_way_edge_punish"); } - // store this at two places, as we are writing the punishment into the graph - cfg.osmBuildOpts.noLinesPunishFact = cfg.routingOpts.noLinesPunishFact; - if (p.hasKey(secStr, "routing_line_unmatched_punish_fac")) { - LOG(WARN) - << "Option routing_line_unmatched_punish_fac is deprecated, use " - "routing_line_unmatched_time_penalty_fac, " - "routing_line_station_from_unmatched_time_penalty, and " - "routing_line_station_to_unmatched_time_penalty instead."; - - cfg.routingOpts.lineUnmatchedPunishFact = - 1 + p.getPosDouble(secStr, "routing_line_unmatched_punish_fac") / 3; - - cfg.routingOpts.lineNameFromUnmatchedPunishFact = - 1 + p.getPosDouble(secStr, "routing_line_unmatched_punish_fac") / 3; - - cfg.routingOpts.lineNameToUnmatchedPunishFact = - 1 + p.getPosDouble(secStr, "routing_line_unmatched_punish_fac") / 3; - - LOG(DEBUG) << " (using routing_line_unmatched_punish_fac of " - << cfg.routingOpts.lineUnmatchedPunishFact << " instead)"; - LOG(DEBUG) - << " (using routing_line_station_from_unmatched_time_penalty of " - << cfg.routingOpts.lineNameFromUnmatchedPunishFact << " instead)"; - LOG(DEBUG) << " (using routing_line_station_to_unmatched_time_penalty of " - << cfg.routingOpts.lineNameToUnmatchedPunishFact - << " instead)"; + procedKeys.insert("routing_line_unmatched_punish_fac"); + curCfg.routingOpts.lineUnmatchedPunishFact = + p.getDouble(secStr, "routing_line_unmatched_punish_fac"); } - if (p.hasKey(secStr, "routing_line_unmatched_time_penalty_fac")) { - cfg.routingOpts.lineUnmatchedPunishFact = - p.getPosDouble(secStr, "routing_line_unmatched_time_penalty_fac"); - } - - if (p.hasKey(secStr, 
"routing_line_station_from_unmatched_time_penalty")) { - cfg.routingOpts.lineNameFromUnmatchedPunishFact = p.getPosDouble( - secStr, "routing_line_station_from_unmatched_time_penalty"); - } - - if (p.hasKey(secStr, "routing_line_station_to_unmatched_time_penalty")) { - cfg.routingOpts.lineNameToUnmatchedPunishFact = p.getPosDouble( - secStr, "routing_line_station_to_unmatched_time_penalty"); + if (p.hasKey(secStr, "routing_no_lines_punish_fac")) { + procedKeys.insert("routing_no_lines_punish_fac"); + curCfg.routingOpts.noLinesPunishFact = + p.getDouble(secStr, "routing_no_lines_punish_fac"); } if (p.hasKey(secStr, "routing_platform_unmatched_punish")) { - LOG(WARN) - << "Option routing_platform_unmatched_punish is deprecated, use " - "routing_platform_unmatched_penalty instead."; - cfg.routingOpts.platformUnmatchedPen = - p.getPosDouble(secStr, "routing_platform_unmatched_punish"); - - cfg.routingOpts.platformUnmatchedPen = - cfg.routingOpts.platformUnmatchedPen * - (DEF_TRANS_PEN / cfg.osmBuildOpts.levelDefSpeed[0]); - - LOG(DEBUG) << " (using routing_platform_unmatched_penalty of " - << cfg.routingOpts.platformUnmatchedPen << " instead)"; - } else { - cfg.routingOpts.platformUnmatchedPen = 0; - } - - if (p.hasKey(secStr, "routing_platform_unmatched_penalty")) { - cfg.routingOpts.platformUnmatchedPen = - p.getPosDouble(secStr, "routing_platform_unmatched_penalty"); - } else { - // default already set above - } - - if (p.hasKey(secStr, "routing_transition_penalty_fac")) { - cfg.routingOpts.transitionPen = - p.getPosDouble(secStr, "routing_transition_penalty_fac"); - } else { - cfg.routingOpts.transitionPen = DEF_TRANS_PEN; - } - - if (p.hasKey(secStr, "routing_station_distance_punish_fac")) { - cfg.routingOpts.stationDistPenFactor = - p.getPosDouble(secStr, "routing_station_distance_punish_fac"); - LOG(WARN) << "Option routing_station_distance_punish_fac is deprecated, " - "use routing_station_move_penalty_fac instead."; - cfg.routingOpts.stationDistPenFactor = - 
cfg.routingOpts.stationDistPenFactor * - (DEF_TRANS_PEN / cfg.osmBuildOpts.levelDefSpeed[0]); - LOG(DEBUG) << " (using routing_station_move_penalty_fac of " - << cfg.routingOpts.stationDistPenFactor << " instead)"; - } else { - cfg.routingOpts.stationDistPenFactor = - cfg.routingOpts.stationDistPenFactor * - (DEF_TRANS_PEN / cfg.osmBuildOpts.levelDefSpeed[0]); - } - - if (p.hasKey(secStr, "routing_station_move_penalty_fac")) { - cfg.routingOpts.stationDistPenFactor = - p.getPosDouble(secStr, "routing_station_move_penalty_fac"); - } else { - // the default value was already set above + procedKeys.insert("routing_platform_unmatched_punish"); + curCfg.routingOpts.platformUnmatchedPen = + p.getDouble(secStr, "routing_platform_unmatched_punish"); } if (p.hasKey(secStr, "routing_non_osm_station_punish")) { - cfg.routingOpts.nonStationPen = - p.getPosDouble(secStr, "routing_non_osm_station_punish"); - LOG(WARN) << "Option routing_non_osm_station_punish is deprecated, use " - "routing_non_station_penalty instead."; - cfg.routingOpts.nonStationPen = - cfg.routingOpts.nonStationPen * - (DEF_TRANS_PEN / cfg.osmBuildOpts.levelDefSpeed[0]); - LOG(DEBUG) << " (using routing_non_station_penalty of " - << cfg.routingOpts.nonStationPen << " instead)"; + procedKeys.insert("routing_non_osm_station_punish"); + curCfg.routingOpts.nonOsmPen = + p.getDouble(secStr, "routing_non_osm_station_punish"); } else { - cfg.routingOpts.nonStationPen = 0; + curCfg.routingOpts.nonOsmPen = 0; } - if (p.hasKey(secStr, "routing_non_station_penalty")) { - cfg.routingOpts.nonStationPen = - p.getPosDouble(secStr, "routing_non_station_penalty"); + if (p.hasKey(secStr, "routing_station_distance_punish_fac")) { + procedKeys.insert("routing_station_distance_punish_fac"); + curCfg.routingOpts.stationDistPenFactor = + p.getDouble(secStr, "routing_station_distance_punish_fac"); } else { - // default was already set above - } - - if (p.hasKey(secStr, "routing_station_unmatched_penalty")) { - 
cfg.routingOpts.stationUnmatchedPen = - p.getPosDouble(secStr, "routing_station_unmatched_penalty"); - } else { - cfg.routingOpts.stationUnmatchedPen = cfg.routingOpts.nonStationPen / 2; + curCfg.routingOpts.stationDistPenFactor = 1; } if (p.hasKey(secStr, "station_normalize_chain")) { + procedKeys.insert("station_normalize_chain"); try { auto arr = p.getStrArr(secStr, "station_normalize_chain", ';'); - cfg.osmBuildOpts.statNormzer = trgraph::Normalizer(getNormRules(arr)); + curCfg.osmBuildOpts.statNormzer = + trgraph::Normalizer(getNormRules(arr)); } catch (const std::exception& e) { throw ParseExc(p.getVal(secStr, "station_normalize_chain").line, p.getVal(secStr, "station_normalize_chain").pos, @@ -557,9 +375,11 @@ void MotConfigReader::parse(const std::vector& paths, } if (p.hasKey(secStr, "track_normalize_chain")) { + procedKeys.insert("track_normalize_chain"); try { auto arr = p.getStrArr(secStr, "track_normalize_chain", ';'); - cfg.osmBuildOpts.trackNormzer = trgraph::Normalizer(getNormRules(arr)); + curCfg.osmBuildOpts.trackNormzer = + trgraph::Normalizer(getNormRules(arr)); } catch (const std::exception& e) { throw ParseExc(p.getVal(secStr, "track_normalize_chain").line, p.getVal(secStr, "track_normalize_chain").pos, @@ -570,9 +390,11 @@ void MotConfigReader::parse(const std::vector& paths, } if (p.hasKey(secStr, "line_normalize_chain")) { + procedKeys.insert("line_normalize_chain"); try { auto arr = p.getStrArr(secStr, "line_normalize_chain", ';'); - cfg.osmBuildOpts.lineNormzer = trgraph::Normalizer(getNormRules(arr)); + curCfg.osmBuildOpts.lineNormzer = + trgraph::Normalizer(getNormRules(arr)); } catch (const std::exception& e) { throw ParseExc(p.getVal(secStr, "line_normalize_chain").line, p.getVal(secStr, "line_normalize_chain").pos, @@ -583,9 +405,10 @@ void MotConfigReader::parse(const std::vector& paths, } if (p.hasKey(secStr, "station_id_normalize_chain")) { + procedKeys.insert("station_id_normalize_chain"); try { auto arr = p.getStrArr(secStr, 
"station_id_normalize_chain", ';'); - cfg.osmBuildOpts.idNormzer = trgraph::Normalizer(getNormRules(arr)); + curCfg.osmBuildOpts.idNormzer = trgraph::Normalizer(getNormRules(arr)); } catch (const std::exception& e) { throw ParseExc(p.getVal(secStr, "station_id_normalize_chain").line, p.getVal(secStr, "station_id_normalize_chain").pos, @@ -595,41 +418,18 @@ void MotConfigReader::parse(const std::vector& paths, } } - // determine the maximum possible speed for this config, this is later - // used to filter out station which are so far out of reach we don't - // have to consider them for the bounding box calculation - cfg.osmBuildOpts.maxSpeed = 0; - cfg.osmBuildOpts.maxSpeedCorFac = 1; - for (size_t i = 0; i < 8; i++) { - if (cfg.osmBuildOpts.levelDefSpeed[i] > cfg.osmBuildOpts.maxSpeed) - cfg.osmBuildOpts.maxSpeed = cfg.osmBuildOpts.levelDefSpeed[i]; + for (const auto& kv : p.getKeyVals(secStr)) { + if (!procedKeys.count(kv.first)) + curCfg.unproced[kv.first] = kv.second.val; } - if (cfg.routingOpts.lineUnmatchedPunishFact < 1) - cfg.osmBuildOpts.maxSpeedCorFac *= - cfg.routingOpts.lineUnmatchedPunishFact; - if (cfg.routingOpts.lineNameFromUnmatchedPunishFact < 1) - cfg.osmBuildOpts.maxSpeedCorFac *= - cfg.routingOpts.lineNameFromUnmatchedPunishFact; - if (cfg.routingOpts.lineNameToUnmatchedPunishFact < 1) - cfg.osmBuildOpts.maxSpeedCorFac *= - cfg.routingOpts.lineNameToUnmatchedPunishFact; - - if (cfg.routingOpts.noLinesPunishFact < 1) - cfg.osmBuildOpts.maxSpeedCorFac *= cfg.routingOpts.noLinesPunishFact; - - if (cfg.osmBuildOpts.oneWaySpeedPen < 1) - cfg.osmBuildOpts.maxSpeedCorFac *= cfg.osmBuildOpts.oneWaySpeedPen; - - cfg.osmBuildOpts.maxSpeed /= cfg.osmBuildOpts.maxSpeedCorFac; - bool found = false; - for (auto& exCfg : _cfgs) { - if (cfg == exCfg) { + for (auto& cfg : _cfgs) { + if (cfg == curCfg) { for (auto mot : ad::cppgtfs::gtfs::flat::Route::getTypesFromString(secStr)) { - exCfg.mots.insert(mot); + cfg.mots.insert(mot); } found = true; break; @@ -637,8 
+437,8 @@ void MotConfigReader::parse(const std::vector& paths, } if (!found) { - cfg.mots = ad::cppgtfs::gtfs::flat::Route::getTypesFromString(secStr); - _cfgs.push_back(cfg); + curCfg.mots = ad::cppgtfs::gtfs::flat::Route::getTypesFromString(secStr); + _cfgs.push_back(curCfg); } } } diff --git a/src/pfaedle/config/MotConfigReader.h b/src/pfaedle/config/MotConfigReader.h index 86e3ffb..51716ff 100644 --- a/src/pfaedle/config/MotConfigReader.h +++ b/src/pfaedle/config/MotConfigReader.h @@ -23,7 +23,7 @@ using ad::cppgtfs::gtfs::Route; class MotConfigReader { public: MotConfigReader(); - void parse(const std::vector& paths, const std::string& literal); + void parse(const std::vector& paths); const std::vector& getConfigs() const; diff --git a/src/pfaedle/config/PfaedleConfig.h b/src/pfaedle/config/PfaedleConfig.h index 211ada3..e5bc41a 100644 --- a/src/pfaedle/config/PfaedleConfig.h +++ b/src/pfaedle/config/PfaedleConfig.h @@ -5,13 +5,11 @@ #ifndef PFAEDLE_CONFIG_PFAEDLECONFIG_H_ #define PFAEDLE_CONFIG_PFAEDLECONFIG_H_ -#include #include #include #include - +#include #include "ad/cppgtfs/gtfs/Route.h" -#include "util/geo/Geo.h" namespace pfaedle { namespace config { @@ -22,53 +20,39 @@ struct Config { Config() : dbgOutputPath("."), solveMethod("global"), + evalPath("."), outputPath("gtfs-out"), dropShapes(false), useHMM(false), writeGraph(false), + writeCombGraph(false), + evaluate(false), buildTransitGraph(false), useCaching(false), writeOverpass(false), - writeOsmfilter(false), inPlace(false), - writeColors(false), - noFastHops(false), - noAStar(false), - noTrie(false), - noHopCache(false), - writeStats(false), - parseAdditionalGTFSFields(false), - gridSize(2000 / util::geo::M_PER_DEG), - gaussianNoise(0), - verbosity(0) {} + gridSize(2000) {} std::string dbgOutputPath; std::string solveMethod; + std::string evalPath; std::string shapeTripId; std::string outputPath; std::string writeOsm; std::string osmPath; - std::string motCfgParam; + std::string evalDfBins; 
std::vector feedPaths; std::vector configPaths; std::set mots; bool dropShapes; bool useHMM; bool writeGraph; + bool writeCombGraph; + bool evaluate; bool buildTransitGraph; bool useCaching; bool writeOverpass; - bool writeOsmfilter; bool inPlace; - bool writeColors; - bool noFastHops; - bool noAStar; - bool noTrie; - bool noHopCache; - bool writeStats; - bool parseAdditionalGTFSFields; double gridSize; - double gaussianNoise; - uint8_t verbosity; std::string toString() { std::stringstream ss; @@ -80,19 +64,10 @@ struct Config { << "drop-shapes: " << dropShapes << "\n" << "use-hmm: " << useHMM << "\n" << "write-graph: " << writeGraph << "\n" + << "write-cgraph: " << writeCombGraph << "\n" << "grid-size: " << gridSize << "\n" << "use-cache: " << useCaching << "\n" << "write-overpass: " << writeOverpass << "\n" - << "write-osmfilter: " << writeOsmfilter << "\n" - << "inplace: " << inPlace << "\n" - << "write-colors: " << writeColors << "\n" - << "no-fast-hops: " << noFastHops << "\n" - << "no-a-star: " << noAStar << "\n" - << "no-trie: " << noTrie << "\n" - << "no-hop-cache: " << noHopCache << "\n" - << "verbosity: " << verbosity << "\n" - << "parse-additional-gtfs-fields: " << parseAdditionalGTFSFields << "\n" - << "write-stats: " << writeStats << "\n" << "feed-paths: "; for (const auto& p : feedPaths) { diff --git a/src/pfaedle/eval/Collector.cpp b/src/pfaedle/eval/Collector.cpp new file mode 100644 index 0000000..709830b --- /dev/null +++ b/src/pfaedle/eval/Collector.cpp @@ -0,0 +1,417 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#include +#include +#include +#include +#include +#include "ad/cppgtfs/gtfs/Feed.h" +#include "pfaedle/Def.h" +#include "pfaedle/eval/Collector.h" +#include "pfaedle/eval/Result.h" +#include "util/geo/Geo.h" +#include "util/geo/PolyLine.h" +#include "util/geo/output/GeoJsonOutput.h" +#include "util/log/Log.h" + +using util::geo::PolyLine; + +using pfaedle::gtfs::Trip; +using ad::cppgtfs::gtfs::Shape; +using pfaedle::eval::Collector; +using pfaedle::eval::Result; +using util::geo::output::GeoJsonOutput; + +// _____________________________________________________________________________ +double Collector::add(const Trip* t, const Shape* oldS, const Shape& newS, + const std::vector& newTripDists) { + if (!oldS) { + _noOrigShp++; + return 0; + } + + for (auto st : t->getStopTimes()) { + if (st.getShapeDistanceTravelled() < 0) { + // we cannot safely compare trips without shape dist travelled + // info + _noOrigShp++; + return 0; + } + } + + double fd = 0; + size_t unmatchedSegments; + double unmatchedSegmentsLength; + + std::vector oldDists; + LINE oldL = getWebMercLine( + oldS, t->getStopTimes().begin()->getShapeDistanceTravelled(), + (--t->getStopTimes().end())->getShapeDistanceTravelled(), &oldDists); + + std::vector newDists; + LINE newL = getWebMercLine(&newS, -1, -1, &newDists); + + std::ofstream fstr(_evalOutPath + "/trip-" + t->getId() + ".json"); + GeoJsonOutput gjout(fstr); + + auto oldSegs = segmentize(t, oldL, oldDists, 0); + auto newSegs = segmentize(t, newL, newDists, &newTripDists); + + // cut both result at the beginning and end to clear evaluation from + // loops at the end + POLYLINE oldStart = oldSegs[0]; + POLYLINE newStart = newSegs[0]; + auto oldStartNew = + oldStart.getSegment(oldStart.projectOn(newSegs[0][0]).totalPos, 1); + auto newStartNew = + newStart.getSegment(newStart.projectOn(oldSegs[0][0]).totalPos, 1); + if (fabs(oldStartNew.getLength() - oldStart.getLength()) / + oldStart.getLength() < + 0.5 && + 
fabs(newStartNew.getLength() - newStart.getLength()) / + newStart.getLength() < + 0.5) { + oldSegs[0] = oldStartNew.getLine(); + newSegs[0] = newStartNew.getLine(); + } + + POLYLINE oldEnd = oldSegs[oldSegs.size() - 1]; + POLYLINE newEnd = newSegs[oldSegs.size() - 1]; + auto oldEndNew = + oldEnd.getSegment(0, oldEnd.projectOn(newSegs.back().back()).totalPos); + auto newEndNew = + newEnd.getSegment(0, newEnd.projectOn(oldSegs.back().back()).totalPos); + if (fabs(oldEndNew.getLength() - oldEnd.getLength()) / oldEnd.getLength() < + 0.5 && + fabs(newEndNew.getLength() - newEnd.getLength()) / newEnd.getLength() < + 0.5) { + oldSegs[oldSegs.size() - 1] = oldEndNew.getLine(); + newSegs[newSegs.size() - 1] = newEndNew.getLine(); + } + + // check for suspicious (most likely erroneous) lines in the + // ground truth data which have a long straight-line segment + + for (auto oldL : oldSegs) { + for (size_t i = 1; i < oldL.size(); i++) { + if (util::geo::webMercMeterDist(oldL[i - 1], oldL[i]) > 500) { + // return 0; + } + } + } + + // new lines build from cleaned-up shapes + LINE oldLCut; + LINE newLCut; + + for (auto oldL : oldSegs) { + gjout.printLatLng(oldL, util::json::Dict{{"ver", "old"}}); + oldLCut.insert(oldLCut.end(), oldL.begin(), oldL.end()); + } + + for (auto newL : newSegs) { + gjout.printLatLng(newL, util::json::Dict{{"ver", "new"}}); + newLCut.insert(newLCut.end(), newL.begin(), newL.end()); + } + + gjout.flush(); + fstr.close(); + + double fac = cos(2 * atan(exp((oldSegs.front().front().getY() + + oldSegs.back().back().getY()) / + 6378137.0)) - + 1.5707965); + + if (_dCache.count(oldS) && _dCache.find(oldS)->second.count(newS.getId())) { + fd = _dCache[oldS][newS.getId()]; + } else { + fd = util::geo::accFrechetDistC(oldLCut, newLCut, 5 / fac) * fac; + _dCache[oldS][newS.getId()] = fd; + } + + if (_dACache.count(oldS) && _dACache.find(oldS)->second.count(newS.getId())) { + unmatchedSegments = _dACache[oldS][newS.getId()].first; + unmatchedSegmentsLength = 
_dACache[oldS][newS.getId()].second; + } else { + auto dA = getDa(oldSegs, newSegs); + _dACache[oldS][newS.getId()] = dA; + unmatchedSegments = dA.first; + unmatchedSegmentsLength = dA.second; + } + + double totL = 0; + for (auto l : oldSegs) totL += util::geo::len(l) * fac; + + // filter out shapes with a lenght of under 5 meters - they are most likely + // artifacts + if (totL < 5) { + _noOrigShp++; + return 0; + } + + _fdSum += fd / totL; + _unmatchedSegSum += unmatchedSegments; + _unmatchedSegLengthSum += unmatchedSegmentsLength; + _results.insert(Result(t, fd / totL)); + _resultsAN.insert(Result(t, static_cast(unmatchedSegments) / + static_cast(oldSegs.size()))); + _resultsAL.insert(Result(t, unmatchedSegmentsLength / totL)); + + LOG(DEBUG) << "This result (" << t->getId() + << "): A_N/N = " << unmatchedSegments << "/" << oldSegs.size() + << " = " + << static_cast(unmatchedSegments) / + static_cast(oldSegs.size()) + << " A_L/L = " << unmatchedSegmentsLength << "/" << totL << " = " + << unmatchedSegmentsLength / totL << " d_f = " << fd; + + return fd; +} + +// _____________________________________________________________________________ +std::vector Collector::segmentize( + const Trip* t, const LINE& shape, const std::vector& dists, + const std::vector* newTripDists) { + std::vector ret; + + if (t->getStopTimes().size() < 2) return ret; + + POLYLINE pl(shape); + std::vector > cuts; + + size_t i = 0; + for (auto st : t->getStopTimes()) { + if (newTripDists) { + cuts.push_back(std::pair( + util::geo::latLngToWebMerc(st.getStop()->getLat(), + st.getStop()->getLng()), + (*newTripDists)[i])); + } else { + cuts.push_back(std::pair( + util::geo::latLngToWebMerc(st.getStop()->getLat(), + st.getStop()->getLng()), + st.getShapeDistanceTravelled())); + } + i++; + } + + // get first half of geometry, and search for start point there! 
+ size_t before = std::upper_bound(dists.begin(), dists.end(), cuts[1].second) - + dists.begin(); + if (before + 1 > shape.size()) before = shape.size() - 1; + assert(shape.begin() + before + 1 <= shape.end()); + POLYLINE l(LINE(shape.begin(), shape.begin() + before + 1)); + auto lastLp = l.projectOn(cuts.front().first); + + for (size_t i = 1; i < cuts.size(); i++) { + size_t before = shape.size(); + if (i < cuts.size() - 1 && cuts[i + 1].second > -0.5) { + before = + std::upper_bound(dists.begin(), dists.end(), cuts[i + 1].second) - + dists.begin(); + } + + POLYLINE beforePl(LINE(shape.begin(), shape.begin() + before)); + + auto curLp = beforePl.projectOnAfter(cuts[i].first, lastLp.lastIndex); + + ret.push_back(pl.getSegment(lastLp, curLp).getLine()); + lastLp = curLp; + } + + // std::raise(SIGABRT); + return ret; +} + +// _____________________________________________________________________________ +LINE Collector::getWebMercLine(const Shape* s, double from, double t) { + return getWebMercLine(s, from, t, 0); +} + +// _____________________________________________________________________________ +LINE Collector::getWebMercLine(const Shape* s, double from, double to, + std::vector* dists) { + LINE ret; + + auto i = s->getPoints().begin(); + + for (; i != s->getPoints().end(); i++) { + auto p = *i; + + if ((from < 0 || (p.travelDist - from) > -0.01)) { + if (to >= 0 && (p.travelDist - to) > 0.01) break; + + POINT mercP = util::geo::latLngToWebMerc(p.lat, p.lng); + + ret.push_back(mercP); + if (dists) dists->push_back(p.travelDist); + } + } + + return ret; +} + +// _____________________________________________________________________________ +const std::set& Collector::getResults() const { return _results; } + +// _____________________________________________________________________________ +double Collector::getAvgDist() const { return _fdSum / _results.size(); } + +// _____________________________________________________________________________ +void 
Collector::printHisto(std::ostream* os, const std::set& result, + const std::vector& bins) const { + size_t W = 60; + + auto it = result.begin(); + std::vector > res; + std::vector examples; + size_t maxC = 0; + + for (size_t i = 0; i < bins.size(); i++) { + size_t c = 0; + const Trip* trip = 0; + + while (it != result.end() && it->getDist() <= (bins[i] + 0.001)) { + if (!trip) trip = it->getTrip(); + c++; + it++; + } + + if (c > maxC) maxC = c; + + examples.push_back(trip); + res.push_back(std::pair(bins[i], c)); + } + + size_t j = 0; + for (auto r : res) { + std::string range = util::toString(r.first); + (*os) << " < " << std::setfill(' ') << std::setw(10) << range << ": "; + size_t i = 0; + + for (; i < W * (static_cast(r.second) / static_cast(maxC)); + i++) { + (*os) << "|"; + } + + if (r.second) + (*os) << " (" << r.second << ", e.g. #" << examples[j]->getId() << ")"; + (*os) << std::endl; + j++; + } +} + +// _____________________________________________________________________________ +std::vector Collector::getBins(double mind, double maxd, size_t steps) { + double bin = (maxd - mind) / steps; + double curE = mind + bin; + + std::vector ret; + while (curE <= maxd) { + ret.push_back(curE); + curE += bin; + } + return ret; +} + +// _____________________________________________________________________________ +void Collector::printCsv(std::ostream* os, const std::set& result, + const std::vector& bins) const { + auto it = result.begin(); + std::vector > res; + + for (size_t i = 0; i < bins.size(); i++) { + size_t c = 0; + const Trip* trip = 0; + + while (it != result.end() && it->getDist() <= (bins[i] + 0.001)) { + if (!trip) trip = it->getTrip(); + c++; + it++; + } + + res.push_back(std::pair(bins[i], c)); + } + + (*os) << "range, count\n"; + for (auto r : res) { + (*os) << r.first << "," << r.second << "\n"; + } +} + +// _____________________________________________________________________________ +void Collector::printStats(std::ostream* os) const { + size_t 
buckets = 10; + (*os) << "\n ===== Evalution results =====\n\n"; + + (*os) << std::setfill(' ') << std::setw(30) + << " # of trips new shapes were matched for: " << _results.size() + << "\n"; + (*os) << std::setw(30) << " # of trips without input shapes: " << _noOrigShp + << "\n"; + + if (_results.size()) { + (*os) << std::setw(30) << " highest distance to input shapes: " + << (--_results.end())->getDist() << " (on trip #" + << (--_results.end())->getTrip()->getId() << ")\n"; + (*os) << std::setw(30) << " lowest distance to input shapes: " + << (_results.begin())->getDist() << " (on trip #" + << (_results.begin())->getTrip()->getId() << ")\n"; + (*os) << std::setw(30) << " avg total frechet distance: " << getAvgDist() + << "\n"; + + std::vector dfBins = getBins( + (_results.begin())->getDist(), (--_results.end())->getDist(), buckets); + + if (_dfBins.size()) dfBins = _dfBins; + + (*os) << "\n -- Histogram of d_f for this run -- " << std::endl; + printHisto(os, _results, dfBins); + + std::ofstream fstr1(_evalOutPath + "/eval-frechet.csv"); + printCsv(&fstr1, _results, dfBins); + + (*os) << "\n\n\n -- Histogram of A_N/N for this run -- " << std::endl; + printHisto(os, _resultsAN, + getBins((_resultsAN.begin())->getDist(), + (--_resultsAN.end())->getDist(), buckets)); + std::ofstream fstr2(_evalOutPath + "/eval-AN.csv"); + printCsv(&fstr2, _resultsAN, getBins(0, 1, 20)); + + (*os) << "\n\n\n -- Histogram of A_L/L for this run -- " << std::endl; + printHisto(os, _resultsAL, + getBins((_resultsAL.begin())->getDist(), + (--_resultsAL.end())->getDist(), buckets)); + std::ofstream fstr3(_evalOutPath + "/eval-AL.csv"); + printCsv(&fstr3, _resultsAL, getBins(0, 1, 20)); + } + + (*os) << "\n ===== End of evaluation results =====\n"; + (*os) << std::endl; +} + +// _____________________________________________________________________________ +std::pair Collector::getDa(const std::vector& a, + const std::vector& b) { + assert(a.size() == b.size()); + std::pair ret{0, 0}; + + // 
euclidean distance on web mercator is in meters on equator, + // and proportional to cos(lat) in both y directions + double fac = + cos(2 * atan(exp((a.front().front().getY() + a.back().back().getY()) / + 6378137.0)) - + 1.5707965); + + for (size_t i = 0; i < a.size(); i++) { + double fd = util::geo::frechetDist(a[i], b[i], 3 / fac) * fac; + if (fd >= 20) { + ret.first++; + ret.second += util::geo::len(a[i]) * fac; + } + } + + return ret; +} diff --git a/src/pfaedle/eval/Collector.h b/src/pfaedle/eval/Collector.h new file mode 100644 index 0000000..853314e --- /dev/null +++ b/src/pfaedle/eval/Collector.h @@ -0,0 +1,95 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#ifndef PFAEDLE_EVAL_COLLECTOR_H_ +#define PFAEDLE_EVAL_COLLECTOR_H_ + +#include +#include +#include +#include +#include +#include +#include "ad/cppgtfs/gtfs/Feed.h" +#include "pfaedle/gtfs/Feed.h" +#include "pfaedle/Def.h" +#include "pfaedle/eval/Result.h" +#include "util/geo/Geo.h" + +using pfaedle::gtfs::Trip; +using ad::cppgtfs::gtfs::Shape; + +namespace pfaedle { +namespace eval { + +/* + * Collects routing results for evaluation + */ +class Collector { + public: + Collector(const std::string& evalOutPath, const std::vector& dfBins) + : _noOrigShp(0), + _fdSum(0), + _unmatchedSegSum(0), + _unmatchedSegLengthSum(0), + _evalOutPath(evalOutPath), + _dfBins(dfBins) {} + + // Add a shape found by our tool newS for a trip t with newly calculated + // station dist values with the old shape oldS + double add(const Trip* t, const Shape* oldS, const Shape& newS, + const std::vector& newDists); + + // Return the set of all Result objects + const std::set& getResults() const; + + // Print general stats to os + void printStats(std::ostream* os) const; + + // Print histogramgs for the results to os + void printHisto(std::ostream* os, const std::set& result, + const std::vector& bins) const; + + // Print a CSV for the results to os + void 
printCsv(std::ostream* os, const std::set& result, + const std::vector& bins) const; + + // Return the averaged average frechet distance + double getAvgDist() const; + + static LINE getWebMercLine(const Shape* s, double from, double to); + static LINE getWebMercLine(const Shape* s, double from, double to, + std::vector* dists); + + private: + std::set _results; + std::set _resultsAN; + std::set _resultsAL; + std::map > _dCache; + std::map > > + _dACache; + size_t _noOrigShp; + + double _fdSum; + size_t _unmatchedSegSum; + double _unmatchedSegLengthSum; + + std::string _evalOutPath; + + std::vector _dfBins; + + static std::pair getDa(const std::vector& a, + const std::vector& b); + + static std::vector segmentize(const Trip* t, const LINE& shape, + const std::vector& dists, + const std::vector* newTripDists); + + static std::vector getBins(double mind, double maxd, size_t steps); +}; + +} // namespace eval +} // namespace pfaedle + +#endif // PFAEDLE_EVAL_COLLECTOR_H_ diff --git a/src/shapevl/Result.h b/src/pfaedle/eval/Result.h similarity index 93% rename from src/shapevl/Result.h rename to src/pfaedle/eval/Result.h index 0bfa667..b039234 100644 --- a/src/shapevl/Result.h +++ b/src/pfaedle/eval/Result.h @@ -5,9 +5,10 @@ #ifndef PFAEDLE_EVAL_RESULT_H_ #define PFAEDLE_EVAL_RESULT_H_ +#include "pfaedle/gtfs/Feed.h" #include "ad/cppgtfs/gtfs/Feed.h" -using ad::cppgtfs::gtfs::Trip; +using pfaedle::gtfs::Trip; using ad::cppgtfs::gtfs::Shape; namespace pfaedle { diff --git a/src/pfaedle/gtfs/Feed.h b/src/pfaedle/gtfs/Feed.h index b6a04b9..8e5cc7b 100644 --- a/src/pfaedle/gtfs/Feed.h +++ b/src/pfaedle/gtfs/Feed.h @@ -6,7 +6,7 @@ #define PFAEDLE_GTFS_FEED_H_ #include - +#include "Route.h" #include "Service.h" #include "ShapeContainer.h" #include "StopTime.h" @@ -21,17 +21,14 @@ namespace pfaedle { namespace gtfs { typedef ad::cppgtfs::gtfs::FeedB< - ad::cppgtfs::gtfs::Agency, ad::cppgtfs::gtfs::Route, - ad::cppgtfs::gtfs::Stop, Service, StopTime, Shape, 
ad::cppgtfs::gtfs::Fare, - ad::cppgtfs::gtfs::Level, ad::cppgtfs::gtfs::Pathway, - ad::cppgtfs::gtfs::Container, ad::cppgtfs::gtfs::Container, - ad::cppgtfs::gtfs::NullContainer, ad::cppgtfs::gtfs::ContContainer, - ad::cppgtfs::gtfs::ContContainer, ShapeContainer, - ad::cppgtfs::gtfs::Container, ad::cppgtfs::gtfs::Container, - ad::cppgtfs::gtfs::Container> + ad::cppgtfs::gtfs::Agency, Route, ad::cppgtfs::gtfs::Stop, Service, + StopTime, Shape, ad::cppgtfs::gtfs::Fare, ad::cppgtfs::gtfs::Container, + ad::cppgtfs::gtfs::ContContainer, ad::cppgtfs::gtfs::NullContainer, + ad::cppgtfs::gtfs::ContContainer, ad::cppgtfs::gtfs::ContContainer, + ShapeContainer, ad::cppgtfs::gtfs::NullContainer> Feed; typedef ad::cppgtfs::gtfs::TripB, Service, - ad::cppgtfs::gtfs::Route, Shape> + Route, Shape> Trip; } // namespace gtfs diff --git a/src/pfaedle/gtfs/Route.h b/src/pfaedle/gtfs/Route.h new file mode 100644 index 0000000..15ba7f0 --- /dev/null +++ b/src/pfaedle/gtfs/Route.h @@ -0,0 +1,61 @@ +// Copyright 2016, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifndef PFAEDLE_GTFS_ROUTE_H_ +#define PFAEDLE_GTFS_ROUTE_H_ + +#include +#include +#include +#include +#include +#include +#include "ad/cppgtfs/gtfs/Agency.h" +#include "ad/cppgtfs/gtfs/Route.h" +#include "util/Misc.h" + +using std::exception; +using std::string; + +namespace pfaedle { +namespace gtfs { + +class Route { + public: + typedef Route* Ref; + static std::string getId(Ref r) { return r->getId(); } + + Route() {} + + Route(const string& id, ad::cppgtfs::gtfs::Agency* agency, + const string& short_name, const string& long_name, const string& desc, + ad::cppgtfs::gtfs::flat::Route::TYPE type, const string& url, + uint32_t color, uint32_t text_color) + : _id(id), _short_name(short_name), _long_name(long_name), _type(type) { + UNUSED(agency); + UNUSED(desc); + UNUSED(url); + UNUSED(color); + UNUSED(text_color); + } + + const std::string& getId() const { return _id; } + + const std::string& getShortName() const { return _short_name; } + + const std::string& getLongName() const { return _long_name; } + + ad::cppgtfs::gtfs::flat::Route::TYPE getType() const { return _type; } + + private: + string _id; + string _short_name; + string _long_name; + ad::cppgtfs::gtfs::flat::Route::TYPE _type; +}; + +} // namespace gtfs +} // namespace pfaedle + +#endif // PFAEDLE_GTFS_ROUTE_H_ diff --git a/src/pfaedle/gtfs/ShapeContainer.h b/src/pfaedle/gtfs/ShapeContainer.h index be8a9b5..1128031 100644 --- a/src/pfaedle/gtfs/ShapeContainer.h +++ b/src/pfaedle/gtfs/ShapeContainer.h @@ -61,7 +61,6 @@ class ShapeContainer { size_t _max; std::string _curId; std::stringstream _writeBuffer; - std::fpos _lastBuff; }; #include "ShapeContainer.tpp" diff --git a/src/pfaedle/gtfs/ShapeContainer.tpp b/src/pfaedle/gtfs/ShapeContainer.tpp index 7d3d305..b55d646 100644 --- a/src/pfaedle/gtfs/ShapeContainer.tpp +++ b/src/pfaedle/gtfs/ShapeContainer.tpp @@ -2,20 +2,20 @@ // Chair of Algorithms and Data Structures. 
// Authors: Patrick Brosi -#include #include // ____________________________________________________________________________ template -ShapeContainer::ShapeContainer() : _lastBuff(0) { - std::string f = util::getTmpFName("", ".pfaedle-tmp", ""); +ShapeContainer::ShapeContainer() { + std::string f = pfaedle::getTmpFName("", ""); _storage.open(f, std::fstream::in | std::fstream::out | std::fstream::trunc); // immediately unlink unlink(f.c_str()); if (!_storage.good()) { - std::cerr << "Could not open temporary file " << f << std::endl; + std::cerr << "Could not open temporary file " << f + << std::endl; exit(1); } } @@ -23,8 +23,6 @@ ShapeContainer::ShapeContainer() : _lastBuff(0) { // ____________________________________________________________________________ template ShapeContainer::~ShapeContainer() { - _storage << _writeBuffer.rdbuf(); - _storage.flush(); _storage.close(); } @@ -45,15 +43,15 @@ bool ShapeContainer::remove(const std::string& id) { // ____________________________________________________________________________ template T* ShapeContainer::get(const std::string& id) { - UNUSED(id); - return reinterpret_cast(0); + if (!has(id)) return 0; + return reinterpret_cast(1); } // ____________________________________________________________________________ template const T* ShapeContainer::get(const std::string& id) const { - UNUSED(id); - return reinterpret_cast(0); + if (!has(id)) return 0; + return reinterpret_cast(1); } // ____________________________________________________________________________ @@ -72,22 +70,17 @@ size_t ShapeContainer::size() const { template std::string ShapeContainer::add(const ad::cppgtfs::gtfs::Shape& s) { if (has(s.getId())) return s.getId(); - size_t size = s.getPoints().size(); _ids.insert(s.getId()); - _writeBuffer << s.getId(); - _writeBuffer.put(' '); - _writeBuffer.write(reinterpret_cast(&size), sizeof(size)); - - for (const auto& p : s.getPoints()) { - _writeBuffer.write(reinterpret_cast(&p.lat), sizeof(p.lat)); - 
_writeBuffer.write(reinterpret_cast(&p.lng), sizeof(p.lng)); - _writeBuffer.write(reinterpret_cast(&p.travelDist), - sizeof(p.travelDist)); + _writeBuffer << s.getId() << '\t' << s.getPoints().size(); + _writeBuffer << std::setprecision(11); + for (auto p : s.getPoints()) { + _writeBuffer << " " << p.lat << " " << p.lng << " " << p.travelDist; } + // entries are newline separated + _writeBuffer << '\n'; - if (_writeBuffer.tellp() - _lastBuff > 1000 * 5000) { - _lastBuff = _writeBuffer.tellp(); + if (_writeBuffer.tellp() > 1000 * 5000) { _storage << _writeBuffer.rdbuf(); _writeBuffer.clear(); } @@ -99,7 +92,6 @@ std::string ShapeContainer::add(const ad::cppgtfs::gtfs::Shape& s) { template void ShapeContainer::open() { _storage << _writeBuffer.rdbuf(); - _storage.flush(); _writeBuffer.clear(); _ptr = 0; @@ -115,18 +107,14 @@ bool ShapeContainer::nextStoragePt( while (_storage.good() && !_storage.fail()) { if (!_ptr) { _storage >> _curId; - _storage.ignore(); - - _storage.read(reinterpret_cast(&_max), sizeof(_max)); + _storage >> _max; } if (!_storage.good() || _storage.fail()) return false; - _storage.read(reinterpret_cast(&ret->lat), sizeof(ret->lat)); - _storage.read(reinterpret_cast(&ret->lng), sizeof(ret->lng)); - _storage.read(reinterpret_cast(&ret->travelDist), - sizeof(ret->travelDist)); - + _storage >> ret->lat; + _storage >> ret->lng; + _storage >> ret->travelDist; ret->seq = _ptr + 1; ret->id = _curId; diff --git a/src/pfaedle/gtfs/StopTime.h b/src/pfaedle/gtfs/StopTime.h index db03334..fe18a52 100644 --- a/src/pfaedle/gtfs/StopTime.h +++ b/src/pfaedle/gtfs/StopTime.h @@ -26,15 +26,15 @@ class StopTime { typename StopT::Ref s, uint32_t seq, const std::string& hs, ad::cppgtfs::gtfs::flat::StopTime::PU_DO_TYPE put, ad::cppgtfs::gtfs::flat::StopTime::PU_DO_TYPE dot, float distTrav, - bool isTp, uint8_t continuousDropOff, - uint8_t continuousPickup) - : _s(s), _sequence(seq), _dist(distTrav), _at(at), _dt(dt), _isTp(isTp) { + bool isTp) + : _s(s), 
_sequence(seq), _dist(distTrav) { + UNUSED(at); + UNUSED(dt); UNUSED(hs); UNUSED(put); UNUSED(dot); UNUSED(distTrav); - UNUSED(continuousDropOff); - UNUSED(continuousPickup); + UNUSED(isTp); } const typename StopT::Ref getStop() const { return _s; } @@ -42,23 +42,20 @@ class StopTime { void setShapeDistanceTravelled(double d) { _dist = d; } ad::cppgtfs::gtfs::Time getArrivalTime() const { - return _at; + return ad::cppgtfs::gtfs::Time(0, 0, 0); } ad::cppgtfs::gtfs::Time getDepartureTime() const { - return _dt; + return ad::cppgtfs::gtfs::Time(0, 0, 0); } float getShapeDistanceTravelled() const { return _dist; } uint16_t getSeq() const { return _sequence; } - bool isTp() const { return _isTp; } private: typename StopT::Ref _s; uint32_t _sequence; float _dist; - ad::cppgtfs::gtfs::Time _at, _dt; - bool _isTp; }; template diff --git a/src/pfaedle/gtfs/Writer.cpp b/src/pfaedle/gtfs/Writer.cpp index c11dbdf..7a0c5b3 100644 --- a/src/pfaedle/gtfs/Writer.cpp +++ b/src/pfaedle/gtfs/Writer.cpp @@ -2,622 +2,423 @@ // Chair of Algorithms and Data Structures. 
// Authors: Patrick Brosi -#include - -#ifdef LIBZIP_FOUND -#include -#endif - #include #include #include -#include #include #include - #include "ad/cppgtfs/Parser.h" #include "ad/cppgtfs/Writer.h" #include "ad/cppgtfs/gtfs/flat/Agency.h" #include "ad/util/CsvWriter.h" #include "pfaedle/gtfs/Writer.h" -using ad::cppgtfs::Parser; using ad::util::CsvWriter; -#ifdef LIBZIP_FOUND -using ad::util::ZipCsvParser; -#endif +using ad::cppgtfs::Parser; using pfaedle::gtfs::Writer; -using util::getTmpFName; +using pfaedle::getTmpFName; // ____________________________________________________________________________ -void Writer::write(gtfs::Feed* sourceFeed, const std::string& path) const { - bool toZip = - (path.size() > 3 && 0 == path.compare(path.size() - 4, 4, ".zip")); - +bool Writer::write(gtfs::Feed* sourceFeed, const std::string& path) const { std::ofstream fs; + std::ifstream is; std::string gtfsPath(path); std::string curFile; std::string curFileTg; - std::string tmpZip; - std::string zipFileName; + curFile = getTmpFName(gtfsPath, "agency.txt"); + curFileTg = gtfsPath + "/agency.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeAgency(sourceFeed, &fs); + fs.close(); + if (std::rename(curFile.c_str(), curFileTg.c_str())) cannotWrite(curFileTg); - if (gtfsPath.size() == 0) gtfsPath = "."; + curFile = getTmpFName(gtfsPath, "stops.txt"); + curFileTg = gtfsPath + "/stops.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeStops(sourceFeed, &fs); + fs.close(); + if (std::rename(curFile.c_str(), curFileTg.c_str())) cannotWrite(curFileTg); -#ifdef LIBZIP_FOUND - zip* za = 0; + curFile = getTmpFName(gtfsPath, "routes.txt"); + curFileTg = gtfsPath + "/routes.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeRoutes(sourceFeed, &fs); + fs.close(); + if (std::rename(curFile.c_str(), curFileTg.c_str())) cannotWrite(curFileTg); - if (toZip) { - const size_t 
slashIdx = path.rfind('/'); - if (slashIdx != std::string::npos) { - zipFileName = path.substr(slashIdx + 1, -1); - gtfsPath = path.substr(0, slashIdx); - } else { - zipFileName = path; - gtfsPath = "."; - } - - tmpZip = getTmpFName(gtfsPath, ".pfaedle-tmp", zipFileName); - - int zipErr = 0; - za = zip_open(tmpZip.c_str(), ZIP_CREATE | ZIP_TRUNCATE, &zipErr); - - if (zipErr != 0) { - char errBuf[100]; - zip_error_to_str(errBuf, sizeof(errBuf), zipErr, errno); - cannotWrite(tmpZip, gtfsPath + "/" + zipFileName); - std::stringstream ss; - ss << "(temporary file for " << (gtfsPath + "/" + zipFileName) - << ") Could not open ZIP file, reason was: " << errBuf; - throw ad::cppgtfs::WriterException(ss.str(), tmpZip); - } -#else - if (toZip) { - throw ad::cppgtfs::WriterException( - "Could not output ZIP file, pfaedle was compiled without libzip", path); -#endif - } else { - mkdir(path.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); + is.open((sourceFeed->getPath() + "/calendar.txt").c_str()); + if (is.good()) { + is.close(); + curFile = getTmpFName(gtfsPath, "calendar.txt"); + curFileTg = gtfsPath + "/calendar.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeCalendar(sourceFeed, &fs); + fs.close(); + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); } - try { - Parser ip(sourceFeed->getPath()); - - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "agency.txt"); - curFileTg = gtfsPath + "/agency.txt"; + is.open((sourceFeed->getPath() + "/calendar_dates.txt").c_str()); + if (is.good()) { + is.close(); + curFile = getTmpFName(gtfsPath, "calendar_dates.txt"); + curFileTg = gtfsPath + "/calendar_dates.txt"; fs.open(curFile.c_str()); if (!fs.good()) cannotWrite(curFile, curFileTg); - writeAgency(sourceFeed, &fs); + writeCalendarDates(sourceFeed, &fs); fs.close(); - - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "agency.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), 
curFileTg.c_str())) - cannotWrite(curFileTg); - } - - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "stops.txt"); - curFileTg = gtfsPath + "/stops.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeStops(sourceFeed, &fs); - fs.close(); - - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "stops.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "routes.txt"); - curFileTg = gtfsPath + "/routes.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeRoutes(sourceFeed, &fs); - fs.close(); - - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "routes.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - - auto csvp = ip.getCsvParser("calendar.txt"); - if (csvp->isGood()) { - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "calendar.txt"); - curFileTg = gtfsPath + "/calendar.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeCalendar(sourceFeed, &fs); - fs.close(); - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "calendar.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - } - - csvp = ip.getCsvParser("calendar_dates.txt"); - if (csvp->isGood()) { - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "calendar_dates.txt"); - curFileTg = gtfsPath + "/calendar_dates.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeCalendarDates(sourceFeed, &fs); - fs.close(); - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "calendar_dates.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - } - - csvp = ip.getCsvParser("transfers.txt"); - if (csvp->isGood()) { - curFile = getTmpFName(gtfsPath, 
".pfaedle-tmp", "transfers.txt"); - curFileTg = gtfsPath + "/transfers.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeTransfers(sourceFeed, &fs); - fs.close(); - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "transfers.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - } - - csvp = ip.getCsvParser("fare_attributes.txt"); - if (csvp->isGood()) { - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "fare_attributes.txt"); - curFileTg = gtfsPath + "/fare_attributes.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeFares(sourceFeed, &fs); - fs.close(); - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "fare_attributes.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - } - - csvp = ip.getCsvParser("fare_rules.txt"); - if (csvp->isGood()) { - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "fare_rules.txt"); - curFileTg = gtfsPath + "/fare_rules.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeFareRules(sourceFeed, &fs); - fs.close(); - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "fare_rules.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - } - - csvp = ip.getCsvParser("pathways.txt"); - if (csvp->isGood()) { - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "pathways.txt"); - curFileTg = gtfsPath + "/pathways.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writePathways(sourceFeed, &fs); - fs.close(); - - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "pathways.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - } - - csvp = ip.getCsvParser("levels.txt"); - if (csvp->isGood()) { - curFile = 
getTmpFName(gtfsPath, ".pfaedle-tmp", "levels.txt"); - curFileTg = gtfsPath + "/levels.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeLevels(sourceFeed, &fs); - fs.close(); - - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "levels.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - } - - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "shapes.txt"); - curFileTg = gtfsPath + "/shapes.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeShapes(sourceFeed, &fs); - fs.close(); - - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "shapes.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "trips.txt"); - curFileTg = gtfsPath + "/trips.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - bool hasFreqs = writeTrips(sourceFeed, &fs); - fs.close(); - - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "trips.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - - csvp = ip.getCsvParser("frequencies.txt"); - if (hasFreqs && csvp->isGood()) { - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "frequencies.txt"); - curFileTg = gtfsPath + "/frequencies.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeFrequencies(sourceFeed, &fs); - fs.close(); - - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "frequencies.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - } - - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "stop_times.txt"); - curFileTg = gtfsPath + "/stop_times.txt"; - fs.open(curFile.c_str()); - - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeStopTimes(sourceFeed, 
&fs); - fs.close(); - - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "stop_times.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - - if (!sourceFeed->getPublisherUrl().empty() && - !sourceFeed->getPublisherName().empty()) { - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "feed_info.txt"); - curFileTg = gtfsPath + "/feed_info.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeFeedInfo(sourceFeed, &fs); - fs.close(); - - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "feed_info.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - } - - curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "attributions.txt"); - curFileTg = gtfsPath + "/attributions.txt"; - fs.open(curFile.c_str()); - if (!fs.good()) cannotWrite(curFile, curFileTg); - writeAttribution(sourceFeed, &fs); - fs.close(); - - if (toZip) { -#ifdef LIBZIP_FOUND - moveIntoZip(za, curFile, "attributions.txt"); -#endif - } else { - if (std::rename(curFile.c_str(), curFileTg.c_str())) - cannotWrite(curFileTg); - } - } catch (...) 
{ -#ifdef LIBZIP_FOUND - zip_discard(za); -#endif - throw; + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); } - if (toZip) { -#ifdef LIBZIP_FOUND - std::string targetZipPath = gtfsPath + "/" + zipFileName; - if (!za) cannotWrite(targetZipPath); - zip_close(za); - if (std::rename(tmpZip.c_str(), targetZipPath.c_str())) - cannotWrite(targetZipPath); -#endif + is.open((sourceFeed->getPath() + "/transfers.txt").c_str()); + if (is.good()) { + is.close(); + curFile = getTmpFName(gtfsPath, "transfers.txt"); + curFileTg = gtfsPath + "/transfers.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeTransfers(sourceFeed, &fs); + fs.close(); + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); } + + is.open((sourceFeed->getPath() + "/fare_attributes.txt").c_str()); + if (is.good()) { + is.close(); + curFile = getTmpFName(gtfsPath, "fare_attributes.txt"); + curFileTg = gtfsPath + "/fare_attributes.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeFares(sourceFeed, &fs); + fs.close(); + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + + is.open((sourceFeed->getPath() + "/fare_rules.txt").c_str()); + if (is.good()) { + is.close(); + curFile = getTmpFName(gtfsPath, "fare_rules.txt"); + curFileTg = gtfsPath + "/fare_rules.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeFareRules(sourceFeed, &fs); + fs.close(); + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + + is.close(); + curFile = getTmpFName(gtfsPath, "shapes.txt"); + curFileTg = gtfsPath + "/shapes.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeShapes(sourceFeed, &fs); + fs.close(); + if (std::rename(curFile.c_str(), curFileTg.c_str())) cannotWrite(curFileTg); + + is.close(); + curFile = getTmpFName(gtfsPath, "trips.txt"); + 
curFileTg = gtfsPath + "/trips.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + bool hasFreqs = writeTrips(sourceFeed, &fs); + fs.close(); + if (std::rename(curFile.c_str(), curFileTg.c_str())) cannotWrite(curFileTg); + + is.open((sourceFeed->getPath() + "/frequencies.txt").c_str()); + if (hasFreqs && is.good()) { + is.close(); + curFile = getTmpFName(gtfsPath, "frequencies.txt"); + curFileTg = gtfsPath + "/frequencies.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeFrequencies(sourceFeed, &fs); + fs.close(); + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + + is.close(); + curFile = getTmpFName(gtfsPath, "stop_times.txt"); + curFileTg = gtfsPath + "/stop_times.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeStopTimes(sourceFeed, &fs); + fs.close(); + if (std::rename(curFile.c_str(), curFileTg.c_str())) cannotWrite(curFileTg); + + if (!sourceFeed->getPublisherUrl().empty() && + !sourceFeed->getPublisherName().empty()) { + curFile = getTmpFName(gtfsPath, "feed_info.txt"); + curFileTg = gtfsPath + "/feed_info.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeFeedInfo(sourceFeed, &fs); + fs.close(); + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + + return true; } // ____________________________________________________________________________ -void Writer::writeAttribution(gtfs::Feed*, std::ostream* os) const { - auto csvw = ad::cppgtfs::Writer::getAttributionCsvw(os); - - csvw->flushLine(); - csvw->writeString("OpenStreetMap contributors"); - csvw->writeString("https://www.openstreetmap.org/copyright"); - csvw->writeInt(1); - - csvw->flushLine(); -} - -// ____________________________________________________________________________ -void Writer::writeFeedInfo(gtfs::Feed* f, std::ostream* os) const { +bool 
Writer::writeFeedInfo(gtfs::Feed* f, std::ostream* os) const { auto csvw = ad::cppgtfs::Writer::getFeedInfoCsvw(os); - csvw->flushLine(); - csvw->writeString(f->getPublisherName()); - csvw->writeString(f->getPublisherUrl()); - csvw->writeString(f->getLang()); + csvw.flushLine(); + csvw.writeString(f->getPublisherName()); + csvw.writeString(f->getPublisherUrl()); + csvw.writeString(f->getLang()); if (!f->getStartDate().empty()) - csvw->writeInt(f->getStartDate().getYYYYMMDD()); + csvw.writeInt(f->getStartDate().getYYYYMMDD()); else - csvw->skip(); + csvw.skip(); if (!f->getEndDate().empty()) - csvw->writeInt(f->getEndDate().getYYYYMMDD()); + csvw.writeInt(f->getEndDate().getYYYYMMDD()); else - csvw->skip(); - csvw->writeString(f->getVersion()); - csvw->writeString(f->getContactEmail()); - csvw->writeString(f->getContactUrl()); - csvw->writeString(f->getDefaultLang()); - csvw->flushLine(); + csvw.skip(); + csvw.writeString(f->getVersion()); + csvw.flushLine(); + + return true; } // ____________________________________________________________________________ -void Writer::writePathways(gtfs::Feed* sourceFeed, std::ostream* os) const { - Parser p(sourceFeed->getPath()); - auto csvp = p.getCsvParser("pathways.txt"); +bool Writer::writeAgency(gtfs::Feed* sourceFeed, std::ostream* os) const { + std::ifstream fs; + fs.open((sourceFeed->getPath() + "/agency.txt").c_str()); + + CsvParser csvp(&fs); + Parser p; ad::cppgtfs::Writer w; - auto csvw = ad::cppgtfs::Writer::getPathwayCsvw(os); - csvw->flushLine(); - - ad::cppgtfs::gtfs::flat::Pathway fa; - auto flds = Parser::getPathwayFlds(csvp.get()); - - while (p.nextPathway(csvp.get(), &fa, flds)) { - w.writePathway(fa, csvw.get()); - } -} - -// ____________________________________________________________________________ -void Writer::writeLevels(gtfs::Feed* sourceFeed, std::ostream* os) const { - Parser p(sourceFeed->getPath()); - auto csvp = p.getCsvParser("levels.txt"); - ad::cppgtfs::Writer w; - - auto csvw = 
ad::cppgtfs::Writer::getLevelCsvw(os); - csvw->flushLine(); - - ad::cppgtfs::gtfs::flat::Level fa; - auto flds = Parser::getLevelFlds(csvp.get()); - - while (p.nextLevel(csvp.get(), &fa, flds)) { - w.writeLevel(fa, csvw.get()); - } -} - -// ____________________________________________________________________________ -void Writer::writeAgency(gtfs::Feed* sourceFeed, std::ostream* os) const { - Parser p(sourceFeed->getPath()); - auto csvp = p.getCsvParser("agency.txt"); - - ad::cppgtfs::Writer w; - - auto csvw = - ad::cppgtfs::Writer::getAgencyCsvw(os, sourceFeed->getAgencyAddFlds()); - csvw->flushLine(); + CsvWriter csvw = ad::cppgtfs::Writer::getAgencyCsvw(os); + csvw.flushLine(); ad::cppgtfs::gtfs::flat::Agency fa; - auto flds = Parser::getAgencyFlds(csvp.get()); + auto flds = Parser::getAgencyFlds(&csvp); - while (p.nextAgency(csvp.get(), &fa, flds)) { - w.writeAgency(fa, csvw.get(), sourceFeed->getAgencyAddFlds()); + while (p.nextAgency(&csvp, &fa, flds)) { + w.writeAgency(fa, &csvw); } + fs.close(); + + return true; } // ____________________________________________________________________________ -void Writer::writeStops(gtfs::Feed* sourceFeed, std::ostream* os) const { - Parser p(sourceFeed->getPath()); - auto csvp = p.getCsvParser("stops.txt"); +bool Writer::writeStops(gtfs::Feed* sourceFeed, std::ostream* os) const { + std::ifstream fs; + fs.open((sourceFeed->getPath() + "/stops.txt").c_str()); + + CsvParser csvp(&fs); + Parser p; ad::cppgtfs::Writer w; - auto csvw = - ad::cppgtfs::Writer::getStopsCsvw(os, sourceFeed->getStopAddFlds()); - csvw->flushLine(); + CsvWriter csvw = ad::cppgtfs::Writer::getStopsCsvw(os); + csvw.flushLine(); ad::cppgtfs::gtfs::flat::Stop s; - auto flds = Parser::getStopFlds(csvp.get()); + auto flds = Parser::getStopFlds(&csvp); - while (p.nextStop(csvp.get(), &s, flds)) { - w.writeStop(s, csvw.get(), sourceFeed->getStopAddFlds()); + while (p.nextStop(&csvp, &s, flds)) { + w.writeStop(s, &csvw); } + fs.close(); + + return true; } // 
____________________________________________________________________________ -void Writer::writeRoutes(gtfs::Feed* sourceFeed, std::ostream* os) const { +bool Writer::writeRoutes(gtfs::Feed* sourceFeed, std::ostream* os) const { + std::ifstream fs; + fs.open((sourceFeed->getPath() + "/routes.txt").c_str()); + + CsvParser csvp(&fs); + Parser p; ad::cppgtfs::Writer w; - auto csvw = - ad::cppgtfs::Writer::getRoutesCsvw(os, sourceFeed->getRouteAddFlds()); - csvw->flushLine(); + CsvWriter csvw = ad::cppgtfs::Writer::getRoutesCsvw(os); + csvw.flushLine(); - for (auto r : sourceFeed->getRoutes()) { - w.writeRoute(r.second->getFlat(), csvw.get(), - sourceFeed->getRouteAddFlds()); + ad::cppgtfs::gtfs::flat::Route s; + auto flds = Parser::getRouteFlds(&csvp); + + while (p.nextRoute(&csvp, &s, flds)) { + w.writeRoute(s, &csvw); } + fs.close(); + + return true; } // ____________________________________________________________________________ -void Writer::writeCalendar(gtfs::Feed* sourceFeed, std::ostream* os) const { - Parser p(sourceFeed->getPath()); - auto csvp = p.getCsvParser("calendar.txt"); +bool Writer::writeCalendar(gtfs::Feed* sourceFeed, std::ostream* os) const { + std::ifstream fs; + fs.open((sourceFeed->getPath() + "/calendar.txt").c_str()); + + CsvParser csvp(&fs); + Parser p; ad::cppgtfs::Writer w; - auto csvw = ad::cppgtfs::Writer::getCalendarCsvw(os); - csvw->flushLine(); + CsvWriter csvw = ad::cppgtfs::Writer::getCalendarCsvw(os); + csvw.flushLine(); ad::cppgtfs::gtfs::flat::Calendar c; - auto flds = Parser::getCalendarFlds(csvp.get()); + auto flds = Parser::getCalendarFlds(&csvp); - while (p.nextCalendar(csvp.get(), &c, flds)) { - w.writeCalendar(c, csvw.get()); + while (p.nextCalendar(&csvp, &c, flds)) { + w.writeCalendar(c, &csvw); } + fs.close(); + + return true; } // ____________________________________________________________________________ -void Writer::writeCalendarDates(gtfs::Feed* sourceFeed, +bool Writer::writeCalendarDates(gtfs::Feed* sourceFeed, 
std::ostream* os) const { - Parser p(sourceFeed->getPath()); - auto csvp = p.getCsvParser("calendar_dates.txt"); + std::ifstream fs; + fs.open((sourceFeed->getPath() + "/calendar_dates.txt").c_str()); + + CsvParser csvp(&fs); + Parser p; ad::cppgtfs::Writer w; - auto csvw = ad::cppgtfs::Writer::getCalendarDatesCsvw(os); - csvw->flushLine(); + CsvWriter csvw = ad::cppgtfs::Writer::getCalendarDatesCsvw(os); + csvw.flushLine(); ad::cppgtfs::gtfs::flat::CalendarDate c; - auto flds = Parser::getCalendarDateFlds(csvp.get()); + auto flds = Parser::getCalendarDateFlds(&csvp); - while (p.nextCalendarDate(csvp.get(), &c, flds)) { - w.writeCalendarDate(c, csvw.get()); + while (p.nextCalendarDate(&csvp, &c, flds)) { + w.writeCalendarDate(c, &csvw); } + fs.close(); + + return true; } // ____________________________________________________________________________ -void Writer::writeFrequencies(gtfs::Feed* sourceFeed, std::ostream* os) const { - Parser p(sourceFeed->getPath()); - auto csvp = p.getCsvParser("frequencies.txt"); +bool Writer::writeFrequencies(gtfs::Feed* sourceFeed, std::ostream* os) const { + std::ifstream fs; + fs.open((sourceFeed->getPath() + "/frequencies.txt").c_str()); + + CsvParser csvp(&fs); + Parser p; ad::cppgtfs::Writer w; - auto csvw = ad::cppgtfs::Writer::getFrequencyCsvw(os); - csvw->flushLine(); + CsvWriter csvw = ad::cppgtfs::Writer::getFrequencyCsvw(os); + csvw.flushLine(); ad::cppgtfs::gtfs::flat::Frequency f; - auto flds = Parser::getFrequencyFlds(csvp.get()); + auto flds = Parser::getFrequencyFlds(&csvp); - while (p.nextFrequency(csvp.get(), &f, flds)) { - w.writeFrequency(f, csvw.get()); + while (p.nextFrequency(&csvp, &f, flds)) { + w.writeFrequency(f, &csvw); } + fs.close(); + + return true; } // ____________________________________________________________________________ -void Writer::writeTransfers(gtfs::Feed* sourceFeed, std::ostream* os) const { - Parser p(sourceFeed->getPath()); - auto csvp = p.getCsvParser("transfers.txt"); +bool 
Writer::writeTransfers(gtfs::Feed* sourceFeed, std::ostream* os) const { + std::ifstream fs; + fs.open((sourceFeed->getPath() + "/transfers.txt").c_str()); + + CsvParser csvp(&fs); + Parser p; ad::cppgtfs::Writer w; - auto csvw = ad::cppgtfs::Writer::getTransfersCsvw(os); - csvw->flushLine(); + CsvWriter csvw = ad::cppgtfs::Writer::getTransfersCsvw(os); + csvw.flushLine(); ad::cppgtfs::gtfs::flat::Transfer t; - auto flds = Parser::getTransfersFlds(csvp.get()); + auto flds = Parser::getTransfersFlds(&csvp); - while (p.nextTransfer(csvp.get(), &t, flds)) { - w.writeTransfer(t, csvw.get()); + while (p.nextTransfer(&csvp, &t, flds)) { + w.writeTransfer(t, &csvw); } + fs.close(); + + return true; } // ____________________________________________________________________________ -void Writer::writeFares(gtfs::Feed* sourceFeed, std::ostream* os) const { - Parser p(sourceFeed->getPath()); - auto csvp = p.getCsvParser("fare_attributes.txt"); +bool Writer::writeFares(gtfs::Feed* sourceFeed, std::ostream* os) const { + std::ifstream fs; + fs.open((sourceFeed->getPath() + "/fare_attributes.txt").c_str()); + + CsvParser csvp(&fs); + Parser p; ad::cppgtfs::Writer w; - auto csvw = ad::cppgtfs::Writer::getFaresCsvw(os); - csvw->flushLine(); + CsvWriter csvw = ad::cppgtfs::Writer::getFaresCsvw(os); + csvw.flushLine(); ad::cppgtfs::gtfs::flat::Fare f; - auto flds = Parser::getFareFlds(csvp.get()); + auto flds = Parser::getFareFlds(&csvp); - while (p.nextFare(csvp.get(), &f, flds)) { - w.writeFare(f, csvw.get()); + while (p.nextFare(&csvp, &f, flds)) { + w.writeFare(f, &csvw); } + fs.close(); + + return true; } // ____________________________________________________________________________ -void Writer::writeFareRules(gtfs::Feed* sourceFeed, std::ostream* os) const { - Parser p(sourceFeed->getPath()); - auto csvp = p.getCsvParser("fare_rules.txt"); +bool Writer::writeFareRules(gtfs::Feed* sourceFeed, std::ostream* os) const { + std::ifstream fs; + fs.open((sourceFeed->getPath() + 
"/fare_rules.txt").c_str()); + + CsvParser csvp(&fs); + Parser p; ad::cppgtfs::Writer w; - auto csvw = ad::cppgtfs::Writer::getFareRulesCsvw(os); - csvw->flushLine(); + CsvWriter csvw = ad::cppgtfs::Writer::getFareRulesCsvw(os); + csvw.flushLine(); ad::cppgtfs::gtfs::flat::FareRule f; - auto flds = Parser::getFareRuleFlds(csvp.get()); + auto flds = Parser::getFareRuleFlds(&csvp); - while (p.nextFareRule(csvp.get(), &f, flds)) { - w.writeFareRule(f, csvw.get()); + while (p.nextFareRule(&csvp, &f, flds)) { + w.writeFareRule(f, &csvw); } + fs.close(); + + return true; } // ____________________________________________________________________________ -void Writer::writeShapes(gtfs::Feed* sourceFeed, std::ostream* os) const { - auto csvw = ad::cppgtfs::Writer::getShapesCsvw(os); - csvw->flushLine(); +bool Writer::writeShapes(gtfs::Feed* sourceFeed, std::ostream* os) const { + std::ifstream fs; + fs.open((sourceFeed->getPath() + "/shapes.txt").c_str()); + + CsvWriter csvw = ad::cppgtfs::Writer::getShapesCsvw(os); + csvw.flushLine(); ad::cppgtfs::gtfs::flat::ShapePoint sp; ad::cppgtfs::Writer w; - Parser p(sourceFeed->getPath()); - auto csvp = p.getCsvParser("shapes.txt"); + if (fs.good()) { + CsvParser csvp(&fs); + Parser p; - if (csvp->isGood()) { - auto flds = Parser::getShapeFlds(csvp.get()); + auto flds = Parser::getShapeFlds(&csvp); std::string curShapeId; std::string curSkipShapeId; - while (p.nextShapePoint(csvp.get(), &sp, flds)) { + while (p.nextShapePoint(&csvp, &sp, flds)) { if (sp.id == curSkipShapeId) continue; if (sp.id != curShapeId) { if (sourceFeed->getShapes().has(sp.id)) { @@ -628,14 +429,18 @@ void Writer::writeShapes(gtfs::Feed* sourceFeed, std::ostream* os) const { } } - w.writeShapePoint(sp, csvw.get()); + w.writeShapePoint(sp, &csvw); } } sourceFeed->getShapes().open(); while (sourceFeed->getShapes().nextStoragePt(&sp)) { - w.writeShapePoint(sp, csvw.get()); + w.writeShapePoint(sp, &csvw); } + + fs.close(); + + return true; } // 
____________________________________________________________________________ @@ -643,34 +448,36 @@ bool Writer::writeTrips(gtfs::Feed* sourceFeed, std::ostream* os) const { ad::cppgtfs::Writer w; bool hasFreqs = false; - auto csvw = - ad::cppgtfs::Writer::getTripsCsvw(os, sourceFeed->getTripAddFlds()); - csvw->flushLine(); + CsvWriter csvw = ad::cppgtfs::Writer::getTripsCsvw(os); + csvw.flushLine(); for (auto t : sourceFeed->getTrips()) { if (t.getFrequencies().size()) hasFreqs = true; - w.writeTrip(t.getFlat(), csvw.get(), sourceFeed->getTripAddFlds()); + w.writeTrip(t.getFlat(), &csvw); } return hasFreqs; } // ____________________________________________________________________________ -void Writer::writeStopTimes(gtfs::Feed* sourceFeed, std::ostream* os) const { - Parser p(sourceFeed->getPath()); - auto csvp = p.getCsvParser("stop_times.txt"); +bool Writer::writeStopTimes(gtfs::Feed* sourceFeed, std::ostream* os) const { + std::ifstream fs; + fs.open((sourceFeed->getPath() + "/stop_times.txt").c_str()); + + CsvParser csvp(&fs); + Parser p; ad::cppgtfs::Writer w; - auto csvw = ad::cppgtfs::Writer::getStopTimesCsvw(os); - csvw->flushLine(); + CsvWriter csvw = ad::cppgtfs::Writer::getStopTimesCsvw(os); + csvw.flushLine(); ad::cppgtfs::gtfs::flat::StopTime st; - auto flds = Parser::getStopTimeFlds(csvp.get()); + auto flds = Parser::getStopTimeFlds(&csvp); std::string curTripId; Trip* cur = 0; - while (p.nextStopTime(csvp.get(), &st, flds)) { + while (p.nextStopTime(&csvp, &st, flds)) { // we may have changed to distance field if (curTripId != st.trip) { cur = sourceFeed->getTrips().get(st.trip); @@ -681,8 +488,11 @@ void Writer::writeStopTimes(gtfs::Feed* sourceFeed, std::ostream* os) const { st.shapeDistTravelled = stN.getShapeDistanceTravelled(); } - w.writeStopTime(st, csvw.get()); + w.writeStopTime(st, &csvw); } + fs.close(); + + return true; } // ___________________________________________________________________________ @@ -699,25 +509,3 @@ void 
Writer::cannotWrite(const std::string& file, const std::string& file2) { throw ad::cppgtfs::WriterException(ss.str(), file); } -// ___________________________________________________________________________ -#ifdef LIBZIP_FOUND -void Writer::moveIntoZip(zip* za, const std::string& sourcePath, - const std::string& targetPath) { - zip_source_t* s; - FILE* fp = fopen(sourcePath.c_str(), "r"); - if (fp == 0) { - std::stringstream ss; - ss << "(temporary file for " << targetPath << ") Could not open file"; - throw ad::cppgtfs::WriterException(ss.str(), sourcePath); - } - - // immediately unlink - unlink(sourcePath.c_str()); - - if ((s = zip_source_filep(za, fp, 0, -1)) == 0 || - zip_file_add(za, targetPath.c_str(), s, ZIP_FL_ENC_UTF_8) < 0) { - zip_source_free(s); - cannotWrite(targetPath); - } -} -#endif diff --git a/src/pfaedle/gtfs/Writer.h b/src/pfaedle/gtfs/Writer.h index e824edc..917195d 100644 --- a/src/pfaedle/gtfs/Writer.h +++ b/src/pfaedle/gtfs/Writer.h @@ -5,15 +5,9 @@ #ifndef PFAEDLE_GTFS_WRITER_H_ #define PFAEDLE_GTFS_WRITER_H_ -#include #include -#ifdef LIBZIP_FOUND -#include -#endif - -#include "Feed.h" -#include "ad/cppgtfs/Parser.h" #include "ad/cppgtfs/Writer.h" +#include "Feed.h" namespace pfaedle { namespace gtfs { @@ -22,35 +16,25 @@ class Writer { public: Writer() {} - void write(Feed* sourceFeed, const std::string& path) const; + bool write(Feed* sourceFeed, const std::string& path) const; private: - void writeFeedInfo(Feed* f, std::ostream* os) const; - void writeAgency(Feed* f, std::ostream* os) const; - void writeStops(Feed* f, std::ostream* os) const; - void writeRoutes(Feed* f, std::ostream* os) const; - void writeCalendar(Feed* f, std::ostream* os) const; - void writeCalendarDates(Feed* f, std::ostream* os) const; - void writeFrequencies(Feed* f, std::ostream* os) const; - void writeTransfers(Feed* f, std::ostream* os) const; - void writeFares(Feed* f, std::ostream* os) const; - void writeFareRules(Feed* f, std::ostream* os) const; - void 
writeShapes(Feed* f, std::ostream* os) const; + bool writeFeedInfo(Feed* f, std::ostream* os) const; + bool writeAgency(Feed* f, std::ostream* os) const; + bool writeStops(Feed* f, std::ostream* os) const; + bool writeRoutes(Feed* f, std::ostream* os) const; + bool writeCalendar(Feed* f, std::ostream* os) const; + bool writeCalendarDates(Feed* f, std::ostream* os) const; + bool writeFrequencies(Feed* f, std::ostream* os) const; + bool writeTransfers(Feed* f, std::ostream* os) const; + bool writeFares(Feed* f, std::ostream* os) const; + bool writeFareRules(Feed* f, std::ostream* os) const; + bool writeShapes(Feed* f, std::ostream* os) const; bool writeTrips(Feed* f, std::ostream* os) const; - void writeStopTimes(Feed* f, std::ostream* os) const; - void writeLevels(Feed* f, std::ostream* os) const; - void writePathways(Feed* f, std::ostream* os) const; - void writeAttribution(Feed* f, std::ostream* os) const; + bool writeStopTimes(Feed* f, std::ostream* os) const; static void cannotWrite(const std::string& file, const std::string& file2); static void cannotWrite(const std::string& file); - -#ifdef LIBZIP_FOUND - static void moveIntoZip(zip* za, const std::string& sourcePath, - const std::string& targetPath); -#endif - - mutable std::ifstream _ifs; }; } // namespace gtfs diff --git a/src/pfaedle/netgraph/EdgePL.h b/src/pfaedle/netgraph/EdgePL.h index c6803ea..58b767f 100644 --- a/src/pfaedle/netgraph/EdgePL.h +++ b/src/pfaedle/netgraph/EdgePL.h @@ -26,7 +26,7 @@ namespace netgraph { class EdgePL { public: EdgePL() {} - EdgePL(const LINE& l, const std::vector& trips) + EdgePL(const LINE& l, const std::set& trips) : _l(l), _trips(trips) { for (const auto t : _trips) { _routeShortNames.insert(t->getRoute()->getShortName()); @@ -46,7 +46,7 @@ class EdgePL { private: LINE _l; - std::vector _trips; + std::set _trips; std::set _routeShortNames; std::set _tripShortNames; }; diff --git a/src/pfaedle/osm/BBoxIdx.cpp b/src/pfaedle/osm/BBoxIdx.cpp index c841d47..c25d378 100644 
--- a/src/pfaedle/osm/BBoxIdx.cpp +++ b/src/pfaedle/osm/BBoxIdx.cpp @@ -31,10 +31,10 @@ bool BBoxIdx::contains(const Point& p) const { // _____________________________________________________________________________ BOX BBoxIdx::getFullWebMercBox() const { return BOX( - util::geo::latLngToWebMerc(_root.box.getLowerLeft().getY(), - _root.box.getLowerLeft().getX()), - util::geo::latLngToWebMerc(_root.box.getUpperRight().getY(), - _root.box.getUpperRight().getX())); + util::geo::latLngToWebMerc( + _root.box.getLowerLeft().getY(), _root.box.getLowerLeft().getX()), + util::geo::latLngToWebMerc( + _root.box.getUpperRight().getY(), _root.box.getUpperRight().getX())); } // _____________________________________________________________________________ diff --git a/src/pfaedle/osm/Osm.h b/src/pfaedle/osm/Osm.h index a57f9ea..2fd5bfa 100644 --- a/src/pfaedle/osm/Osm.h +++ b/src/pfaedle/osm/Osm.h @@ -5,12 +5,10 @@ #ifndef PFAEDLE_OSM_OSM_H_ #define PFAEDLE_OSM_OSM_H_ -#include - #include #include -#include #include +#include #include namespace pfaedle { diff --git a/src/pfaedle/osm/OsmBuilder.cpp b/src/pfaedle/osm/OsmBuilder.cpp index ce30315..3da48df 100644 --- a/src/pfaedle/osm/OsmBuilder.cpp +++ b/src/pfaedle/osm/OsmBuilder.cpp @@ -3,7 +3,6 @@ // Authors: Patrick Brosi #include - #include #include #include @@ -13,18 +12,17 @@ #include #include #include - #include "pfaedle/Def.h" -#include "pfaedle/_config.h" #include "pfaedle/osm/BBoxIdx.h" #include "pfaedle/osm/Osm.h" #include "pfaedle/osm/OsmBuilder.h" #include "pfaedle/osm/OsmFilter.h" #include "pfaedle/osm/Restrictor.h" +#include "pfaedle/trgraph/StatGroup.h" #include "util/Misc.h" #include "util/Nullable.h" #include "util/log/Log.h" -#include "pfxml/pfxml.h" +#include "xml/pfxml.h" using ad::cppgtfs::gtfs::Stop; using pfaedle::osm::BlockSearch; @@ -42,14 +40,20 @@ using pfaedle::trgraph::Graph; using pfaedle::trgraph::Node; using pfaedle::trgraph::NodePL; using pfaedle::trgraph::Normalizer; +using 
pfaedle::trgraph::StatGroup; using pfaedle::trgraph::StatInfo; using pfaedle::trgraph::TransitEdgeLine; using util::Nullable; using util::geo::Box; -using util::geo::M_PER_DEG; +using util::geo::webMercMeterDist; // _____________________________________________________________________________ bool EqSearch::operator()(const Node* cand, const StatInfo* si) const { + if (orphanSnap && cand->pl().getSI() && + (!cand->pl().getSI()->getGroup() || + cand->pl().getSI()->getGroup()->getStops().size() == 0)) { + return true; + } return cand->pl().getSI() && cand->pl().getSI()->simi(si) > minSimi; } @@ -58,8 +62,8 @@ OsmBuilder::OsmBuilder() {} // _____________________________________________________________________________ void OsmBuilder::read(const std::string& path, const OsmReadOpts& opts, - Graph* g, const BBoxIdx& bbox, double gridSize, - Restrictor* res) { + Graph* g, const BBoxIdx& bbox, size_t gridSize, + router::FeedStops* fs, Restrictor* res) { if (!bbox.size()) return; LOG(INFO) << "Reading OSM file " << path << " ... 
"; @@ -97,132 +101,85 @@ void OsmBuilder::read(const std::string& path, const OsmReadOpts& opts, // * match the filter criteria // * have been used in a way in pass 3 - LOG(DEBUG) << "Reading bounding box nodes..."; + LOG(VDEBUG) << "Reading bounding box nodes..."; skipUntil(&xml, "node"); pfxml::parser_state nodeBeg = xml.state(); pfxml::parser_state edgesBeg = readBBoxNds(&xml, &bboxNodes, &noHupNodes, filter, bbox); - LOG(DEBUG) << "Reading relations..."; + LOG(VDEBUG) << "Reading relations..."; skipUntil(&xml, "relation"); readRels(&xml, &intmRels, &nodeRels, &wayRels, filter, attrKeys[2], &rawRests); - LOG(DEBUG) << "Reading edges..."; + LOG(VDEBUG) << "Reading edges..."; xml.set_state(edgesBeg); readEdges(&xml, g, intmRels, wayRels, filter, bboxNodes, &nodes, &multNodes, noHupNodes, attrKeys[1], rawRests, res, intmRels.flat, &eTracks, opts); - LOG(DEBUG) << "Reading kept nodes..."; + LOG(VDEBUG) << "Reading kept nodes..."; xml.set_state(nodeBeg); readNodes(&xml, g, intmRels, nodeRels, filter, bboxNodes, &nodes, &multNodes, &orphanStations, attrKeys[0], intmRels.flat, opts); } - LOG(DEBUG) << "OSM ID set lookups: " << osm::OsmIdSet::LOOKUPS - << ", file lookups: " << osm::OsmIdSet::FLOOKUPS; + LOG(VDEBUG) << "OSM ID set lookups: " << osm::OsmIdSet::LOOKUPS + << ", file lookups: " << osm::OsmIdSet::FLOOKUPS; - LOG(DEBUG) << "Applying edge track numbers..."; + LOG(VDEBUG) << "Applying edge track numbers..."; writeEdgeTracks(eTracks); eTracks.clear(); { - LOG(DEBUG) << "Fixing gaps..."; - NodeGrid ng = buildNodeIdx(g, gridSize, bbox.getFullBox(), false); - LOG(DEBUG) << "Grid size of " << ng.getXWidth() << "x" << ng.getYHeight(); + LOG(VDEBUG) << "Fixing gaps..."; + NodeGrid ng = buildNodeIdx(g, gridSize, bbox.getFullWebMercBox(), false); fixGaps(g, &ng); } - LOG(DEBUG) << "Snapping stations..."; - snapStats(opts, g, bbox, gridSize, res, orphanStations); + LOG(VDEBUG) << "Writing edge geoms..."; + writeGeoms(g); - LOG(DEBUG) << "Collapsing edges..."; + 
LOG(VDEBUG) << "Snapping stations..."; + snapStats(opts, g, bbox, gridSize, fs, res, orphanStations); + + LOG(VDEBUG) << "Deleting orphan nodes..."; + deleteOrphNds(g); + + LOG(VDEBUG) << "Deleting orphan edges..."; + deleteOrphEdgs(g, opts); + + LOG(VDEBUG) << "Collapsing edges..."; collapseEdges(g); - LOG(DEBUG) << "Writing edge geoms..."; - writeGeoms(g, opts); + LOG(VDEBUG) << "Deleting orphan nodes..."; + deleteOrphNds(g); - LOG(DEBUG) << "Deleting orphan nodes..."; - deleteOrphNds(g, opts); + LOG(VDEBUG) << "Deleting orphan edges..."; + deleteOrphEdgs(g, opts); - LOG(DEBUG) << "Writing graph components..."; + LOG(VDEBUG) << "Writing graph components..."; // the restrictor is needed here to prevent connections in the graph // which are not possible in reality - uint32_t comps = writeComps(g, opts); + uint32_t comps = writeComps(g); - LOG(DEBUG) << "Simplifying geometries..."; + LOG(VDEBUG) << "Simplifying geometries..."; simplifyGeoms(g); - LOG(DEBUG) << "Writing other-direction edges..."; + LOG(VDEBUG) << "Writing other-direction edges..."; writeODirEdgs(g, res); - LOG(DEBUG) << "Write wrong-direction costs..."; - writeOneWayPens(g, opts); - - if (opts.noLinesPunishFact != 1.0) { - LOG(DEBUG) << "Write no-line pens..."; - writeNoLinePens(g, opts); - } - - LOG(DEBUG) << "Write dummy node self-edges..."; + LOG(VDEBUG) << "Write dummy node self-edges..."; writeSelfEdgs(g); size_t numEdges = 0; - for (auto* n : g->getNds()) { + for (auto* n : *g->getNds()) { numEdges += n->getAdjListOut().size(); } - LOG(DEBUG) << "Graph has " << g->getNds().size() << " nodes, " << numEdges - << " edges and " << comps - << " connected component(s) with more than 1 node"; - LOG(DEBUG) << _lines.size() << " transit lines have been read."; -} - -// _____________________________________________________________________________ -void OsmBuilder::osmfilterRuleWrite(std::ostream* out, - const std::vector& opts, - const BBoxIdx& latLngBox) const { - UNUSED(latLngBox); - OsmIdSet 
bboxNodes, noHupNodes; - MultAttrMap emptyF; - - RelLst rels; - OsmIdList ways; - RelMap nodeRels, wayRels; - - NIdMap nodes; - - OsmFilter filter; - - AttrKeySet attrKeys[3] = {}; - - for (const OsmReadOpts& o : opts) { - filter = filter.merge(OsmFilter(o.keepFilter, o.dropFilter)); - getKeptAttrKeys(o, attrKeys); - } - - *out << "--keep=\n"; - - for (auto r : filter.getKeepRules()) { - for (auto val : r.second) { - *out << r.first << "="; - if (val.first != "*") *out << val.first; - *out << "\n"; - } - } - - *out << "\n"; - - *out << "--keep-tags=\n"; - *out << "all\n"; - - for (const auto& keys : attrKeys) { - for (auto val : keys) { - *out << val << "=\n"; - } - } + LOG(DEBUG) << "Graph has " << g->getNds()->size() << " nodes, " << numEdges + << " edges and " << comps << " connected component(s)"; } // _____________________________________________________________________________ @@ -310,7 +267,7 @@ void OsmBuilder::overpassQryWrite(std::ostream* out, // _____________________________________________________________________________ void OsmBuilder::filterWrite(const std::string& in, const std::string& out, const std::vector& opts, - const BBoxIdx& box) { + const BBoxIdx& latLngBox) { OsmIdSet bboxNodes, noHupNodes; MultAttrMap emptyF; @@ -327,57 +284,13 @@ void OsmBuilder::filterWrite(const std::string& in, const std::string& out, NIdMultMap multNodes; pfxml::file xml(in); + std::ofstream outstr; + outstr.open(out); - BBoxIdx latLngBox = box; + util::xml::XmlWriter wr(&outstr, true, 4); - if (latLngBox.size() == 0) { - skipUntil(&xml, "bounds"); - - const pfxml::tag& cur = xml.get(); - - if (strcmp(cur.name, "bounds") != 0) { - throw pfxml::parse_exc( - std::string("Could not find required tag"), in, 0, 0, 0); - } - - if (!cur.attr("minlat")) { - throw pfxml::parse_exc( - std::string( - "Could not find required attribute \"minlat\" for tag"), - in, 0, 0, 0); - } - if (!cur.attr("minlon")) { - throw pfxml::parse_exc( - std::string( - "Could not find required 
attribute \"minlon\" for tag"), - in, 0, 0, 0); - } - if (!cur.attr("maxlat")) { - throw pfxml::parse_exc( - std::string( - "Could not find required attribute \"maxlat\" for tag"), - in, 0, 0, 0); - } - if (!cur.attr("maxlon")) { - throw pfxml::parse_exc( - std::string( - "Could not find required attribute \"maxlon\" for tag"), - in, 0, 0, 0); - } - - double minlat = atof(cur.attr("minlat")); - double minlon = atof(cur.attr("minlon")); - double maxlat = atof(cur.attr("maxlat")); - double maxlon = atof(cur.attr("maxlon")); - - latLngBox.add(Box({minlon, minlat}, {maxlon, maxlat})); - } - - util::xml::XmlWriter wr(out, false, 0); - - wr.put("\n"); - wr.openTag("osm", {{"version", "0.6"}, - {"generator", std::string("pfaedle/") + VERSION_FULL}}); + outstr << "\n"; + wr.openTag("osm"); wr.openTag( "bounds", {{"minlat", std::to_string(latLngBox.getFullBox().getLowerLeft().getY())}, @@ -510,9 +423,10 @@ void OsmBuilder::readWriteWays(pfxml::file* i, util::xml::XmlWriter* o, // _____________________________________________________________________________ NodePL OsmBuilder::plFromGtfs(const Stop* s, const OsmReadOpts& ops) { - NodePL ret({s->getLat(), s->getLng()}, - StatInfo(ops.statNormzer.norm(s->getName()), - ops.trackNormzer.norm(s->getPlatformCode()))); + NodePL ret( + util::geo::latLngToWebMerc(s->getLat(), s->getLng()), + StatInfo(ops.statNormzer.norm(s->getName()), + ops.trackNormzer.norm(s->getPlatformCode()), false)); #ifdef PFAEDLE_STATION_IDS // debug feature, store station id from GTFS @@ -540,7 +454,8 @@ pfxml::parser_state OsmBuilder::readBBoxNds(pfxml::file* xml, OsmIdSet* nodes, if (inNodeBlock && xml->level() == 3 && curId && strcmp(cur.name, "tag") == 0) { - if (filter.nohup(cur.attr("k"), cur.attr("v"))) { + if (filter.nohup(cur.attrs.find("k")->second, + cur.attrs.find("v")->second)) { nohupNodes->add(curId); } } @@ -551,15 +466,12 @@ pfxml::parser_state OsmBuilder::readBBoxNds(pfxml::file* xml, OsmIdSet* nodes, if (inNodeBlock) { // block ended if 
(strcmp(cur.name, "node")) return xml->state(); - double y = util::atof(cur.attr("lat"), 7); - double x = util::atof(cur.attr("lon"), 7); - - curId = util::atoul(cur.attr("id")); + double y = util::atof(cur.attrs.find("lat")->second, 7); + double x = util::atof(cur.attrs.find("lon")->second, 7); if (bbox.contains(Point(x, y))) { + curId = util::atoul(cur.attrs.find("id")->second); nodes->add(curId); - } else { - nodes->nadd(curId); } } } while (xml->next()); @@ -577,16 +489,16 @@ OsmWay OsmBuilder::nextWayWithId(pfxml::file* xml, osmid wid, if (xml->level() == 2 || xml->level() == 0) { if (w.id || strcmp(cur.name, "way")) return w; - osmid id = util::atoul(cur.attr("id")); + osmid id = util::atoul(cur.attrs.find("id")->second); if (id == wid) w.id = id; } if (w.id && xml->level() == 3) { if (strcmp(cur.name, "nd") == 0) { - w.nodes.push_back(util::atoul(cur.attr("ref"))); + w.nodes.push_back(util::atoul(cur.attrs.find("ref")->second)); } else if (strcmp(cur.name, "tag") == 0) { - if (keepAttrs.count(cur.attr("k"))) - w.attrs[cur.attr("k")] = cur.attr("v"); + if (keepAttrs.count(cur.attrs.find("k")->second)) + w.attrs[cur.attrs.find("k")->second] = cur.attrs.find("v")->second; } } } while (xml->next()); @@ -630,18 +542,18 @@ OsmWay OsmBuilder::nextWay(pfxml::file* xml, const RelMap& wayRels, if (keepWay(w, wayRels, filter, bBoxNodes, fl)) return w; if (strcmp(cur.name, "way")) return OsmWay(); - w.id = util::atoul(cur.attr("id")); + w.id = util::atoul(cur.attrs.find("id")->second); w.nodes.clear(); w.attrs.clear(); } if (w.id && xml->level() == 3) { if (strcmp(cur.name, "nd") == 0) { - osmid nid = util::atoul(cur.attr("ref")); + osmid nid = util::atoul(cur.attrs.find("ref")->second); w.nodes.push_back(nid); } else if (strcmp(cur.name, "tag") == 0) { - if (keepAttrs.count(cur.attr("k"))) - w.attrs[cur.attr("k")] = cur.attr("v"); + if (keepAttrs.count(cur.attrs.find("k")->second)) + w.attrs[cur.attrs.find("k")->second] = cur.attrs.find("v")->second; } } } while 
(xml->next()); @@ -714,7 +626,6 @@ void OsmBuilder::readEdges(pfxml::file* xml, Graph* g, const RelLst& rels, } else { n = (*nodes)[nid]; } - if (last) { auto e = g->addEdg(last, n, EdgePL()); if (!e) continue; @@ -781,14 +692,14 @@ OsmNode OsmBuilder::nextNode(pfxml::file* xml, NIdMap* nodes, if (strcmp(cur.name, "node")) return OsmNode(); n.attrs.clear(); - n.lat = util::atof(cur.attr("lat"), 7); - n.lng = util::atof(cur.attr("lon"), 7); - n.id = util::atoul(cur.attr("id")); + n.lat = util::atof(cur.attrs.find("lat")->second, 7); + n.lng = util::atof(cur.attrs.find("lon")->second, 7); + n.id = util::atoul(cur.attrs.find("id")->second); } if (xml->level() == 3 && n.id && strcmp(cur.name, "tag") == 0) { - if (keepAttrs.count(cur.attr("k"))) - n.attrs[cur.attr("k")] = cur.attr("v"); + if (keepAttrs.count(cur.attrs.find("k")->second)) + n.attrs[cur.attrs.find("k")->second] = cur.attrs.find("v")->second; } } while (xml->next()); @@ -844,42 +755,44 @@ void OsmBuilder::readNodes(pfxml::file* xml, Graph* g, const RelLst& rels, NIdMultMap* multNodes, NodeSet* orphanStations, const AttrKeySet& keepAttrs, const FlatRels& fl, const OsmReadOpts& opts) const { + StAttrGroups attrGroups; + OsmNode nd; while ((nd = nextNode(xml, nodes, multNodes, nodeRels, filter, bBoxNodes, keepAttrs, fl)) .id) { Node* n = 0; - POINT pos = {nd.lng, nd.lat}; + auto pos = util::geo::latLngToWebMerc(nd.lat, nd.lng); if (nodes->count(nd.id)) { n = (*nodes)[nd.id]; n->pl().setGeom(pos); if (filter.station(nd.attrs)) { - auto si = getStatInfo(nd.id, nd.attrs, nodeRels, rels, opts); + auto si = getStatInfo(n, nd.id, pos, nd.attrs, &attrGroups, nodeRels, + rels, opts); if (!si.isNull()) n->pl().setSI(si); } else if (filter.blocker(nd.attrs)) { n->pl().setBlocker(); - } else if (filter.turnCycle(nd.attrs)) { - n->pl().setTurnCycle(); } } else if ((*multNodes).count(nd.id)) { for (auto* n : (*multNodes)[nd.id]) { n->pl().setGeom(pos); if (filter.station(nd.attrs)) { - auto si = getStatInfo(nd.id, 
nd.attrs, nodeRels, rels, opts); + auto si = getStatInfo(n, nd.id, pos, nd.attrs, &attrGroups, nodeRels, + rels, opts); if (!si.isNull()) n->pl().setSI(si); } else if (filter.blocker(nd.attrs)) { n->pl().setBlocker(); - } else if (filter.turnCycle(nd.attrs)) { - n->pl().setTurnCycle(); } } } else { // these are nodes without any connected edges if (filter.station(nd.attrs)) { auto tmp = g->addNd(NodePL(pos)); - auto si = getStatInfo(nd.id, nd.attrs, nodeRels, rels, opts); + auto si = getStatInfo(tmp, nd.id, pos, nd.attrs, &attrGroups, nodeRels, + rels, opts); if (!si.isNull()) tmp->pl().setSI(si); if (tmp->pl().getSI()) { + tmp->pl().getSI()->setIsFromOsm(false); orphanStations->insert(tmp); } } @@ -915,34 +828,34 @@ OsmRel OsmBuilder::nextRel(pfxml::file* xml, const OsmFilter& filter, rel.wayRoles.clear(); rel.keepFlags = 0; rel.dropFlags = 0; - rel.id = util::atoul(cur.attr("id")); + rel.id = util::atoul(cur.attrs.find("id")->second); } if (xml->level() == 3 && rel.id) { if (strcmp(cur.name, "member") == 0) { - if (strcmp(cur.attr("type"), "node") == 0) { - osmid id = util::atoul(cur.attr("ref")); + if (strcmp(cur.attrs.find("type")->second, "node") == 0) { + osmid id = util::atoul(cur.attrs.find("ref")->second); // TODO(patrick): no need to push IDs that have been filtered out by // the bounding box!!!! 
rel.nodes.push_back(id); - if (cur.attr("role")) { - rel.nodeRoles.push_back(cur.attr("role")); + if (cur.attrs.count("role")) { + rel.nodeRoles.push_back(cur.attrs.find("role")->second); } else { rel.nodeRoles.push_back(""); } } - if (strcmp(cur.attr("type"), "way") == 0) { - osmid id = util::atoul(cur.attr("ref")); + if (strcmp(cur.attrs.find("type")->second, "way") == 0) { + osmid id = util::atoul(cur.attrs.find("ref")->second); rel.ways.push_back(id); - if (cur.attr("role")) { - rel.wayRoles.push_back(cur.attr("role")); + if (cur.attrs.count("role")) { + rel.wayRoles.push_back(cur.attrs.find("role")->second); } else { rel.wayRoles.push_back(""); } } } else if (strcmp(cur.name, "tag") == 0) { - if (keepAttrs.count(cur.attr("k"))) - rel.attrs[cur.attr("k")] = cur.attr("v"); + if (keepAttrs.count(cur.attrs.find("k")->second)) + rel.attrs[cur.attrs.find("k")->second] = cur.attrs.find("v")->second; } } } while (xml->next()); @@ -1023,13 +936,14 @@ void OsmBuilder::readRestr(const OsmRel& rel, Restrictions* rests, // _____________________________________________________________________________ std::string OsmBuilder::getAttrByFirstMatch(const DeepAttrLst& rule, osmid id, - const AttrMap& am, + const AttrMap& attrs, const RelMap& entRels, const RelLst& rels, const Normalizer& normzer) const { std::string ret; for (const auto& s : rule) { - ret = normzer.norm(pfxml::file::decode(getAttr(s, id, am, entRels, rels))); + ret = + normzer.norm(pfxml::file::decode(getAttr(s, id, attrs, entRels, rels))); if (!ret.empty()) return ret; } @@ -1038,12 +952,13 @@ std::string OsmBuilder::getAttrByFirstMatch(const DeepAttrLst& rule, osmid id, // _____________________________________________________________________________ std::vector OsmBuilder::getAttrMatchRanked( - const DeepAttrLst& rule, osmid id, const AttrMap& am, const RelMap& entRels, - const RelLst& rels, const Normalizer& norm) const { + const DeepAttrLst& rule, osmid id, const AttrMap& attrs, + const RelMap& entRels, const 
RelLst& rels, + const Normalizer& normzer) const { std::vector ret; for (const auto& s : rule) { std::string tmp = - norm.norm(pfxml::file::decode(getAttr(s, id, am, entRels, rels))); + normzer.norm(pfxml::file::decode(getAttr(s, id, attrs, entRels, rels))); if (!tmp.empty()) ret.push_back(tmp); } @@ -1052,11 +967,11 @@ std::vector OsmBuilder::getAttrMatchRanked( // _____________________________________________________________________________ std::string OsmBuilder::getAttr(const DeepAttrRule& s, osmid id, - const AttrMap& am, const RelMap& entRels, + const AttrMap& attrs, const RelMap& entRels, const RelLst& rels) const { if (s.relRule.kv.first.empty()) { - if (am.find(s.attr) != am.end()) { - return am.find(s.attr)->second; + if (attrs.find(s.attr) != attrs.end()) { + return attrs.find(s.attr)->second; } } else { if (entRels.count(id)) { @@ -1073,7 +988,9 @@ std::string OsmBuilder::getAttr(const DeepAttrRule& s, osmid id, } // _____________________________________________________________________________ -Nullable OsmBuilder::getStatInfo(osmid nid, const AttrMap& m, +Nullable OsmBuilder::getStatInfo(Node* node, osmid nid, + const POINT& pos, const AttrMap& m, + StAttrGroups* groups, const RelMap& nodeRels, const RelLst& rels, const OsmReadOpts& ops) const { @@ -1087,7 +1004,7 @@ Nullable OsmBuilder::getStatInfo(osmid nid, const AttrMap& m, if (!names.size()) return Nullable(); - auto ret = StatInfo(names[0], platform); + auto ret = StatInfo(names[0], platform, true); #ifdef PFAEDLE_STATION_IDS ret.setId(getAttrByFirstMatch(ops.statAttrRules.idRule, nid, m, nodeRels, @@ -1096,25 +1013,62 @@ Nullable OsmBuilder::getStatInfo(osmid nid, const AttrMap& m, for (size_t i = 1; i < names.size(); i++) ret.addAltName(names[i]); + bool groupFound = false; + + for (const auto& rule : ops.statGroupNAttrRules) { + if (groupFound) break; + std::string ruleVal = getAttr(rule.attr, nid, m, nodeRels, rels); + if (!ruleVal.empty()) { + // check if a matching group exists + for (auto* 
group : (*groups)[rule.attr.attr][ruleVal]) { + if (groupFound) break; + for (const auto* member : group->getNodes()) { + if (webMercMeterDist(*member->pl().getGeom(), pos) <= rule.maxDist) { + // ok, group is matching + groupFound = true; + if (node) group->addNode(node); + ret.setGroup(group); + break; + } + } + } + } + } + + if (!groupFound) { + for (const auto& rule : ops.statGroupNAttrRules) { + std::string ruleVal = getAttr(rule.attr, nid, m, nodeRels, rels); + if (!ruleVal.empty()) { + // add new group + StatGroup* g = new StatGroup(); + if (node) g->addNode(node); + ret.setGroup(g); + (*groups)[rule.attr.attr][ruleVal].push_back(g); + break; + } + } + } + return ret; } // _____________________________________________________________________________ double OsmBuilder::dist(const Node* a, const Node* b) { - return util::geo::haversine(*(a->pl().getGeom()), *(b->pl().getGeom())); + return webMercMeterDist(*a->pl().getGeom(), *b->pl().getGeom()); } // _____________________________________________________________________________ -void OsmBuilder::writeGeoms(Graph* g, const OsmReadOpts& opts) { - for (auto* n : g->getNds()) { - for (auto* e : n->getAdjListOut()) { - if (!e->pl().getGeom()) { - e->pl().addPoint(*e->getFrom()->pl().getGeom()); - e->pl().addPoint(*e->getTo()->pl().getGeom()); - } +double OsmBuilder::webMercDist(const Node* a, const Node* b) { + return webMercMeterDist(*a->pl().getGeom(), *b->pl().getGeom()); +} - e->pl().setCost(costToInt(e->pl().getLength() / - opts.levelDefSpeed[e->pl().lvl()])); +// _____________________________________________________________________________ +void OsmBuilder::writeGeoms(Graph* g) { + for (auto* n : *g->getNds()) { + for (auto* e : n->getAdjListOut()) { + e->pl().addPoint(*e->getFrom()->pl().getGeom()); + e->pl().setLength(dist(e->getFrom(), e->getTo())); + e->pl().addPoint(*e->getTo()->pl().getGeom()); } } } @@ -1122,18 +1076,18 @@ void OsmBuilder::writeGeoms(Graph* g, const OsmReadOpts& opts) { // 
_____________________________________________________________________________ void OsmBuilder::fixGaps(Graph* g, NodeGrid* ng) { double METER = 1; - for (auto* n : g->getNds()) { + for (auto* n : *g->getNds()) { if (n->getInDeg() + n->getOutDeg() == 1) { // get all nodes in distance std::set ret; - double distor = util::geo::latLngDistFactor(*n->pl().getGeom()); + double distor = util::geo::webMercDistFactor(*n->pl().getGeom()); ng->get(util::geo::pad(util::geo::getBoundingBox(*n->pl().getGeom()), - (METER / M_PER_DEG) / distor), + METER / distor), &ret); for (auto* nb : ret) { if (nb != n && (nb->getInDeg() + nb->getOutDeg()) == 1 && - dist(nb, n) <= METER) { - // special case: both nodes are non-stations, move + webMercDist(nb, n) <= METER / distor) { + // special case: both node are non-stations, move // the end point nb to n and delete nb if (!nb->pl().getSI() && !n->pl().getSI()) { Node* otherN; @@ -1141,6 +1095,9 @@ void OsmBuilder::fixGaps(Graph* g, NodeGrid* ng) { otherN = (*nb->getAdjListOut().begin())->getOtherNd(nb); else otherN = (*nb->getAdjListIn().begin())->getOtherNd(nb); + LINE l; + l.push_back(*otherN->pl().getGeom()); + l.push_back(*n->pl().getGeom()); Edge* e; if (nb->getOutDeg()) @@ -1148,6 +1105,7 @@ void OsmBuilder::fixGaps(Graph* g, NodeGrid* ng) { else e = g->addEdg(otherN, n, (*nb->getAdjListIn().begin())->pl()); if (e) { + *e->pl().getGeom() = l; g->delNd(nb); ng->remove(nb); } @@ -1165,27 +1123,25 @@ void OsmBuilder::fixGaps(Graph* g, NodeGrid* ng) { } // _____________________________________________________________________________ -EdgeGrid OsmBuilder::buildEdgeIdx(Graph* g, double size, const BOX& box) { - EdgeGrid ret(size, size, box, false); - for (auto* n : g->getNds()) { +EdgeGrid OsmBuilder::buildEdgeIdx(Graph* g, size_t size, + const BOX& webMercBox) { + EdgeGrid ret(size, size, webMercBox, false); + for (auto* n : *g->getNds()) { for (auto* e : n->getAdjListOut()) { - auto llGeom = - LINE{*e->getFrom()->pl().getGeom(), 
*e->getTo()->pl().getGeom()}; - ret.add(llGeom, e); + assert(e->pl().getGeom()); + ret.add(*e->pl().getGeom(), e); } } return ret; } // _____________________________________________________________________________ -NodeGrid OsmBuilder::buildNodeIdx(Graph* g, double size, const BOX& box, +NodeGrid OsmBuilder::buildNodeIdx(Graph* g, size_t size, const BOX& webMercBox, bool which) { - NodeGrid ret(size, size, box, false); - for (auto* n : g->getNds()) { - // only orphan nodes + NodeGrid ret(size, size, webMercBox, false); + for (auto* n : *g->getNds()) { if (!which && n->getInDeg() + n->getOutDeg() == 1) ret.add(*n->pl().getGeom(), n); - // only station nodes else if (which && n->pl().getSI()) ret.add(*n->pl().getGeom(), n); } @@ -1197,8 +1153,8 @@ Node* OsmBuilder::depthSearch(const Edge* e, const StatInfo* si, const POINT& p, double maxD, int maxFullTurns, double minAngle, const SearchFunc& sfunc) { // shortcuts - double dFrom = haversine(*e->getFrom()->pl().getGeom(), p); - double dTo = haversine(*e->getTo()->pl().getGeom(), p); + double dFrom = webMercMeterDist(*e->getFrom()->pl().getGeom(), p); + double dTo = webMercMeterDist(*e->getTo()->pl().getGeom(), p); if (dFrom > maxD && dTo > maxD) return 0; if (dFrom <= maxD && sfunc(e->getFrom(), si)) return e->getFrom(); @@ -1241,15 +1197,13 @@ Node* OsmBuilder::depthSearch(const Edge* e, const StatInfo* si, const POINT& p, if (util::geo::innerProd(nodeP, fromP, toP) < minAngle) fullTurn = 1; } - double eLen = dist(edg->getFrom(), edg->getTo()); - if ((maxFullTurns < 0 || cur.fullTurns + fullTurn <= maxFullTurns) && - cur.dist + eLen < maxD && !closed.count(cand)) { + cur.dist + edg->pl().getLength() < maxD && !closed.count(cand)) { if (sfunc(cand, si)) { return cand; } else { - pq.push( - NodeCand{cur.dist + eLen, cand, edg, cur.fullTurns + fullTurn}); + pq.push(NodeCand{cur.dist + edg->pl().getLength(), cand, edg, + cur.fullTurns + fullTurn}); } } } @@ -1266,39 +1220,96 @@ bool OsmBuilder::isBlocked(const Edge* e, 
const StatInfo* si, const POINT& p, // _____________________________________________________________________________ Node* OsmBuilder::eqStatReach(const Edge* e, const StatInfo* si, const POINT& p, - double maxD, int maxFullTurns, double minAngle) { - return depthSearch(e, si, p, maxD, maxFullTurns, minAngle, EqSearch()); + double maxD, int maxFullTurns, double minAngle, + bool orphanSnap) { + return depthSearch(e, si, p, maxD, maxFullTurns, minAngle, + EqSearch(orphanSnap)); } // _____________________________________________________________________________ void OsmBuilder::getEdgCands(const POINT& geom, EdgeCandPQ* ret, EdgeGrid* eg, double d) { - double distor = util::geo::latLngDistFactor(geom); + double distor = util::geo::webMercDistFactor(geom); std::set neighs; - BOX box = - util::geo::pad(util::geo::getBoundingBox(geom), (d / M_PER_DEG) / distor); + BOX box = util::geo::pad(util::geo::getBoundingBox(geom), d / distor); eg->get(box, &neighs); for (auto* e : neighs) { double dist = util::geo::distToSegment(*e->getFrom()->pl().getGeom(), *e->getTo()->pl().getGeom(), geom); - if (dist * distor * M_PER_DEG <= d) { + if (dist * distor <= d) { ret->push(EdgeCand(-dist, e)); } } } // _____________________________________________________________________________ -void OsmBuilder::snapStation(Graph* g, NodePL* s, EdgeGrid* eg, NodeGrid* sng, - const OsmReadOpts& opts, Restrictor* restor, - double d) { +std::set OsmBuilder::getMatchingNds(const NodePL& s, NodeGrid* ng, + double d) { + std::set ret; + double distor = util::geo::webMercDistFactor(*s.getGeom()); + std::set neighs; + BOX box = util::geo::pad(util::geo::getBoundingBox(*s.getGeom()), d / distor); + ng->get(box, &neighs); + + for (auto* n : neighs) { + if (n->pl().getSI() && n->pl().getSI()->simi(s.getSI()) > 0.5) { + double dist = webMercMeterDist(*n->pl().getGeom(), *s.getGeom()); + if (dist < d) ret.insert(n); + } + } + + return ret; +} + +// 
_____________________________________________________________________________ +Node* OsmBuilder::getMatchingNd(const NodePL& s, NodeGrid* ng, double d) { + double distor = util::geo::webMercDistFactor(*s.getGeom()); + std::set neighs; + BOX box = util::geo::pad(util::geo::getBoundingBox(*s.getGeom()), d / distor); + ng->get(box, &neighs); + + Node* ret = 0; + double bestD = std::numeric_limits::max(); + + for (auto* n : neighs) { + if (n->pl().getSI() && n->pl().getSI()->simi(s.getSI()) > 0.5) { + double dist = webMercMeterDist(*n->pl().getGeom(), *s.getGeom()); + if (dist < d && dist < bestD) { + bestD = dist; + ret = n; + } + } + } + + return ret; +} + +// _____________________________________________________________________________ +std::set OsmBuilder::snapStation(Graph* g, NodePL* s, EdgeGrid* eg, + NodeGrid* sng, const OsmReadOpts& opts, + Restrictor* restor, bool surrHeur, + bool orphSnap, double d) { assert(s->getSI()); + std::set ret; EdgeCandPQ pq; getEdgCands(*s->getGeom(), &pq, eg, d); + if (pq.empty() && surrHeur) { + // no station found in the first round, try again with the nearest + // surrounding station with matching name + const Node* best = getMatchingNd(*s, sng, opts.maxSnapFallbackHeurDistance); + if (best) { + getEdgCands(*best->pl().getGeom(), &pq, eg, d); + } else { + // if still no luck, get edge cands in fallback snap distance + getEdgCands(*s->getGeom(), &pq, eg, opts.maxSnapFallbackHeurDistance); + } + } + while (!pq.empty()) { auto* e = pq.top().second; pq.pop(); @@ -1308,51 +1319,96 @@ void OsmBuilder::snapStation(Graph* g, NodePL* s, EdgeGrid* eg, NodeGrid* sng, Node* eq = 0; if (!(eq = eqStatReach(e, s->getSI(), geom, 2 * d, 0, - opts.maxAngleSnapReach))) { + opts.maxAngleSnapReach, orphSnap))) { if (e->pl().lvl() > opts.maxSnapLevel) continue; if (isBlocked(e, s->getSI(), geom, opts.maxBlockDistance, 0, opts.maxAngleSnapReach)) { continue; } - // if the projected position is near (< 0.5 meters) the end point of this + // if the 
projected position is near (< 2 meters) the end point of this // way and the endpoint is not already a station, place the station there. if (!e->getFrom()->pl().getSI() && - haversine(geom, *e->getFrom()->pl().getGeom()) < .5) { + webMercMeterDist(geom, *e->getFrom()->pl().getGeom()) < 2) { e->getFrom()->pl().setSI(*s->getSI()); + if (s->getSI()->getGroup()) + s->getSI()->getGroup()->addNode(e->getFrom()); + ret.insert(e->getFrom()); } else if (!e->getTo()->pl().getSI() && - haversine(geom, *e->getTo()->pl().getGeom()) < .5) { + webMercMeterDist(geom, *e->getTo()->pl().getGeom()) < 2) { e->getTo()->pl().setSI(*s->getSI()); + if (s->getSI()->getGroup()) s->getSI()->getGroup()->addNode(e->getTo()); + ret.insert(e->getTo()); } else { s->setGeom(geom); Node* n = g->addNd(*s); + if (n->pl().getSI()->getGroup()) + n->pl().getSI()->getGroup()->addNode(n); sng->add(geom, n); auto ne = g->addEdg(e->getFrom(), n, e->pl()); - ne->pl().setCost(costToInt(dist(e->getFrom(), n) / - opts.levelDefSpeed[ne->pl().lvl()])); - eg->add({*e->getFrom()->pl().getGeom(), *n->pl().getGeom()}, ne); + ne->pl().setLength(webMercDist(n, e->getFrom())); + LINE l; + l.push_back(*e->getFrom()->pl().getGeom()); + l.push_back(*n->pl().getGeom()); + *ne->pl().getGeom() = l; + eg->add(l, ne); auto nf = g->addEdg(n, e->getTo(), e->pl()); - nf->pl().setCost(costToInt(dist(n, e->getTo()) / - opts.levelDefSpeed[nf->pl().lvl()])); - eg->add({*n->pl().getGeom(), *e->getTo()->pl().getGeom()}, nf); + nf->pl().setLength(webMercDist(n, e->getTo())); + LINE ll; + ll.push_back(*n->pl().getGeom()); + ll.push_back(*e->getTo()->pl().getGeom()); + *nf->pl().getGeom() = ll; + eg->add(ll, nf); // replace edge in restrictor restor->replaceEdge(e, ne, nf); g->delEdg(e->getFrom(), e->getTo()); eg->remove(e); + ret.insert(n); } } else { // if the snapped station is very near to the original OSM station // write additional info from this snap station to the equivalent stat - if (haversine(*s->getGeom(), *eq->pl().getGeom()) 
< 5) { + if (webMercMeterDist(*s->getGeom(), *eq->pl().getGeom()) < + opts.maxOsmStationDistance) { if (eq->pl().getSI()->getTrack().empty()) eq->pl().getSI()->setTrack(s->getSI()->getTrack()); } + ret.insert(eq); } } + + return ret; +} + +// _____________________________________________________________________________ +StatGroup* OsmBuilder::groupStats(const NodeSet& s) { + if (!s.size()) return 0; + // reference group + StatGroup* ret = new StatGroup(); + bool used = false; + + for (auto* n : s) { + if (!n->pl().getSI()) continue; + used = true; + if (n->pl().getSI()->getGroup()) { + // this node is already in a group - merge this group with this one + ret->merge(n->pl().getSI()->getGroup()); + } else { + ret->addNode(n); + n->pl().getSI()->setGroup(ret); + } + } + + if (!used) { + delete ret; + return 0; + } + + return ret; } // _____________________________________________________________________________ @@ -1367,7 +1423,6 @@ std::vector OsmBuilder::getLines( elp = _relLines[relId]; } else { TransitEdgeLine el; - el.color = ad::cppgtfs::gtfs::NO_COLOR; bool found = false; for (const auto& r : ops.relLinerules.sNameRule) { @@ -1405,23 +1460,6 @@ std::vector OsmBuilder::getLines( if (found) break; } - found = false; - for (const auto& r : ops.relLinerules.colorRule) { - for (const auto& relAttr : rels.rels[relId]) { - if (relAttr.first == r) { - auto dec = pfxml::file::decode(relAttr.second); - auto color = parseHexColor(dec); - if (color == ad::cppgtfs::gtfs::NO_COLOR) - color = parseHexColor(std::string("#") + dec); - if (color != ad::cppgtfs::gtfs::NO_COLOR) { - found = true; - el.color = color; - } - } - } - if (found) break; - } - if (!el.shortName.size() && !el.fromStr.size() && !el.toStr.size()) continue; @@ -1443,6 +1481,15 @@ std::vector OsmBuilder::getLines( // _____________________________________________________________________________ void OsmBuilder::getKeptAttrKeys(const OsmReadOpts& opts, AttrKeySet sets[3]) const { + for (const auto& i : 
opts.statGroupNAttrRules) { + if (i.attr.relRule.kv.first.empty()) { + sets[0].insert(i.attr.attr); + } else { + sets[2].insert(i.attr.relRule.kv.first); + sets[2].insert(i.attr.attr); + } + } + for (const auto& i : opts.keepFilter) { for (size_t j = 0; j < 3; j++) sets[j].insert(i.first); } @@ -1476,10 +1523,6 @@ void OsmBuilder::getKeptAttrKeys(const OsmReadOpts& opts, sets[0].insert(i.first); } - for (const auto& i : opts.turnCycleFilter) { - sets[0].insert(i.first); - } - for (uint8_t j = 0; j < 7; j++) { for (const auto& kv : *(opts.levelFilters + j)) { sets[1].insert(kv.first); @@ -1507,8 +1550,6 @@ void OsmBuilder::getKeptAttrKeys(const OsmReadOpts& opts, opts.relLinerules.fromNameRule.end()); sets[2].insert(opts.relLinerules.sNameRule.begin(), opts.relLinerules.sNameRule.end()); - sets[2].insert(opts.relLinerules.colorRule.begin(), - opts.relLinerules.colorRule.end()); for (const auto& i : opts.statAttrRules.nameRule) { if (i.relRule.kv.first.empty()) { @@ -1548,15 +1589,40 @@ void OsmBuilder::getKeptAttrKeys(const OsmReadOpts& opts, } // _____________________________________________________________________________ -void OsmBuilder::deleteOrphNds(Graph* g, const OsmReadOpts& opts) { - UNUSED(opts); - for (auto i = g->getNds().begin(); i != g->getNds().end();) { - if ((*i)->getInDeg() + (*i)->getOutDeg() != 0 || (*i)->pl().getSI()) { - ++i; - continue; - } +void OsmBuilder::deleteOrphEdgs(Graph* g, const OsmReadOpts& opts) { + size_t ROUNDS = 3; + for (size_t c = 0; c < ROUNDS; c++) { + for (auto i = g->getNds()->begin(); i != g->getNds()->end();) { + if ((*i)->getInDeg() + (*i)->getOutDeg() != 1 || (*i)->pl().getSI()) { + ++i; + continue; + } - i = g->delNd(*i); + // check if the removal of this edge would transform a steep angle + // full turn at an intersection into a node 2 eligible for contraction + // if so, dont delete + if (keepFullTurn(*i, opts.fullTurnAngle)) { + ++i; + continue; + } + + i = g->delNd(*i); + continue; + i++; + } + } +} + +// 
_____________________________________________________________________________ +void OsmBuilder::deleteOrphNds(Graph* g) { + for (auto i = g->getNds()->begin(); i != g->getNds()->end();) { + if ((*i)->getInDeg() + (*i)->getOutDeg() == 0 && + !((*i)->pl().getSI() && (*i)->pl().getSI()->getGroup())) { + i = g->delNd(i); + // TODO(patrick): maybe delete from node grid? + } else { + i++; + } } } @@ -1566,11 +1632,11 @@ bool OsmBuilder::edgesSim(const Edge* a, const Edge* b) { return false; if (a->pl().lvl() != b->pl().lvl()) return false; if (a->pl().getLines().size() != b->pl().getLines().size()) return false; + if (a->pl().getLines() != b->pl().getLines()) return false; if (a->pl().oneWay() && b->pl().oneWay()) { if (a->getFrom() != b->getTo() && a->getTo() != b->getFrom()) return false; } if (a->pl().isRestricted() || b->pl().isRestricted()) return false; - if (a->pl().getLines() != b->pl().getLines()) return false; return true; } @@ -1585,60 +1651,39 @@ const EdgePL& OsmBuilder::mergeEdgePL(Edge* a, Edge* b) { else n = a->getTo(); - if (a->pl().getGeom() == 0) { - a->pl().addPoint(*a->getFrom()->pl().getGeom()); - a->pl().addPoint(*a->getTo()->pl().getGeom()); - } - if (a->getTo() == n && b->getTo() == n) { // --> n <-- - if (b->pl().getGeom()) { - a->pl().getGeom()->insert(a->pl().getGeom()->end(), - b->pl().getGeom()->rbegin(), - b->pl().getGeom()->rend()); - } else { - a->pl().getGeom()->push_back(*b->getFrom()->pl().getGeom()); - } + a->pl().getGeom()->insert(a->pl().getGeom()->end(), + b->pl().getGeom()->rbegin(), + b->pl().getGeom()->rend()); } else if (a->getTo() == n && b->getFrom() == n) { // --> n --> - if (b->pl().getGeom()) { - a->pl().getGeom()->insert(a->pl().getGeom()->end(), - b->pl().getGeom()->begin(), - b->pl().getGeom()->end()); - } else { - a->pl().getGeom()->push_back(*b->getTo()->pl().getGeom()); - } + a->pl().getGeom()->insert(a->pl().getGeom()->end(), + b->pl().getGeom()->begin(), + b->pl().getGeom()->end()); } else if (a->getFrom() == n && 
b->getTo() == n) { // <-- n <-- std::reverse(a->pl().getGeom()->begin(), a->pl().getGeom()->end()); - if (b->pl().getGeom()) { - a->pl().getGeom()->insert(a->pl().getGeom()->end(), - b->pl().getGeom()->rbegin(), - b->pl().getGeom()->rend()); - } else { - a->pl().getGeom()->push_back(*b->getFrom()->pl().getGeom()); - } + a->pl().getGeom()->insert(a->pl().getGeom()->end(), + b->pl().getGeom()->rbegin(), + b->pl().getGeom()->rend()); } else { // <-- n --> std::reverse(a->pl().getGeom()->begin(), a->pl().getGeom()->end()); - if (b->pl().getGeom()) { - a->pl().getGeom()->insert(a->pl().getGeom()->end(), - b->pl().getGeom()->begin(), - b->pl().getGeom()->end()); - } else { - a->pl().getGeom()->push_back(*b->getTo()->pl().getGeom()); - } + a->pl().getGeom()->insert(a->pl().getGeom()->end(), + b->pl().getGeom()->begin(), + b->pl().getGeom()->end()); } + a->pl().setLength(a->pl().getLength() + b->pl().getLength()); + return a->pl(); } // _____________________________________________________________________________ void OsmBuilder::collapseEdges(Graph* g) { - for (auto n : g->getNds()) { - if (n->getOutDeg() + n->getInDeg() != 2 || n->pl().getSI() || - n->pl().isTurnCycle()) - continue; + for (auto* n : *g->getNds()) { + if (n->getOutDeg() + n->getInDeg() != 2 || n->pl().getSI()) continue; Edge* ea; Edge* eb; @@ -1654,7 +1699,7 @@ void OsmBuilder::collapseEdges(Graph* g) { } // important, we don't have a multigraph! 
if the same edge - // already exists, leave this node + // will already exist, leave this node if (g->getEdg(ea->getOtherNd(n), eb->getOtherNd(n))) continue; if (g->getEdg(eb->getOtherNd(n), ea->getOtherNd(n))) continue; @@ -1664,7 +1709,6 @@ void OsmBuilder::collapseEdges(Graph* g) { } else { g->addEdg(ea->getOtherNd(n), eb->getOtherNd(n), mergeEdgePL(ea, eb)); } - g->delEdg(ea->getFrom(), ea->getTo()); g->delEdg(eb->getFrom(), eb->getTo()); } @@ -1673,55 +1717,49 @@ void OsmBuilder::collapseEdges(Graph* g) { // _____________________________________________________________________________ void OsmBuilder::simplifyGeoms(Graph* g) { - for (auto* n : g->getNds()) { + for (auto* n : *g->getNds()) { for (auto* e : n->getAdjListOut()) { - (*e->pl().getGeom()) = - util::geo::simplify(*e->pl().getGeom(), 0.5 / M_PER_DEG); + (*e->pl().getGeom()) = util::geo::simplify(*e->pl().getGeom(), 0.5); } } } // _____________________________________________________________________________ -uint32_t OsmBuilder::writeComps(Graph* g, const OsmReadOpts& opts) { - NodePL::comps.clear(); - NodePL::comps.emplace_back(Component{0}); +uint32_t OsmBuilder::writeComps(Graph* g) { + Component* comp = new Component{7}; uint32_t numC = 0; - uint64_t numNds = 0; - double fac = opts.maxSpeedCorFac; - - for (auto* n : g->getNds()) { - if (!n->pl().getCompId()) { + for (auto* n : *g->getNds()) { + if (!n->pl().getComp()) { std::stack> q; q.push(std::pair(n, 0)); while (!q.empty()) { std::pair cur = q.top(); q.pop(); - cur.first->pl().setComp(NodePL::comps.size()); - numNds++; + cur.first->pl().setComp(comp); for (auto* e : cur.first->getAdjListOut()) { - double speed = opts.levelDefSpeed[e->pl().lvl()] / fac; - if (speed > NodePL::comps.back().maxSpeed) - NodePL::comps.back().maxSpeed = speed; - if (!e->getOtherNd(cur.first)->pl().getCompId()) + if (e->pl().lvl() < comp->minEdgeLvl) + comp->minEdgeLvl = e->pl().lvl(); + if (!e->getOtherNd(cur.first)->pl().getComp()) 
q.push(std::pair(e->getOtherNd(cur.first), e)); } for (auto* e : cur.first->getAdjListIn()) { - double speed = opts.levelDefSpeed[e->pl().lvl()] / fac; - if (speed > NodePL::comps.back().maxSpeed) - NodePL::comps.back().maxSpeed = speed; - if (!e->getOtherNd(cur.first)->pl().getCompId()) + if (e->pl().lvl() < comp->minEdgeLvl) + comp->minEdgeLvl = e->pl().lvl(); + if (!e->getOtherNd(cur.first)->pl().getComp()) q.push(std::pair(e->getOtherNd(cur.first), e)); } } - if (numNds > 1) numC++; - NodePL::comps.emplace_back(Component{0}); - numNds = 0; + numC++; + comp = new Component{7}; } } + // the last comp was not used + delete comp; + return numC; } @@ -1741,11 +1779,10 @@ void OsmBuilder::writeEdgeTracks(const EdgTracks& tracks) { // _____________________________________________________________________________ void OsmBuilder::writeODirEdgs(Graph* g, Restrictor* restor) { - for (auto* n : g->getNds()) { + for (auto* n : *g->getNds()) { for (auto* e : n->getAdjListOut()) { if (g->getEdg(e->getTo(), e->getFrom())) continue; auto newE = g->addEdg(e->getTo(), e->getFrom(), e->pl().revCopy()); - assert(newE->pl().getGeom()); if (e->pl().isRestricted()) restor->duplicateEdge(e, newE); } } @@ -1753,45 +1790,9 @@ void OsmBuilder::writeODirEdgs(Graph* g, Restrictor* restor) { // _____________________________________________________________________________ void OsmBuilder::writeSelfEdgs(Graph* g) { - // if a station only has degree 1, there is no way to arrive at this station - // without doing a full turn (because the outgoing candidate edge is always - // the incoming edge). This is a problem at end-stations. 
We solve this by - // adding self-edges with infinite costs - this still allows usage as - // arrivals, does not punish bends (because the node degree is still only 2) - // and prevents the usage of the edge to circumvent turn penalties - for (auto* n : g->getNds()) { - if (n->pl().getSI() && n->getAdjListOut().size() == 1) { - auto e = g->addEdg(n, n); - e->pl().setCost(std::numeric_limits::max()); - e->pl().addPoint(*e->getFrom()->pl().getGeom()); - e->pl().addPoint(*e->getTo()->pl().getGeom()); - } - } -} - -// _____________________________________________________________________________ -void OsmBuilder::writeNoLinePens(Graph* g, const OsmReadOpts& opts) { - for (auto* n : g->getNds()) { - for (auto* e : n->getAdjListOut()) { - if (e->pl().getLines().size() == 0) { - double c = e->pl().getCost(); - c = c / 10.0; // convert into seconds - e->pl().setCost(costToInt(c * opts.noLinesPunishFact)); - } - } - } -} - -// _____________________________________________________________________________ -void OsmBuilder::writeOneWayPens(Graph* g, const OsmReadOpts& opts) { - for (auto* n : g->getNds()) { - for (auto* e : n->getAdjListOut()) { - if (e->pl().oneWay() == 2) { - double c = e->pl().getCost(); - c = c / 10.0; // convert into seconds - e->pl().setCost( - costToInt(c * opts.oneWaySpeedPen + opts.oneWayEntryCost)); - } + for (auto* n : *g->getNds()) { + if (n->pl().getSI() && n->getAdjListOut().size() == 0) { + g->addEdg(n, n); } } } @@ -1825,25 +1826,12 @@ bool OsmBuilder::keepFullTurn(const trgraph::Node* n, double ang) { b = f; } - POINT ap, bp; + auto ap = a->pl().backHop(); + auto bp = b->pl().backHop(); + if (a->getTo() != other) ap = a->pl().frontHop(); + if (b->getTo() != other) bp = b->pl().frontHop(); - if (!a || !b) return false; - - if (a->pl().getGeom() && b->pl().getGeom()) { - ap = a->pl().backHop(); - bp = b->pl().backHop(); - if (a->getTo() != other) ap = a->pl().frontHop(); - if (b->getTo() != other) bp = b->pl().frontHop(); - } else { - 
assert(!a->pl().getGeom()); - assert(!b->pl().getGeom()); - ap = *a->getTo()->pl().getGeom(); - bp = *b->getTo()->pl().getGeom(); - if (a->getTo() != other) ap = *a->getFrom()->pl().getGeom(); - if (b->getTo() != other) bp = *b->getFrom()->pl().getGeom(); - } - - return util::geo::innerProd(*other->pl().getGeom(), ap, bp) > ang; + return router::angSmaller(ap, *other->pl().getGeom(), bp, ang); } return false; @@ -1851,88 +1839,121 @@ bool OsmBuilder::keepFullTurn(const trgraph::Node* n, double ang) { // _____________________________________________________________________________ void OsmBuilder::snapStats(const OsmReadOpts& opts, Graph* g, - const BBoxIdx& bbox, double gridSize, - Restrictor* res, const NodeSet& orphanStations) { - NodeGrid sng = buildNodeIdx(g, gridSize, bbox.getFullBox(), true); - EdgeGrid eg = buildEdgeIdx(g, gridSize, bbox.getFullBox()); + const BBoxIdx& bbox, size_t gridSize, + router::FeedStops* fs, Restrictor* res, + const NodeSet& orphanStations) { + NodeGrid sng = buildNodeIdx(g, gridSize, bbox.getFullWebMercBox(), true); + EdgeGrid eg = buildEdgeIdx(g, gridSize, bbox.getFullWebMercBox()); LOG(DEBUG) << "Grid size of " << sng.getXWidth() << "x" << sng.getYHeight(); - for (double d : opts.maxOsmStationDistances) { + for (double d : opts.maxSnapDistances) { for (auto s : orphanStations) { + POINT geom = *s->pl().getGeom(); NodePL pl = s->pl(); - snapStation(g, &pl, &eg, &sng, opts, res, d); - } - } -} - -// _____________________________________________________________________________ -uint32_t OsmBuilder::costToInt(double c) { - // always round upwards, otherwise when combined with the heuristic which - // is always rounded downwards the PQ monotonicity is not ensured anymore - - // with a downward rounding, the rounding errors may sum up so high that the - // path will get cheaper than the heuristic cost - uint32_t val = std::ceil(c * 10); - if (std::ceil(c * 10) > std::numeric_limits::max()) { - LOG(DEBUG) << "Cost " << c - << " does not 
fit in unsigned 32 bit integer, defaulting to " - << std::numeric_limits::max() << "."; - return std::numeric_limits::max(); - } - return val; -} - -// _____________________________________________________________________________ -uint32_t OsmBuilder::parseHexColor(std::string s) const { - // TODO(patrick): not very nice - size_t proced = 0; - std::transform(s.begin(), s.end(), s.begin(), ::toupper); - std::string ret = " "; - if (s.size() == 7 && s[0] == '#') { - for (size_t i = 1; i < 7; i++) { - if (isdigit(s[i])) - ret[i - 1] = s[i]; - else if (isalpha(s[i]) && (s[i] > 64 && s[i] < 71)) - ret[i - 1] = s[i]; - else - return ad::cppgtfs::gtfs::NO_COLOR; - } - - return std::stoul("0x" + ret, &proced, 16); - } - - if (s.size() == 4 && s[0] == '#') { - for (size_t i = 1; i < 4; i++) { - if (isdigit(s[i])) { - ret[(i - 1) * 2] = s[i]; - ret[(i - 1) * 2 + 1] = s[i]; - } else if (isalpha(s[i]) && (s[i] > 64 && s[i] < 71)) { - ret[(i - 1) * 2] = s[i]; - ret[(i - 1) * 2 + 1] = s[i]; - } else { - return ad::cppgtfs::gtfs::NO_COLOR; + pl.getSI()->setIsFromOsm(false); + const auto& r = + snapStation(g, &pl, &eg, &sng, opts, res, false, false, d); + groupStats(r); + for (auto n : r) { + // if the snapped station is very near to the original OSM + // station, set is-from-osm to true + if (webMercMeterDist(geom, *n->pl().getGeom()) < + opts.maxOsmStationDistance) { + if (n->pl().getSI()) n->pl().getSI()->setIsFromOsm(true); + } } } - return std::stoul("0x" + ret, &proced, 16); } - if (s == "BLACK") return 0x00000000; - if (s == "SILVER") return 0x00C0C0C0; - if (s == "GRAY") return 0x00808080; - if (s == "WHITE") return 0x00FFFFFF; - if (s == "MAROON") return 0x00800000; - if (s == "RED") return 0x00FF0000; - if (s == "PURPLE") return 0x00800080; - if (s == "FUCHSIA") return 0x00FF00FF; - if (s == "GREEN") return 0x00008000; - if (s == "LIME") return 0x0000FF00; - if (s == "OLIVE") return 0x00808000; - if (s == "YELLOW") return 0x00FFFF00; - if (s == "NAVY") return 0x00000080; 
- if (s == "BLUE") return 0x000000FF; - if (s == "TEAL") return 0x00008080; - if (s == "AQUA") return 0x0000FFFF; + if (!fs) return; - if (ret.empty()) return ad::cppgtfs::gtfs::NO_COLOR; - return std::stoul("0x" + ret, &proced, 16); + std::vector notSnapped; + + for (auto& s : *fs) { + bool snapped = false; + auto pl = plFromGtfs(s.first, opts); + for (size_t i = 0; i < opts.maxSnapDistances.size(); i++) { + double d = opts.maxSnapDistances[i]; + + StatGroup* group = groupStats( + snapStation(g, &pl, &eg, &sng, opts, res, + i == opts.maxSnapDistances.size() - 1, false, d)); + + if (group) { + group->addStop(s.first); + (*fs)[s.first] = *group->getNodes().begin(); + snapped = true; + } + } + if (!snapped) { + LOG(VDEBUG) << "Could not snap station " + << "(" << pl.getSI()->getName() << ")" + << " (" << s.first->getLat() << "," << s.first->getLng() + << ") in normal run, trying again later in orphan mode."; + if (!bbox.contains(*pl.getGeom())) { + LOG(VDEBUG) << "Note: '" << pl.getSI()->getName() + << "' does not lie within the bounds for this graph and " + "may be a stray station"; + } + notSnapped.push_back(s.first); + } + } + + if (notSnapped.size()) + LOG(VDEBUG) << notSnapped.size() + << " stations could not be snapped in " + "normal run, trying again in orphan " + "mode."; + + // try again, but aggressively snap to orphan OSM stations which have + // not been assigned to any GTFS stop yet + for (auto& s : notSnapped) { + bool snapped = false; + auto pl = plFromGtfs(s, opts); + for (size_t i = 0; i < opts.maxSnapDistances.size(); i++) { + double d = opts.maxSnapDistances[i]; + + StatGroup* group = groupStats( + snapStation(g, &pl, &eg, &sng, opts, res, + i == opts.maxSnapDistances.size() - 1, true, d)); + + if (group) { + group->addStop(s); + // add the added station name as an alt name to ensure future + // similarity + for (auto n : group->getNodes()) { + if (n->pl().getSI()) + n->pl().getSI()->addAltName(pl.getSI()->getName()); + } + (*fs)[s] = 
*group->getNodes().begin(); + snapped = true; + } + } + if (!snapped) { + // finally give up + + // add a group with only this stop in it + StatGroup* dummyGroup = new StatGroup(); + Node* dummyNode = g->addNd(pl); + + dummyNode->pl().getSI()->setGroup(dummyGroup); + dummyGroup->addNode(dummyNode); + dummyGroup->addStop(s); + (*fs)[s] = dummyNode; + if (!bbox.contains(*pl.getGeom())) { + LOG(VDEBUG) << "Could not snap station " + << "(" << pl.getSI()->getName() << ")" + << " (" << s->getLat() << "," << s->getLng() << ")"; + LOG(VDEBUG) << "Note: '" << pl.getSI()->getName() + << "' does not lie within the bounds for this graph and " + "may be a stray station"; + } else { + // only warn if it is contained in the BBOX for this graph + LOG(WARN) << "Could not snap station " + << "(" << pl.getSI()->getName() << ")" + << " (" << s->getLat() << "," << s->getLng() << ")"; + } + } + } } diff --git a/src/pfaedle/osm/OsmBuilder.h b/src/pfaedle/osm/OsmBuilder.h index e92f483..8b4c54f 100644 --- a/src/pfaedle/osm/OsmBuilder.h +++ b/src/pfaedle/osm/OsmBuilder.h @@ -25,24 +25,25 @@ #include "util/Nullable.h" #include "util/geo/Geo.h" #include "util/xml/XmlWriter.h" -#include "pfxml/pfxml.h" +#include "xml/pfxml.h" namespace pfaedle { namespace osm { -using ad::cppgtfs::gtfs::Stop; -using pfaedle::router::NodeSet; -using pfaedle::trgraph::Component; -using pfaedle::trgraph::Edge; using pfaedle::trgraph::EdgeGrid; -using pfaedle::trgraph::EdgePL; +using pfaedle::trgraph::NodeGrid; +using pfaedle::trgraph::Normalizer; using pfaedle::trgraph::Graph; using pfaedle::trgraph::Node; -using pfaedle::trgraph::NodeGrid; using pfaedle::trgraph::NodePL; -using pfaedle::trgraph::Normalizer; -using pfaedle::trgraph::StatInfo; +using pfaedle::trgraph::Edge; +using pfaedle::trgraph::EdgePL; using pfaedle::trgraph::TransitEdgeLine; +using pfaedle::trgraph::StatInfo; +using pfaedle::trgraph::StatGroup; +using pfaedle::trgraph::Component; +using pfaedle::router::NodeSet; +using 
ad::cppgtfs::gtfs::Stop; using util::Nullable; struct NodeCand { @@ -57,8 +58,9 @@ struct SearchFunc { }; struct EqSearch : public SearchFunc { - EqSearch() {} + explicit EqSearch(bool orphanSnap) : orphanSnap(orphanSnap) {} double minSimi = 0.9; + bool orphanSnap; bool operator()(const Node* cand, const StatInfo* si) const; }; @@ -85,19 +87,14 @@ class OsmBuilder { // Read the OSM file at path, and write a graph to g. Only elements // inside the bounding box will be read void read(const std::string& path, const OsmReadOpts& opts, Graph* g, - const BBoxIdx& box, double gridSize, Restrictor* res); + const BBoxIdx& box, size_t gridSize, router::FeedStops* fs, + Restrictor* res); // Based on the list of options, output an overpass XML query for getting // the data needed for routing void overpassQryWrite(std::ostream* out, const std::vector& opts, const BBoxIdx& latLngBox) const; - // Based on the list of options, output an osmfilter configuration file - // to filter the data needed for routing - void osmfilterRuleWrite(std::ostream* out, - const std::vector& opts, - const BBoxIdx& latLngBox) const; - // Based on the list of options, read an OSM file from in and output an // OSM file to out which contains exactly the entities that are needed // from the file at in @@ -106,8 +103,8 @@ class OsmBuilder { private: pfxml::parser_state readBBoxNds(pfxml::file* xml, OsmIdSet* nodes, - OsmIdSet* noHupNodes, const OsmFilter& filter, - const BBoxIdx& bbox) const; + OsmIdSet* noHupNodes, const OsmFilter& filter, + const BBoxIdx& bbox) const; void readRels(pfxml::file* f, RelLst* rels, RelMap* nodeRels, RelMap* wayRels, const OsmFilter& filter, const AttrKeySet& keepAttrs, @@ -143,14 +140,13 @@ class OsmBuilder { Restrictor* restor, const FlatRels& flatRels, EdgTracks* etracks, const OsmReadOpts& opts); - void readEdges(pfxml::file* xml, const RelMap& wayRels, - const OsmFilter& filter, const OsmIdSet& bBoxNodes, - const AttrKeySet& keepAttrs, OsmIdList* ret, NIdMap* nodes, - 
const FlatRels& flatRels); + void readEdges(pfxml::file* xml, const RelMap& wayRels, const OsmFilter& filter, + const OsmIdSet& bBoxNodes, const AttrKeySet& keepAttrs, + OsmIdList* ret, NIdMap* nodes, const FlatRels& flatRels); - OsmWay nextWay(pfxml::file* xml, const RelMap& wayRels, - const OsmFilter& filter, const OsmIdSet& bBoxNodes, - const AttrKeySet& keepAttrs, const FlatRels& flatRels) const; + OsmWay nextWay(pfxml::file* xml, const RelMap& wayRels, const OsmFilter& filter, + const OsmIdSet& bBoxNodes, const AttrKeySet& keepAttrs, + const FlatRels& flatRels) const; bool keepWay(const OsmWay& w, const RelMap& wayRels, const OsmFilter& filter, const OsmIdSet& bBoxNodes, const FlatRels& fl) const; @@ -172,45 +168,52 @@ class OsmBuilder { const AttrKeySet& keepAttrs) const; protected: - Nullable getStatInfo(osmid nid, const AttrMap& m, + Nullable getStatInfo(Node* node, osmid nid, const POINT& pos, + const AttrMap& m, StAttrGroups* groups, const RelMap& nodeRels, const RelLst& rels, const OsmReadOpts& ops) const; static void snapStats(const OsmReadOpts& opts, Graph* g, const BBoxIdx& bbox, - double gridSize, Restrictor* res, + size_t gridSize, router::FeedStops* fs, Restrictor* res, const NodeSet& orphanStations); - static void writeGeoms(Graph* g, const OsmReadOpts& opts); - static void deleteOrphNds(Graph* g, const OsmReadOpts& opts); + static void writeGeoms(Graph* g); + static void deleteOrphNds(Graph* g); + static void deleteOrphEdgs(Graph* g, const OsmReadOpts& opts); static double dist(const Node* a, const Node* b); + static double webMercDist(const Node* a, const Node* b); - static NodeGrid buildNodeIdx(Graph* g, double size, const BOX& box, + static NodeGrid buildNodeIdx(Graph* g, size_t size, const BOX& webMercBox, bool which); - static EdgeGrid buildEdgeIdx(Graph* g, double size, const BOX& box); + static EdgeGrid buildEdgeIdx(Graph* g, size_t size, const BOX& webMercBox); static void fixGaps(Graph* g, NodeGrid* ng); static void collapseEdges(Graph* 
g); static void writeODirEdgs(Graph* g, Restrictor* restor); static void writeSelfEdgs(Graph* g); - static void writeOneWayPens(Graph* g, const OsmReadOpts& opts); - static void writeNoLinePens(Graph* g, const OsmReadOpts& opts); static void writeEdgeTracks(const EdgTracks& tracks); static void simplifyGeoms(Graph* g); - static uint32_t writeComps(Graph* g, const OsmReadOpts& opts); + static uint32_t writeComps(Graph* g); static bool edgesSim(const Edge* a, const Edge* b); static const EdgePL& mergeEdgePL(Edge* a, Edge* b); static void getEdgCands(const POINT& s, EdgeCandPQ* ret, EdgeGrid* eg, double d); - static void snapStation(Graph* g, NodePL* s, EdgeGrid* eg, NodeGrid* sng, - const OsmReadOpts& opts, Restrictor* restor, - double maxD); + static std::set getMatchingNds(const NodePL& s, NodeGrid* ng, + double d); + + static Node* getMatchingNd(const NodePL& s, NodeGrid* ng, double d); + + static NodeSet snapStation(Graph* g, NodePL* s, EdgeGrid* eg, NodeGrid* sng, + const OsmReadOpts& opts, Restrictor* restor, + bool surHeur, bool orphSnap, double maxD); // Checks if from the edge e, a station similar to si can be reach with less // than maxD distance and less or equal to "maxFullTurns" full turns. If // such a station exists, it is returned. If not, 0 is returned. 
static Node* eqStatReach(const Edge* e, const StatInfo* si, const POINT& p, - double maxD, int maxFullTurns, double maxAng); + double maxD, int maxFullTurns, double maxAng, + bool orph); static Node* depthSearch(const Edge* e, const StatInfo* si, const POINT& p, double maxD, int maxFullTurns, double minAngle, @@ -220,6 +223,8 @@ class OsmBuilder { double maxD, int maxFullTurns, double minAngle); static bool keepFullTurn(const trgraph::Node* n, double ang); + static StatGroup* groupStats(const NodeSet& s); + static NodePL plFromGtfs(const Stop* s, const OsmReadOpts& ops); std::vector getLines(const std::vector& edgeRels, @@ -249,10 +254,6 @@ class OsmBuilder { bool relKeep(osmid id, const RelMap& rels, const FlatRels& fl) const; - uint32_t parseHexColor(std::string) const; - - static uint32_t costToInt(double c); - std::map _lines; std::map _relLines; }; diff --git a/src/pfaedle/osm/OsmFilter.cpp b/src/pfaedle/osm/OsmFilter.cpp index d180041..589cf5c 100644 --- a/src/pfaedle/osm/OsmFilter.cpp +++ b/src/pfaedle/osm/OsmFilter.cpp @@ -26,7 +26,6 @@ OsmFilter::OsmFilter(const OsmReadOpts& o) _posRestr(o.restrPosRestr), _negRestr(o.restrNegRestr), _noRestr(o.noRestrFilter), - _turnCycle(o.turnCycleFilter), _levels(o.levelFilters) {} // _____________________________________________________________________________ @@ -73,11 +72,6 @@ uint64_t OsmFilter::blocker(const AttrMap& attrs) const { return contained(attrs, _blocker, NODE); } -// _____________________________________________________________________________ -uint64_t OsmFilter::turnCycle(const AttrMap& attrs) const { - return contained(attrs, _turnCycle, NODE); -} - // _____________________________________________________________________________ uint64_t OsmFilter::contained(const AttrMap& attrs, const MultAttrMap& map, Type t) { diff --git a/src/pfaedle/osm/OsmFilter.h b/src/pfaedle/osm/OsmFilter.h index 757d03e..daf353a 100644 --- a/src/pfaedle/osm/OsmFilter.h +++ b/src/pfaedle/osm/OsmFilter.h @@ -5,11 +5,8 @@ 
#ifndef PFAEDLE_OSM_OSMFILTER_H_ #define PFAEDLE_OSM_OSMFILTER_H_ -#include - #include #include - #include "pfaedle/osm/Osm.h" #include "pfaedle/osm/OsmReadOpts.h" @@ -30,7 +27,6 @@ class OsmFilter { uint64_t onewayrev(const AttrMap& attrs) const; uint64_t station(const AttrMap& attrs) const; uint64_t blocker(const AttrMap& attrs) const; - uint64_t turnCycle(const AttrMap& attrs) const; uint64_t negRestr(const AttrMap& attrs) const; uint64_t posRestr(const AttrMap& attrs) const; std::vector getAttrKeys() const; @@ -50,7 +46,7 @@ class OsmFilter { private: MultAttrMap _keep, _drop, _nohup, _oneway, _onewayrev, _twoway, _station, - _blocker, _posRestr, _negRestr, _noRestr, _turnCycle; + _blocker, _posRestr, _negRestr, _noRestr; const MultAttrMap* _levels; }; } // namespace osm diff --git a/src/pfaedle/osm/OsmIdSet.cpp b/src/pfaedle/osm/OsmIdSet.cpp index 753c810..8b23bf9 100644 --- a/src/pfaedle/osm/OsmIdSet.cpp +++ b/src/pfaedle/osm/OsmIdSet.cpp @@ -15,7 +15,6 @@ #include #include "pfaedle/Def.h" #include "pfaedle/osm/OsmIdSet.h" -#include "util/3rdparty/MurmurHash3.h" using pfaedle::osm::OsmIdSet; @@ -29,46 +28,26 @@ OsmIdSet::OsmIdSet() _last(0), _smallest(-1), _biggest(0), - _hasInv(false), _obufpos(0), _curBlock(-1), _fsize(0) { _bitset = new std::bitset(); - _bitsetNotIn = new std::bitset(); _file = openTmpFile(); _buffer = new unsigned char[BUFFER_S]; - _outBuffer = new unsigned char[BUFFER_S]; + _outBuffer = new unsigned char[OBUFFER_S]; } // _____________________________________________________________________________ OsmIdSet::~OsmIdSet() { delete _bitset; - delete _bitsetNotIn; delete[] _buffer; if (!_closed) delete[] _outBuffer; } -// _____________________________________________________________________________ -void OsmIdSet::nadd(osmid id) { - if (_closed) throw std::exception(); - - _hasInv = true; - - uint32_t h1, h2; - MurmurHash3_x86_32(&id, 8, 469954432, &h1); - h2 = jenkins(id); - - for (int i = 0; i < 5; i++) { - uint32_t h = (h1 + i * h2) % 
BLOOMF_BITS; - (*_bitsetNotIn)[h] = 1; - } -} - // _____________________________________________________________________________ void OsmIdSet::add(osmid id) { if (_closed) throw std::exception(); - diskAdd(id); if (_last > id) _sorted = false; @@ -76,14 +55,7 @@ void OsmIdSet::add(osmid id) { if (id < _smallest) _smallest = id; if (id > _biggest) _biggest = id; - uint32_t h1, h2; - MurmurHash3_x86_32(&id, 8, 469954432, &h1); - h2 = jenkins(id); - - for (int i = 0; i < 5; i++) { - uint32_t h = (h1 + i * h2) % BLOOMF_BITS; - (*_bitset)[h] = 1; - } + for (int i = 0; i < 10; i++) (*_bitset)[hash(id, i)] = 1; } // _____________________________________________________________________________ @@ -97,8 +69,8 @@ void OsmIdSet::diskAdd(osmid id) { _blockEnds.push_back(id); } - if (_obufpos >= BUFFER_S) { - ssize_t w = cwrite(_file, _outBuffer, BUFFER_S); + if (_obufpos >= OBUFFER_S) { + ssize_t w = cwrite(_file, _outBuffer, OBUFFER_S); _fsize += w; _obufpos = 0; } @@ -114,8 +86,7 @@ size_t OsmIdSet::getBlock(osmid id) const { bool OsmIdSet::diskHas(osmid id) const { assert(_sorted); - auto a = std::lower_bound(_blockEnds.begin(), _blockEnds.end(), id); - if (a != _blockEnds.end() && *a == id) { + if (std::find(_blockEnds.begin(), _blockEnds.end(), id) != _blockEnds.end()) { return true; } @@ -154,23 +125,12 @@ bool OsmIdSet::has(osmid id) const { LOOKUPS++; if (!_closed) close(); - // trivial cases if (id < _smallest || id > _biggest) { return false; } - uint32_t h1, h2; - MurmurHash3_x86_32(&id, 8, 469954432, &h1); - h2 = jenkins(id); - - for (int i = 0; i < 5; i++) { - uint32_t h = (h1 + i * h2) % BLOOMF_BITS; - if ((*_bitset)[h] == 0) { - return false; - } - if (_hasInv && (*_bitsetNotIn)[h] == 0) { - return true; - } + for (int i = 0; i < 10; i++) { + if ((*_bitset)[hash(id, i)] == 0) return false; } bool has = diskHas(id); @@ -289,8 +249,8 @@ size_t OsmIdSet::cread(int f, void* buf, size_t n) const { // 
_____________________________________________________________________________ uint32_t OsmIdSet::knuth(uint32_t in) const { - const uint32_t a = 2654435769; - return (in * a) >> 2; + const uint32_t prime = 2654435769; + return (in * prime) >> 2; } // _____________________________________________________________________________ @@ -304,9 +264,14 @@ uint32_t OsmIdSet::jenkins(uint32_t in) const { return in >> 2; } +// _____________________________________________________________________________ +uint32_t OsmIdSet::hash(uint32_t in, int i) const { + return (knuth(in) + jenkins(in) * i) % BLOOMF_BITS; +} + // _____________________________________________________________________________ int OsmIdSet::openTmpFile() const { - const std::string& fname = util::getTmpFName("", ".pfaedle-tmp", ""); + const std::string& fname = getTmpFName("", ""); int file = open(fname.c_str(), O_RDWR | O_CREAT, 0666); // immediately unlink diff --git a/src/pfaedle/osm/OsmIdSet.h b/src/pfaedle/osm/OsmIdSet.h index 1a16b61..d19f9a1 100644 --- a/src/pfaedle/osm/OsmIdSet.h +++ b/src/pfaedle/osm/OsmIdSet.h @@ -25,7 +25,7 @@ static const size_t BUFFER_S = 8 * 64 * 1024; static const size_t SORT_BUFFER_S = 8 * 64 * 1024; static const size_t OBUFFER_S = 8 * 1024 * 1024; -#define BLOOMF_BITS 214748357 +#define BLOOMF_BITS 400000000 /* * A disk-based set for OSM ids. 
Read-access for checking the presence is @@ -39,9 +39,6 @@ class OsmIdSet { // Add an OSM id void add(osmid id); - // Add an OSM id that is NOT contained - void nadd(osmid id); - // Check if an OSM id is contained bool has(osmid id) const; @@ -60,8 +57,6 @@ class OsmIdSet { osmid _smallest; osmid _biggest; - bool _hasInv; - size_t _obufpos; mutable size_t _curBlock; mutable ssize_t _curBlockSize; @@ -69,14 +64,13 @@ class OsmIdSet { // bloom filter std::bitset* _bitset; - std::bitset* _bitsetNotIn; - mutable std::vector _blockEnds; mutable size_t _fsize; uint32_t knuth(uint32_t in) const; uint32_t jenkins(uint32_t in) const; + uint32_t hash(uint32_t in, int i) const; void diskAdd(osmid id); void close() const; void sort() const; diff --git a/src/pfaedle/osm/OsmReadOpts.h b/src/pfaedle/osm/OsmReadOpts.h index 5678c51..47d78ae 100644 --- a/src/pfaedle/osm/OsmReadOpts.h +++ b/src/pfaedle/osm/OsmReadOpts.h @@ -5,14 +5,14 @@ #ifndef PFAEDLE_OSM_OSMREADOPTS_H_ #define PFAEDLE_OSM_OSMREADOPTS_H_ -#include #include -#include +#include #include #include -#include +#include #include #include +#include #include "pfaedle/osm/Osm.h" #include "pfaedle/trgraph/Graph.h" #include "pfaedle/trgraph/Normalizer.h" @@ -77,12 +77,11 @@ struct RelLineRules { AttrLst sNameRule; AttrLst fromNameRule; AttrLst toNameRule; - AttrLst colorRule; }; inline bool operator==(const RelLineRules& a, const RelLineRules& b) { return a.sNameRule == b.sNameRule && a.fromNameRule == b.fromNameRule && - a.toNameRule == b.toNameRule && a.colorRule == b.colorRule; + a.toNameRule == b.toNameRule; } struct StationAttrRules { @@ -95,6 +94,21 @@ inline bool operator==(const StationAttrRules& a, const StationAttrRules& b) { return a.nameRule == b.nameRule && a.platformRule == b.platformRule; } +struct StatGroupNAttrRule { + DeepAttrRule attr; + double maxDist; +}; + +inline bool operator==(const StatGroupNAttrRule& a, + const StatGroupNAttrRule& b) { + return a.attr == b.attr && a.maxDist == b.maxDist; +} + 
+typedef std::unordered_map< + std::string, + std::unordered_map>> + StAttrGroups; + struct OsmReadOpts { OsmReadOpts() {} @@ -107,7 +121,7 @@ struct OsmReadOpts { MultAttrMap twoWayFilter; MultAttrMap stationFilter; MultAttrMap stationBlockerFilter; - MultAttrMap turnCycleFilter; + std::vector statGroupNAttrRules; trgraph::Normalizer statNormzer; trgraph::Normalizer lineNormzer; @@ -122,23 +136,14 @@ struct OsmReadOpts { uint8_t maxSnapLevel; double maxAngleSnapReach; - double maxSnapDistance; - double maxStationCandDistance; + std::vector maxSnapDistances; + double maxSnapFallbackHeurDistance; double maxBlockDistance; - double maxSpeed; - double maxSpeedCorFac; + double maxOsmStationDistance; - std::vector maxOsmStationDistances; - - // given in km/h, but store in m/s - double levelDefSpeed[8] = {85 * 0.2777, 70 * 0.2777, 55 * 0.2777, 40 * 0.2777, - 30 * 0.2777, 20 * 0.2777, 10 * 0.2777, 5 * 0.2777}; - - double oneWaySpeedPen; - double oneWayEntryCost; - - double noLinesPunishFact; + // TODO(patrick): this is not implemented yet + double levelSnapPunishFac[7] = {0, 0, 0, 0, 0, 0, 0}; double fullTurnAngle; @@ -149,10 +154,9 @@ struct OsmReadOpts { }; inline bool operator==(const OsmReadOpts& a, const OsmReadOpts& b) { - if (a.maxOsmStationDistances.size() != b.maxOsmStationDistances.size()) - return false; - for (size_t i = 0; i < a.maxOsmStationDistances.size(); i++) { - if (fabs(a.maxOsmStationDistances[i] - b.maxOsmStationDistances[i]) >= 0.1) + if (a.maxSnapDistances.size() != b.maxSnapDistances.size()) return false; + for (size_t i = 0; i < a.maxSnapDistances.size(); i++) { + if (fabs(a.maxSnapDistances[i] - b.maxSnapDistances[i]) >= 0.1) return false; } @@ -169,29 +173,24 @@ inline bool operator==(const OsmReadOpts& a, const OsmReadOpts& b) { a.twoWayFilter == b.twoWayFilter && a.stationFilter == b.stationFilter && a.stationBlockerFilter == b.stationBlockerFilter && - a.turnCycleFilter == b.turnCycleFilter && + a.statGroupNAttrRules == b.statGroupNAttrRules 
&& a.statNormzer == b.statNormzer && a.lineNormzer == b.lineNormzer && a.trackNormzer == b.trackNormzer && a.relLinerules == b.relLinerules && a.statAttrRules == b.statAttrRules && a.maxSnapLevel == b.maxSnapLevel && fabs(a.maxAngleSnapReach - b.maxAngleSnapReach) < 0.1 && - fabs(a.maxSnapDistance - b.maxSnapDistance) < 0.1 && - fabs(a.maxStationCandDistance - b.maxStationCandDistance) < 0.1 && + fabs(a.maxOsmStationDistance - b.maxOsmStationDistance) < 0.1 && + fabs(a.maxSnapFallbackHeurDistance - b.maxSnapFallbackHeurDistance) < + 0.1 && fabs(a.maxBlockDistance - b.maxBlockDistance) < 0.1 && - fabs(a.levelDefSpeed[0] - b.levelDefSpeed[0]) < 0.1 && - fabs(a.levelDefSpeed[1] - b.levelDefSpeed[1]) < 0.1 && - fabs(a.levelDefSpeed[2] - b.levelDefSpeed[2]) < 0.1 && - fabs(a.levelDefSpeed[3] - b.levelDefSpeed[3]) < 0.1 && - fabs(a.levelDefSpeed[4] - b.levelDefSpeed[4]) < 0.1 && - fabs(a.levelDefSpeed[5] - b.levelDefSpeed[5]) < 0.1 && - fabs(a.levelDefSpeed[6] - b.levelDefSpeed[6]) < 0.1 && - fabs(a.levelDefSpeed[7] - b.levelDefSpeed[7]) < 0.1 && - fabs(a.oneWaySpeedPen - b.oneWaySpeedPen) < 0.1 && - fabs(a.oneWayEntryCost - b.oneWayEntryCost) < 0.1 && - fabs(a.noLinesPunishFact - b.noLinesPunishFact) < 0.1 && + fabs(a.levelSnapPunishFac[0] - b.levelSnapPunishFac[0]) < 0.1 && + fabs(a.levelSnapPunishFac[1] - b.levelSnapPunishFac[1]) < 0.1 && + fabs(a.levelSnapPunishFac[2] - b.levelSnapPunishFac[2]) < 0.1 && + fabs(a.levelSnapPunishFac[3] - b.levelSnapPunishFac[3]) < 0.1 && + fabs(a.levelSnapPunishFac[4] - b.levelSnapPunishFac[4]) < 0.1 && + fabs(a.levelSnapPunishFac[5] - b.levelSnapPunishFac[5]) < 0.1 && + fabs(a.levelSnapPunishFac[6] - b.levelSnapPunishFac[6]) < 0.1 && fabs(a.fullTurnAngle - b.fullTurnAngle) < 0.1 && - fabs(a.maxSpeedCorFac - b.maxSpeedCorFac) < 0.1 && - fabs(a.maxSpeed - b.maxSpeed) < 0.1 && a.restrPosRestr == b.restrPosRestr && a.restrNegRestr == b.restrNegRestr && a.noRestrFilter == b.noRestrFilter; diff --git a/src/pfaedle/router/Comp.h 
b/src/pfaedle/router/Comp.h index cdadc21..349ff04 100644 --- a/src/pfaedle/router/Comp.h +++ b/src/pfaedle/router/Comp.h @@ -16,7 +16,7 @@ namespace router { using util::editDist; // _____________________________________________________________________________ -inline bool statSimi(const std::string& a, const std::string& b) { +inline double statSimi(const std::string& a, const std::string& b) { if (a == b) return 1; if (a.empty() || b.empty()) return 0; @@ -55,7 +55,7 @@ inline bool statSimi(const std::string& a, const std::string& b) { } // _____________________________________________________________________________ -inline bool lineSimi(const std::string& a, const std::string& b) { +inline double lineSimi(const std::string& a, const std::string& b) { if (a == b) return 1; if (a.empty() || b.empty()) return 0; diff --git a/src/pfaedle/router/EdgePL.cpp b/src/pfaedle/router/EdgePL.cpp new file mode 100644 index 0000000..a166b5b --- /dev/null +++ b/src/pfaedle/router/EdgePL.cpp @@ -0,0 +1,88 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#include "pfaedle/Def.h" +#include "util/geo/Geo.h" +#include "pfaedle/router/EdgePL.h" +#include "pfaedle/router/Router.h" +#include "util/String.h" + +using pfaedle::router::EdgePL; +using pfaedle::router::EdgeCost; +using pfaedle::router::EdgeList; +using pfaedle::trgraph::Node; + +// _____________________________________________________________________________ +EdgeList* EdgePL::getEdges() { return &_edges; } + +// _____________________________________________________________________________ +const EdgeList& EdgePL::getEdges() const { return _edges; } + +// _____________________________________________________________________________ +const POINT& EdgePL::frontHop() const { + if (!_edges.size()) return *_end->pl().getGeom(); + return _edges.back()->pl().frontHop(); +} + +// _____________________________________________________________________________ +const POINT& EdgePL::backHop() const { + if (!_edges.size()) return *_start->pl().getGeom(); + return _edges.front()->pl().backHop(); +} + +// _____________________________________________________________________________ +const Node* EdgePL::backNode() const { return _end; } + +// _____________________________________________________________________________ +const Node* EdgePL::frontNode() const { return _start; } + +// _____________________________________________________________________________ +const LINE* EdgePL::getGeom() const { + if (!_edges.size()) return 0; + if (!_geom.size()) { + const trgraph::Node* l = _start; + for (auto i = _edges.rbegin(); i != _edges.rend(); i++) { + const auto e = *i; + if ((e->getFrom() == l) ^ e->pl().isRev()) { + _geom.insert(_geom.end(), e->pl().getGeom()->begin(), + e->pl().getGeom()->end()); + } else { + _geom.insert(_geom.end(), e->pl().getGeom()->rbegin(), + e->pl().getGeom()->rend()); + } + l = e->getOtherNd(l); + } + } + + return &_geom; +} + +// _____________________________________________________________________________ +void 
EdgePL::setStartNode(const trgraph::Node* s) { _start = s; } + +// _____________________________________________________________________________ +void EdgePL::setEndNode(const trgraph::Node* e) { _end = e; } + +// _____________________________________________________________________________ +void EdgePL::setStartEdge(const trgraph::Edge* s) { _startE = s; } + +// _____________________________________________________________________________ +void EdgePL::setEndEdge(const trgraph::Edge* e) { _endE = e; } + +// _____________________________________________________________________________ +const EdgeCost& EdgePL::getCost() const { return _cost; } + +// _____________________________________________________________________________ +void EdgePL::setCost(const router::EdgeCost& c) { _cost = c; } + +// _____________________________________________________________________________ +util::json::Dict EdgePL::getAttrs() const { + util::json::Dict obj; + obj["cost"] = std::to_string(_cost.getValue()); + obj["from_edge"] = util::toString(_startE); + obj["to_edge"] = util::toString(_endE); + obj["dummy"] = _edges.size() ? "no" : "yes"; + + return obj; +} diff --git a/src/pfaedle/router/EdgePL.h b/src/pfaedle/router/EdgePL.h new file mode 100644 index 0000000..024b9a2 --- /dev/null +++ b/src/pfaedle/router/EdgePL.h @@ -0,0 +1,51 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifndef PFAEDLE_ROUTER_EDGEPL_H_ +#define PFAEDLE_ROUTER_EDGEPL_H_ + +#include +#include +#include "pfaedle/Def.h" +#include "pfaedle/router/Misc.h" +#include "util/geo/Geo.h" +#include "util/geo/GeoGraph.h" + +using util::geograph::GeoEdgePL; + +namespace pfaedle { +namespace router { + +class EdgePL { + public: + EdgePL() : _cost(), _start(0), _end(0), _startE(0), _endE(0) {} + const LINE* getGeom() const; + util::json::Dict getAttrs() const; + router::EdgeList* getEdges(); + const router::EdgeList& getEdges() const; + void setStartNode(const trgraph::Node* s); + void setEndNode(const trgraph::Node* s); + void setStartEdge(const trgraph::Edge* s); + void setEndEdge(const trgraph::Edge* s); + const router::EdgeCost& getCost() const; + void setCost(const router::EdgeCost& c); + const POINT& frontHop() const; + const POINT& backHop() const; + const trgraph::Node* frontNode() const; + const trgraph::Node* backNode() const; + + private: + router::EdgeCost _cost; + // the edges are in this field in REVERSED ORDER! + router::EdgeList _edges; + const trgraph::Node* _start; + const trgraph::Node* _end; + const trgraph::Edge* _startE; + const trgraph::Edge* _endE; + mutable LINE _geom; +}; +} // namespace router +} // namespace pfaedle + +#endif // PFAEDLE_ROUTER_EDGEPL_H_ diff --git a/src/pfaedle/router/Graph.h b/src/pfaedle/router/Graph.h new file mode 100644 index 0000000..88d7345 --- /dev/null +++ b/src/pfaedle/router/Graph.h @@ -0,0 +1,26 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifndef PFAEDLE_ROUTER_GRAPH_H_ +#define PFAEDLE_ROUTER_GRAPH_H_ + +#include "pfaedle/trgraph/Graph.h" +#include "pfaedle/router/EdgePL.h" +#include "pfaedle/router/NodePL.h" +#include "util/graph/DirGraph.h" + +using util::geo::Point; +using util::geo::Line; + +namespace pfaedle { +namespace router { + +typedef util::graph::Edge Edge; +typedef util::graph::Node Node; +typedef util::graph::DirGraph Graph; + +} // namespace router +} // namespace pfaedle + +#endif // PFAEDLE_ROUTER_GRAPH_H_ diff --git a/src/pfaedle/router/HopCache.cpp b/src/pfaedle/router/HopCache.cpp deleted file mode 100644 index bb53290..0000000 --- a/src/pfaedle/router/HopCache.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2020, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#include -#include -#include "pfaedle/router/HopCache.h" -#include "pfaedle/trgraph/Graph.h" -#include "util/Misc.h" - -using pfaedle::router::HopCache; -using pfaedle::trgraph::Edge; - -// _____________________________________________________________________________ -void HopCache::setMin(const Edge* a, const Edge* b, uint32_t val) { - _cache.set(a, b, val); -} - -// _____________________________________________________________________________ -void HopCache::setEx(const Edge* a, const Edge* b, uint32_t val) { - int64_t v = val; - _cache.set(a, b, -(v + 1)); -} - -// _____________________________________________________________________________ -void HopCache::setMin(const Edge* a, const std::set& b, uint32_t val) { - for (auto eb : b) _cache.set(a, eb, val); -} - -// _____________________________________________________________________________ -void HopCache::setMin(const std::set& a, const Edge* b, uint32_t val) { - for (auto ea : a) _cache.set(ea, b, val); -} - -// _____________________________________________________________________________ -std::pair HopCache::get(const Edge* a, const Edge* b) const { - int64_t v = _cache.get(a, b); - 
if (v < 0) return {(-v) - 1, 1}; - return {v, 0}; -} diff --git a/src/pfaedle/router/HopCache.h b/src/pfaedle/router/HopCache.h deleted file mode 100644 index 43d17af..0000000 --- a/src/pfaedle/router/HopCache.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2020, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#ifndef PFAEDLE_ROUTER_HOPCACHE_H_ -#define PFAEDLE_ROUTER_HOPCACHE_H_ - -#include -#include -#include -#include "pfaedle/trgraph/Graph.h" -#include "util/Misc.h" - -namespace pfaedle { -namespace router { - -class HopCache { - public: - void setMin(const trgraph::Edge* a, const trgraph::Edge* b, uint32_t val); - - void setMin(const trgraph::Edge* a, const std::set& b, - uint32_t val); - - void setMin(const std::set& a, const trgraph::Edge* b, - uint32_t val); - - void setEx(const trgraph::Edge* a, const trgraph::Edge* b, uint32_t val); - - std::pair get(const trgraph::Edge* a, - const trgraph::Edge* b) const; - - private: - util::SparseMatrix _cache; -}; - -} // namespace router -} // namespace pfaedle - -#endif // PFAEDLE_ROUTER_HOPCACHE_H_ diff --git a/src/pfaedle/router/Misc.h b/src/pfaedle/router/Misc.h index f6fc197..1c69c40 100644 --- a/src/pfaedle/router/Misc.h +++ b/src/pfaedle/router/Misc.h @@ -9,7 +9,6 @@ #include #include #include - #include "ad/cppgtfs/gtfs/Feed.h" #include "ad/cppgtfs/gtfs/Route.h" #include "pfaedle/gtfs/Feed.h" @@ -22,79 +21,132 @@ using ad::cppgtfs::gtfs::Stop; namespace pfaedle { namespace router { -extern double time; +struct NodeCand { + trgraph::Node* nd; + double pen; +}; struct EdgeCand { trgraph::Edge* e; double pen; - double progr; - POINT point; - int time; - std::vector depPrede; }; struct RoutingOpts { RoutingOpts() - : fullTurnPunishFac(1000), + : fullTurnPunishFac(2000), fullTurnAngle(45), - lineUnmatchedPunishFact(1), - lineNameFromUnmatchedPunishFact(1), - lineNameToUnmatchedPunishFact(1), - noLinesPunishFact(1), + passThruStationsPunish(100), + 
oneWayPunishFac(1), + oneWayEdgePunish(0), + lineUnmatchedPunishFact(0.5), + noLinesPunishFact(0), platformUnmatchedPen(0), stationDistPenFactor(0), - turnRestrCost(0), popReachEdge(true), noSelfHops(true) {} - uint32_t fullTurnPunishFac; + double fullTurnPunishFac; double fullTurnAngle; + double passThruStationsPunish; + double oneWayPunishFac; + double oneWayEdgePunish; double lineUnmatchedPunishFact; - double lineNameFromUnmatchedPunishFact; - double lineNameToUnmatchedPunishFact; double noLinesPunishFact; double platformUnmatchedPen; - double stationUnmatchedPen; double stationDistPenFactor; - double nonStationPen; - uint32_t turnRestrCost; + double nonOsmPen; + double levelPunish[8]; bool popReachEdge; bool noSelfHops; - bool useStations; - double transitionPen; - std::string transPenMethod; - std::string emPenMethod; - std::string statsimiMethod; }; // _____________________________________________________________________________ inline bool operator==(const RoutingOpts& a, const RoutingOpts& b) { - return a.fullTurnPunishFac == b.fullTurnPunishFac && + return fabs(a.fullTurnPunishFac - b.fullTurnPunishFac) < 0.01 && fabs(a.fullTurnAngle - b.fullTurnAngle) < 0.01 && + fabs(a.passThruStationsPunish - b.passThruStationsPunish) < 0.01 && + fabs(a.oneWayPunishFac - b.oneWayPunishFac) < 0.01 && + fabs(a.oneWayEdgePunish - b.oneWayEdgePunish) < 0.01 && fabs(a.lineUnmatchedPunishFact - b.lineUnmatchedPunishFact) < 0.01 && - fabs(a.lineNameFromUnmatchedPunishFact - - b.lineNameFromUnmatchedPunishFact) < 0.01 && - fabs(a.lineNameToUnmatchedPunishFact - - b.lineNameToUnmatchedPunishFact) < 0.01 && fabs(a.noLinesPunishFact - b.noLinesPunishFact) < 0.01 && fabs(a.platformUnmatchedPen - b.platformUnmatchedPen) < 0.01 && - fabs(a.stationUnmatchedPen - b.stationUnmatchedPen) < 0.01 && fabs(a.stationDistPenFactor - b.stationDistPenFactor) < 0.01 && - a.turnRestrCost == b.turnRestrCost && - fabs(a.transitionPen - b.transitionPen) < 0.01 && - fabs(a.nonStationPen - 
b.nonStationPen) < 0.01 && - a.transPenMethod == b.transPenMethod && - a.emPenMethod == b.emPenMethod && - a.statsimiMethod == b.statsimiMethod && - a.useStations == b.useStations && a.popReachEdge == b.popReachEdge && - a.noSelfHops == b.noSelfHops; + fabs(a.nonOsmPen - b.nonOsmPen) < 0.01 && + fabs(a.levelPunish[0] - b.levelPunish[0]) < 0.01 && + fabs(a.levelPunish[1] - b.levelPunish[1]) < 0.01 && + fabs(a.levelPunish[2] - b.levelPunish[2]) < 0.01 && + fabs(a.levelPunish[3] - b.levelPunish[3]) < 0.01 && + fabs(a.levelPunish[4] - b.levelPunish[4]) < 0.01 && + fabs(a.levelPunish[5] - b.levelPunish[5]) < 0.01 && + fabs(a.levelPunish[6] - b.levelPunish[6]) < 0.01 && + fabs(a.levelPunish[7] - b.levelPunish[7]) < 0.01 && + a.popReachEdge == b.popReachEdge && a.noSelfHops == b.noSelfHops; +} + +struct EdgeCost { + EdgeCost() : _cost(0) {} + explicit EdgeCost(double cost) : _cost(cost) {} + EdgeCost(double mDist, double mDistLvl1, double mDistLvl2, double mDistLvl3, + double mDistLvl4, double mDistLvl5, double mDistLvl6, + double mDistLvl7, uint32_t fullTurns, int32_t passThru, + double oneWayMeters, size_t oneWayEdges, double lineUnmatchedMeters, + double noLinesMeters, double reachPen, const RoutingOpts* o) { + if (!o) { + _cost = mDist + reachPen; + } else { + _cost = mDist * o->levelPunish[0] + mDistLvl1 * o->levelPunish[1] + + mDistLvl2 * o->levelPunish[2] + mDistLvl3 * o->levelPunish[3] + + mDistLvl4 * o->levelPunish[4] + mDistLvl5 * o->levelPunish[5] + + mDistLvl6 * o->levelPunish[6] + mDistLvl7 * o->levelPunish[7] + + oneWayMeters * o->oneWayPunishFac + + oneWayEdges * o->oneWayEdgePunish + + lineUnmatchedMeters * o->lineUnmatchedPunishFact + + noLinesMeters * o->noLinesPunishFact + + fullTurns * o->fullTurnPunishFac + + passThru * o->passThruStationsPunish + reachPen; + } + } + + float _cost; + + double getValue() const { return _cost; } +}; + +// _____________________________________________________________________________ +inline EdgeCost operator+(const 
EdgeCost& a, const EdgeCost& b) { + return EdgeCost(a.getValue() + b.getValue()); +} + +// _____________________________________________________________________________ +inline bool operator<=(const EdgeCost& a, const EdgeCost& b) { + return a.getValue() <= b.getValue(); +} + +// _____________________________________________________________________________ +inline bool operator==(const EdgeCost& a, const EdgeCost& b) { + return a.getValue() == b.getValue(); +} + +// _____________________________________________________________________________ +inline bool operator>(const EdgeCost& a, const EdgeCost& b) { + return a.getValue() > b.getValue(); +} + +// _____________________________________________________________________________ +template +inline bool angSmaller(const Point& f, const Point& m, const Point& t, + double ang) { + if (util::geo::innerProd(m, f, t) < ang) return 1; + return 0; } typedef std::set NodeSet; typedef std::set EdgeSet; typedef std::unordered_map FeedStops; +typedef std::vector NodeCandGroup; +typedef std::vector NodeCandRoute; + typedef std::vector EdgeCandGroup; -typedef std::vector EdgeCandMap; typedef std::vector EdgeCandRoute; typedef std::vector EdgeList; @@ -102,12 +154,8 @@ typedef std::vector NodeList; struct EdgeListHop { EdgeList edges; - const trgraph::Edge* start; - const trgraph::Edge* end; - double progrStart; - double progrEnd; - POINT pointStart; - POINT pointEnd; + const trgraph::Node* start; + const trgraph::Node* end; }; typedef std::vector EdgeListHops; @@ -149,27 +197,9 @@ inline pfaedle::router::FeedStops writeMotStops(const pfaedle::gtfs::Feed* feed, // _____________________________________________________________________________ inline std::string getMotStr(const MOTs& mots) { - MOTs tmp = mots; bool first = false; std::string motStr; - - std::string names[11] = {"tram", "subway", "rail", "bus", - "ferry", "cablecar", "gondola", "funicular", - "coach", "trolleybus", "monorail"}; - - for (const auto& n : names) { - const 
auto& types = ad::cppgtfs::gtfs::flat::Route::getTypesFromString(n); - const auto& isect = motISect(tmp, types); - - if (isect.size() == types.size()) { - if (first) motStr += ", "; - motStr += "{" + n + "}"; - first = true; - for (const auto& mot : isect) tmp.erase(mot); - } - } - - for (const auto& mot : tmp) { + for (const auto& mot : mots) { if (first) motStr += ", "; motStr += "<" + ad::cppgtfs::gtfs::flat::Route::getTypeString(mot) + ">"; first = true; diff --git a/src/pfaedle/router/NodePL.h b/src/pfaedle/router/NodePL.h new file mode 100644 index 0000000..a9c2ea4 --- /dev/null +++ b/src/pfaedle/router/NodePL.h @@ -0,0 +1,40 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#ifndef PFAEDLE_ROUTER_NODEPL_H_ +#define PFAEDLE_ROUTER_NODEPL_H_ + +#include +#include +#include "pfaedle/trgraph/Graph.h" +#include "util/geo/GeoGraph.h" +#include "util/geo/Geo.h" +#include "pfaedle/Def.h" + +using util::geograph::GeoNodePL; + + +namespace pfaedle { +namespace router { + +class NodePL { + public: + NodePL() : _n(0) {} + NodePL(const pfaedle::trgraph::Node* n) : _n(n) {} // NOLINT + + const POINT* getGeom() const { + return !_n ? 0 : _n->pl().getGeom(); + } + util::json::Dict getAttrs() const { + if (_n) return _n->pl().getAttrs(); + return util::json::Dict(); + } + + private: + const pfaedle::trgraph::Node* _n; +}; +} // namespace router +} // namespace pfaedle + +#endif // PFAEDLE_ROUTER_NODEPL_H_ diff --git a/src/pfaedle/router/Router.cpp b/src/pfaedle/router/Router.cpp new file mode 100644 index 0000000..c2e0055 --- /dev/null +++ b/src/pfaedle/router/Router.cpp @@ -0,0 +1,646 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifdef _OPENMP +#include +#else +#define omp_get_thread_num() 0 +#define omp_get_num_procs() 1 +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include "pfaedle/router/Comp.h" +#include "pfaedle/router/Router.h" +#include "pfaedle/router/RoutingAttrs.h" +#include "util/geo/output/GeoGraphJsonOutput.h" +#include "util/graph/Dijkstra.h" +#include "util/graph/EDijkstra.h" +#include "util/log/Log.h" + +using pfaedle::router::Router; +using pfaedle::router::EdgeCost; +using pfaedle::router::CostFunc; +using pfaedle::router::DistHeur; +using pfaedle::router::NCostFunc; +using pfaedle::router::NDistHeur; +using pfaedle::router::CombCostFunc; +using pfaedle::router::EdgeListHop; +using pfaedle::router::EdgeListHops; +using pfaedle::router::RoutingOpts; +using pfaedle::router::RoutingAttrs; +using pfaedle::router::HopBand; +using pfaedle::router::NodeCandRoute; +using util::graph::EDijkstra; +using util::graph::Dijkstra; +using util::geo::webMercMeterDist; + +// _____________________________________________________________________________ +EdgeCost NCostFunc::operator()(const trgraph::Node* from, + const trgraph::Edge* e, + const trgraph::Node* to) const { + UNUSED(to); + if (!from) return EdgeCost(); + + int oneway = e->pl().oneWay() == 2; + int32_t stationSkip = 0; + + return EdgeCost(e->pl().lvl() == 0 ? e->pl().getLength() : 0, + e->pl().lvl() == 1 ? e->pl().getLength() : 0, + e->pl().lvl() == 2 ? e->pl().getLength() : 0, + e->pl().lvl() == 3 ? e->pl().getLength() : 0, + e->pl().lvl() == 4 ? e->pl().getLength() : 0, + e->pl().lvl() == 5 ? e->pl().getLength() : 0, + e->pl().lvl() == 6 ? e->pl().getLength() : 0, + e->pl().lvl() == 7 ? 
e->pl().getLength() : 0, 0, stationSkip, + e->pl().getLength() * oneway, oneway, 0, 0, 0, &_rOpts); +} + +// _____________________________________________________________________________ +EdgeCost CostFunc::operator()(const trgraph::Edge* from, const trgraph::Node* n, + const trgraph::Edge* to) const { + if (!from) return EdgeCost(); + + uint32_t fullTurns = 0; + int oneway = from->pl().oneWay() == 2; + int32_t stationSkip = 0; + + if (n) { + if (from->getFrom() == to->getTo() && from->getTo() == to->getFrom()) { + // trivial full turn + fullTurns = 1; + } else if (n->getDeg() > 2) { + // otherwise, only intersection angles will be punished + fullTurns = router::angSmaller(from->pl().backHop(), *n->pl().getGeom(), + to->pl().frontHop(), _rOpts.fullTurnAngle); + } + + if (from->pl().isRestricted() && !_res.may(from, to, n)) oneway = 1; + + // for debugging + n->pl().setVisited(); + + if (_tgGrp && n->pl().getSI() && n->pl().getSI()->getGroup() != _tgGrp) + stationSkip = 1; + } + + double transitLinePen = transitLineCmp(from->pl()); + bool noLines = (_rAttrs.shortName.empty() && _rAttrs.toString.empty() && + _rAttrs.fromString.empty() && from->pl().getLines().empty()); + + return EdgeCost(from->pl().lvl() == 0 ? from->pl().getLength() : 0, + from->pl().lvl() == 1 ? from->pl().getLength() : 0, + from->pl().lvl() == 2 ? from->pl().getLength() : 0, + from->pl().lvl() == 3 ? from->pl().getLength() : 0, + from->pl().lvl() == 4 ? from->pl().getLength() : 0, + from->pl().lvl() == 5 ? from->pl().getLength() : 0, + from->pl().lvl() == 6 ? from->pl().getLength() : 0, + from->pl().lvl() == 7 ? from->pl().getLength() : 0, fullTurns, + stationSkip, from->pl().getLength() * oneway, oneway, + from->pl().getLength() * transitLinePen, + noLines ? 
from->pl().getLength() : 0, 0, &_rOpts); +} + +// _____________________________________________________________________________ +double CostFunc::transitLineCmp(const trgraph::EdgePL& e) const { + if (_rAttrs.shortName.empty() && _rAttrs.toString.empty() && + _rAttrs.fromString.empty()) + return 0; + double best = 1; + for (const auto* l : e.getLines()) { + double cur = _rAttrs.simi(l); + + if (cur < 0.0001) return 0; + if (cur < best) best = cur; + } + + return best; +} + +// _____________________________________________________________________________ +NDistHeur::NDistHeur(const RoutingOpts& rOpts, + const std::set& tos) + : _rOpts(rOpts), _maxCentD(0) { + size_t c = 0; + double x = 0, y = 0; + for (auto to : tos) { + x += to->pl().getGeom()->getX(); + y += to->pl().getGeom()->getY(); + c++; + } + + x /= c; + y /= c; + _center = POINT(x, y); + + for (auto to : tos) { + double cur = webMercMeterDist(*to->pl().getGeom(), _center); + if (cur > _maxCentD) _maxCentD = cur; + } +} + +// _____________________________________________________________________________ +DistHeur::DistHeur(uint8_t minLvl, const RoutingOpts& rOpts, + const std::set& tos) + : _rOpts(rOpts), _lvl(minLvl), _maxCentD(0) { + size_t c = 0; + double x = 0, y = 0; + for (auto to : tos) { + x += to->getFrom()->pl().getGeom()->getX(); + y += to->getFrom()->pl().getGeom()->getY(); + c++; + } + + x /= c; + y /= c; + _center = POINT(x, y); + + for (auto to : tos) { + double cur = webMercMeterDist(*to->getFrom()->pl().getGeom(), _center) * + _rOpts.levelPunish[_lvl]; + if (cur > _maxCentD) _maxCentD = cur; + } +} + +// _____________________________________________________________________________ +EdgeCost DistHeur::operator()(const trgraph::Edge* a, + const std::set& b) const { + UNUSED(b); + double cur = webMercMeterDist(*a->getFrom()->pl().getGeom(), _center) * + _rOpts.levelPunish[_lvl]; + + return EdgeCost(cur - _maxCentD, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +// 
_____________________________________________________________________________ +EdgeCost NDistHeur::operator()(const trgraph::Node* a, + const std::set& b) const { + UNUSED(b); + double cur = webMercMeterDist(*a->pl().getGeom(), _center); + + return EdgeCost(cur - _maxCentD, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +// _____________________________________________________________________________ +double CombCostFunc::operator()(const router::Edge* from, const router::Node* n, + const router::Edge* to) const { + UNUSED(n); + UNUSED(from); + return to->pl().getCost().getValue(); +} + +// _____________________________________________________________________________ +Router::Router(size_t numThreads, bool caching) + : _cache(numThreads), _caching(caching) { + for (size_t i = 0; i < numThreads; i++) { + _cache[i] = new Cache(); + } +} + +// _____________________________________________________________________________ +Router::~Router() { + for (size_t i = 0; i < _cache.size(); i++) { + delete _cache[i]; + } +} + +// _____________________________________________________________________________ +bool Router::compConned(const EdgeCandGroup& a, const EdgeCandGroup& b) const { + for (auto n1 : a) { + for (auto n2 : b) { + if (n1.e->getFrom()->pl().getComp() == n2.e->getFrom()->pl().getComp()) + return true; + } + } + + return false; +} + +// _____________________________________________________________________________ +HopBand Router::getHopBand(const EdgeCandGroup& a, const EdgeCandGroup& b, + const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& rest) const { + assert(a.size()); + assert(b.size()); + + double pend = 0; + for (size_t i = 0; i < a.size(); i++) { + for (size_t j = 0; j < b.size(); j++) { + double d = webMercMeterDist(*a[i].e->getFrom()->pl().getGeom(), + *b[j].e->getFrom()->pl().getGeom()); + if (d > pend) pend = d; + } + } + + LOG(VDEBUG) << "Pending max hop distance is " << pend << " meters"; + + const trgraph::StatGroup* 
tgGrpTo = 0; + + if (b.begin()->e->getFrom()->pl().getSI()) + tgGrpTo = b.begin()->e->getFrom()->pl().getSI()->getGroup(); + + CostFunc costF(rAttrs, rOpts, rest, tgGrpTo, pend * 50); + + std::set from, to; + + for (auto e : a) from.insert(e.e); + for (auto e : b) to.insert(e.e); + + LOG(VDEBUG) << "Doing pilot run between " << from.size() << "->" << to.size() + << " edge candidates"; + + EdgeList el; + EdgeCost ret = costF.inf(); + DistHeur distH(0, rOpts, to); + + if (compConned(a, b)) + ret = EDijkstra::shortestPath(from, to, costF, distH, &el); + + if (el.size() < 2 && costF.inf() <= ret) { + LOG(VDEBUG) << "Pilot run: no connection between candidate groups," + << " setting max distance to 1"; + return HopBand{0, 1, 0, 0}; + } + + // cache the found path, will save a few dijkstra iterations + nestedCache(&el, from, costF, rAttrs); + + auto na = el.back()->getFrom(); + auto nb = el.front()->getFrom(); + + double maxStrD = 0; + + for (auto e : to) { + double d = webMercMeterDist(*el.front()->getFrom()->pl().getGeom(), + *e->getTo()->pl().getGeom()); + if (d > maxStrD) maxStrD = d; + } + + // TODO(patrick): derive the punish level here automatically + double maxD = std::max(ret.getValue(), pend * rOpts.levelPunish[2]) * 3 + + rOpts.fullTurnPunishFac + rOpts.platformUnmatchedPen; + double minD = ret.getValue(); + + LOG(VDEBUG) << "Pilot run: min distance between two groups is " + << ret.getValue() << " (between nodes " << na << " and " << nb + << "), using a max routing distance of " << maxD << ". 
The max" + << " straight line distance from the pilot target to any other " + "target node was" + << " " << maxStrD << "."; + + return HopBand{minD, maxD, el.front(), maxStrD}; +} + +// _____________________________________________________________________________ +EdgeListHops Router::routeGreedy(const NodeCandRoute& route, + const RoutingAttrs& rAttrs, + const RoutingOpts& rOpts, + const osm::Restrictor& rest) const { + if (route.size() < 2) return EdgeListHops(); + EdgeListHops ret(route.size() - 1); + + for (size_t i = 0; i < route.size() - 1; i++) { + const trgraph::StatGroup* tgGrp = 0; + std::set from, to; + for (auto c : route[i]) from.insert(c.nd); + for (auto c : route[i + 1]) to.insert(c.nd); + if (route[i + 1].begin()->nd->pl().getSI()) + tgGrp = route[i + 1].begin()->nd->pl().getSI()->getGroup(); + + NCostFunc cost(rAttrs, rOpts, rest, tgGrp); + NDistHeur dist(rOpts, to); + + NodeList nodesRet; + EdgeListHop hop; + Dijkstra::shortestPath(from, to, cost, dist, &hop.edges, &nodesRet); + + if (nodesRet.size() > 1) { + // careful: nodesRet is reversed! 
+ hop.start = nodesRet.back(); + hop.end = nodesRet.front(); + } else { + // just take the first candidate if no route could be found + hop.start = *from.begin(); + hop.end = *to.begin(); + } + + ret[i] = hop; + } + + return ret; +} + +// _____________________________________________________________________________ +EdgeListHops Router::routeGreedy2(const NodeCandRoute& route, + const RoutingAttrs& rAttrs, + const RoutingOpts& rOpts, + const osm::Restrictor& rest) const { + if (route.size() < 2) return EdgeListHops(); + EdgeListHops ret(route.size() - 1); + + for (size_t i = 0; i < route.size() - 1; i++) { + const trgraph::StatGroup* tgGrp = 0; + std::set from, to; + + if (i == 0) + for (auto c : route[i]) from.insert(c.nd); + else + from.insert(const_cast(ret[i - 1].end)); + + for (auto c : route[i + 1]) to.insert(c.nd); + + if (route[i + 1].begin()->nd->pl().getSI()) + tgGrp = route[i + 1].begin()->nd->pl().getSI()->getGroup(); + + NCostFunc cost(rAttrs, rOpts, rest, tgGrp); + NDistHeur dist(rOpts, to); + + NodeList nodesRet; + EdgeListHop hop; + Dijkstra::shortestPath(from, to, cost, dist, &hop.edges, &nodesRet); + if (nodesRet.size() > 1) { + // careful: nodesRet is reversed! 
+ hop.start = nodesRet.back(); + hop.end = nodesRet.front(); + } else { + // just take the first candidate if no route could be found + hop.start = *from.begin(); + hop.end = *to.begin(); + } + + ret[i] = hop; + } + + return ret; +} + +// _____________________________________________________________________________ +EdgeListHops Router::route(const EdgeCandRoute& route, + const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& rest) const { + router::Graph cg; + return Router::route(route, rAttrs, rOpts, rest, &cg); +} + +// _____________________________________________________________________________ +EdgeListHops Router::route(const EdgeCandRoute& route, + const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& rest, + router::Graph* cgraph) const { + if (route.size() < 2) return EdgeListHops(); + EdgeListHops ret(route.size() - 1); + + CombCostFunc ccost(rOpts); + router::Node* source = cgraph->addNd(); + router::Node* sink = cgraph->addNd(); + CombNodeMap nodes; + CombNodeMap nextNodes; + + for (size_t i = 0; i < route[0].size(); i++) { + auto e = route[0][i].e; + // we can be sure that each edge is exactly assigned to only one + // node because the transitgraph is directed + nodes[e] = cgraph->addNd(route[0][i].e->getFrom()); + cgraph->addEdg(source, nodes[e]) + ->pl() + .setCost(EdgeCost(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + route[0][i].pen, 0)); + } + + size_t iters = EDijkstra::ITERS; + double itPerSecTot = 0; + size_t n = 0; + for (size_t i = 0; i < route.size() - 1; i++) { + nextNodes.clear(); + HopBand hopBand = getHopBand(route[i], route[i + 1], rAttrs, rOpts, rest); + + const trgraph::StatGroup* tgGrp = 0; + if (route[i + 1].begin()->e->getFrom()->pl().getSI()) + tgGrp = route[i + 1].begin()->e->getFrom()->pl().getSI()->getGroup(); + + std::set froms; + for (const auto& fr : route[i]) froms.insert(fr.e); + + for (auto eFr : froms) { + router::Node* cNodeFr = nodes.find(eFr)->second; + + EdgeSet tos; + 
std::map edges; + std::map pens; + std::unordered_map edgeLists; + std::unordered_map costs; + + assert(route[i + 1].size()); + + for (const auto& to : route[i + 1]) { + auto eTo = to.e; + tos.insert(eTo); + if (!nextNodes.count(eTo)) + nextNodes[eTo] = cgraph->addNd(to.e->getFrom()); + if (i == route.size() - 2) cgraph->addEdg(nextNodes[eTo], sink); + + edges[eTo] = cgraph->addEdg(cNodeFr, nextNodes[eTo]); + pens[eTo] = to.pen; + + edgeLists[eTo] = edges[eTo]->pl().getEdges(); + edges[eTo]->pl().setStartNode(eFr->getFrom()); + // for debugging + edges[eTo]->pl().setStartEdge(eFr); + edges[eTo]->pl().setEndNode(to.e->getFrom()); + // for debugging + edges[eTo]->pl().setEndEdge(eTo); + } + + size_t iters = EDijkstra::ITERS; + auto t1 = TIME(); + + assert(tos.size()); + assert(froms.size()); + + hops(eFr, froms, tos, tgGrp, edgeLists, &costs, rAttrs, rOpts, rest, + hopBand); + double itPerSec = + (static_cast(EDijkstra::ITERS - iters)) / TOOK(t1, TIME()); + n++; + itPerSecTot += itPerSec; + + LOG(VDEBUG) << "from " << eFr << ": 1-" << tos.size() << " (" + << route[i + 1].size() << " nodes) hop took " + << EDijkstra::ITERS - iters << " iterations, " + << TOOK(t1, TIME()) << "ms (tput: " << itPerSec << " its/ms)"; + for (auto& kv : edges) { + kv.second->pl().setCost( + EdgeCost(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, pens[kv.first], 0) + + costs[kv.first]); + + if (rOpts.popReachEdge && kv.second->pl().getEdges()->size()) { + if (kv.second->pl().getEdges() && + kv.second->pl().getEdges()->size()) { + // the reach edge is included, but we dont want it in the geometry + kv.second->pl().getEdges()->erase( + kv.second->pl().getEdges()->begin()); + } + } + } + } + + std::swap(nodes, nextNodes); + } + + LOG(VDEBUG) << "Hops took " << EDijkstra::ITERS - iters << " iterations," + << " average tput was " << (itPerSecTot / n) << " its/ms"; + + iters = EDijkstra::ITERS; + std::vector res; + EDijkstra::shortestPath(source, sink, ccost, &res); + size_t j = 0; + + LOG(VDEBUG) << 
"Optim graph solve took " << EDijkstra::ITERS - iters + << " iterations."; + + for (auto i = res.rbegin(); i != res.rend(); i++) { + const auto e = *i; + if (e->getFrom() != source && e->getTo() != sink) { + assert(e->pl().frontNode()); + assert(e->pl().backNode()); + + ret[j] = EdgeListHop{std::move(*e->pl().getEdges()), e->pl().frontNode(), + e->pl().backNode()}; + j++; + } + } + + assert(ret.size() == j); + return ret; +} + +// _____________________________________________________________________________ +EdgeListHops Router::route(const NodeCandRoute& route, + const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& rest) const { + router::Graph cg; + return Router::route(route, rAttrs, rOpts, rest, &cg); +} + +// _____________________________________________________________________________ +EdgeListHops Router::route(const NodeCandRoute& route, + const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& rest, + router::Graph* cgraph) const { + EdgeCandRoute r; + for (auto& nCands : route) { + r.emplace_back(); + for (auto n : nCands) + for (auto* e : n.nd->getAdjListOut()) + r.back().push_back(EdgeCand{e, n.pen}); + } + + return Router::route(r, rAttrs, rOpts, rest, cgraph); +} + +// _____________________________________________________________________________ +void Router::hops(trgraph::Edge* from, const std::set& froms, + const std::set tos, + const trgraph::StatGroup* tgGrp, + const std::unordered_map& edgesRet, + std::unordered_map* rCosts, + const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& rest, HopBand hopB) const { + std::set rem; + + CostFunc cost(rAttrs, rOpts, rest, tgGrp, hopB.maxD); + + const auto& cached = getCachedHops(from, tos, edgesRet, rCosts, rAttrs); + + for (auto e : cached) { + // shortcut: if the nodes lie in two different connected components, + // the distance between them is trivially infinite + if ((rOpts.noSelfHops && (e == from || e->getFrom() == 
from->getFrom())) || + from->getFrom()->pl().getComp() != e->getTo()->pl().getComp() || + e->pl().oneWay() == 2 || from->pl().oneWay() == 2) { + (*rCosts)[e] = cost.inf(); + } else { + rem.insert(e); + } + } + + LOG(VDEBUG) << "From cache: " << tos.size() - rem.size() + << ", have to cal: " << rem.size(); + + if (rem.size()) { + DistHeur dist(from->getFrom()->pl().getComp()->minEdgeLvl, rOpts, rem); + const auto& ret = EDijkstra::shortestPath(from, rem, cost, dist, edgesRet); + for (const auto& kv : ret) { + nestedCache(edgesRet.at(kv.first), froms, cost, rAttrs); + + (*rCosts)[kv.first] = kv.second; + } + } +} + +// _____________________________________________________________________________ +void Router::nestedCache(const EdgeList* el, + const std::set& froms, + const CostFunc& cost, + const RoutingAttrs& rAttrs) const { + if (!_caching) return; + if (el->size() == 0) return; + // iterate over result edges backwards + EdgeList curEdges; + EdgeCost curCost; + + size_t j = 0; + + for (auto i = el->begin(); i < el->end(); i++) { + if (curEdges.size()) { + curCost = curCost + cost(*i, (*i)->getTo(), curEdges.back()); + } + + curEdges.push_back(*i); + + if (froms.count(*i)) { + EdgeCost startC = cost(0, 0, *i) + curCost; + cache(*i, el->front(), startC, &curEdges, rAttrs); + j++; + } + } +} + +// _____________________________________________________________________________ +std::set Router::getCachedHops( + trgraph::Edge* from, const std::set& tos, + const std::unordered_map& edgesRet, + std::unordered_map* rCosts, + const RoutingAttrs& rAttrs) const { + std::set ret; + for (auto to : tos) { + if (_caching && (*_cache[omp_get_thread_num()])[rAttrs][from].count(to)) { + const auto& cv = (*_cache[omp_get_thread_num()])[rAttrs][from][to]; + (*rCosts)[to] = cv.first; + *edgesRet.at(to) = cv.second; + } else { + ret.insert(to); + } + } + + return ret; +} + +// _____________________________________________________________________________ +void Router::cache(trgraph::Edge* 
from, trgraph::Edge* to, const EdgeCost& c, + EdgeList* edges, const RoutingAttrs& rAttrs) const { + if (!_caching) return; + if (from == to) return; + (*_cache[omp_get_thread_num()])[rAttrs][from][to] = + std::pair(c, *edges); +} + +// _____________________________________________________________________________ +size_t Router::getCacheNumber() const { return _cache.size(); } diff --git a/src/pfaedle/router/Router.h b/src/pfaedle/router/Router.h index 630d456..74bf581 100644 --- a/src/pfaedle/router/Router.h +++ b/src/pfaedle/router/Router.h @@ -7,95 +7,197 @@ #include #include +#include #include -#include #include #include #include #include #include "pfaedle/Def.h" #include "pfaedle/osm/Restrictor.h" -#include "pfaedle/router/HopCache.h" +#include "pfaedle/router/Graph.h" #include "pfaedle/router/Misc.h" #include "pfaedle/router/RoutingAttrs.h" -#include "pfaedle/router/TripTrie.h" -#include "pfaedle/router/Weights.h" #include "pfaedle/trgraph/Graph.h" -#include "util/Misc.h" #include "util/geo/Geo.h" +#include "util/graph/Dijkstra.h" #include "util/graph/EDijkstra.h" +using util::graph::EDijkstra; +using util::graph::Dijkstra; + namespace pfaedle { namespace router { -constexpr static uint32_t ROUTE_INF = std::numeric_limits::max(); -constexpr static double DBL_INF = std::numeric_limits::infinity(); -constexpr static size_t NO_PREDE = std::numeric_limits::max(); - -constexpr static int MAX_ROUTE_COST_DOUBLING_STEPS = 3; - +typedef std::unordered_map CombNodeMap; typedef std::pair HId; -typedef std::vector LayerCostsDAG; -typedef std::vector CostsDAG; -typedef std::vector> PredeDAG; +typedef std::map< + RoutingAttrs, + std::unordered_map > > > + Cache; -typedef std::unordered_map> - EdgeCostMatrix; -typedef std::unordered_map> - EdgeDistMatrix; -typedef util::graph::EDijkstra::EList TrEList; +struct HopBand { + double minD; + double maxD; + const trgraph::Edge* nearest; + double maxInGrpDist; +}; -typedef std::vector, uint32_t>> CostMatrix; +struct CostFunc + : 
public EDijkstra::CostFunc { + CostFunc(const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& res, const trgraph::StatGroup* tgGrp, + double max) + : _rAttrs(rAttrs), + _rOpts(rOpts), + _res(res), + _tgGrp(tgGrp), + _inf(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, max, 0) {} -class Router { - public: - virtual ~Router() = default; - virtual std::map route( - const TripTrie* trie, const EdgeCandMap& ecm, - const RoutingOpts& rOpts, const osm::Restrictor& rest, HopCache* hopCache, - bool noFastHops) const = 0; + const RoutingAttrs& _rAttrs; + const RoutingOpts& _rOpts; + const osm::Restrictor& _res; + const trgraph::StatGroup* _tgGrp; + EdgeCost _inf; + + EdgeCost operator()(const trgraph::Edge* from, const trgraph::Node* n, + const trgraph::Edge* to) const; + EdgeCost inf() const { return _inf; } + + double transitLineCmp(const trgraph::EdgePL& e) const; +}; + +struct NCostFunc + : public Dijkstra::CostFunc { + NCostFunc(const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& res, const trgraph::StatGroup* tgGrp) + : _rAttrs(rAttrs), + _rOpts(rOpts), + _res(res), + _tgGrp(tgGrp), + _inf(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + std::numeric_limits::infinity(), 0) {} + + const RoutingAttrs& _rAttrs; + const RoutingOpts& _rOpts; + const osm::Restrictor& _res; + const trgraph::StatGroup* _tgGrp; + EdgeCost _inf; + + EdgeCost operator()(const trgraph::Node* from, const trgraph::Edge* e, + const trgraph::Node* to) const; + EdgeCost inf() const { return _inf; } + + double transitLineCmp(const trgraph::EdgePL& e) const; +}; + +struct DistHeur + : public EDijkstra::HeurFunc { + DistHeur(uint8_t minLvl, const RoutingOpts& rOpts, + const std::set& tos); + + const RoutingOpts& _rOpts; + uint8_t _lvl; + POINT _center; + double _maxCentD; + EdgeCost operator()(const trgraph::Edge* a, + const std::set& b) const; +}; + +struct NDistHeur + : public Dijkstra::HeurFunc { + NDistHeur(const RoutingOpts& rOpts, const std::set& tos); + + 
const RoutingOpts& _rOpts; + POINT _center; + double _maxCentD; + EdgeCost operator()(const trgraph::Node* a, + const std::set& b) const; +}; + +struct CombCostFunc + : public EDijkstra::CostFunc { + explicit CombCostFunc(const RoutingOpts& rOpts) : _rOpts(rOpts) {} + + const RoutingOpts& _rOpts; + + double operator()(const router::Edge* from, const router::Node* n, + const router::Edge* to) const; + double inf() const { return std::numeric_limits::infinity(); } }; /* * Finds the most likely route of schedule-based vehicle between stops in a * physical transportation network */ -template -class RouterImpl : public Router { +class Router { public: - // Find the most likely path through the graph for a trip trie. - virtual std::map route( - const TripTrie* trie, const EdgeCandMap& ecm, - const RoutingOpts& rOpts, const osm::Restrictor& rest, HopCache* hopCache, - bool noFastHops) const; + // Init this router with caches for numThreads threads + explicit Router(size_t numThreads, bool caching); + ~Router(); + + // Find the most likely path through the graph for a node candidate route. + EdgeListHops route(const NodeCandRoute& route, const RoutingAttrs& rAttrs, + const RoutingOpts& rOpts, + const osm::Restrictor& rest) const; + EdgeListHops route(const NodeCandRoute& route, const RoutingAttrs& rAttrs, + const RoutingOpts& rOpts, const osm::Restrictor& rest, + router::Graph* cgraph) const; + + // Find the most likely path through the graph for an edge candidate route. 
+ EdgeListHops route(const EdgeCandRoute& route, const RoutingAttrs& rAttrs, + const RoutingOpts& rOpts, + const osm::Restrictor& rest) const; + EdgeListHops route(const EdgeCandRoute& route, const RoutingAttrs& rAttrs, + const RoutingOpts& rOpts, const osm::Restrictor& rest, + router::Graph* cgraph) const; + + // Find the most likely path through cgraph for a node candidate route, but + // based on a greedy node to node approach + EdgeListHops routeGreedy(const NodeCandRoute& route, + const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& rest) const; + + // Find the most likely path through cgraph for a node candidate route, but + // based on a greedy node to node set approach + EdgeListHops routeGreedy2(const NodeCandRoute& route, + const RoutingAttrs& rAttrs, + const RoutingOpts& rOpts, + const osm::Restrictor& rest) const; + + // Return the number of thread caches this router was initialized with + size_t getCacheNumber() const; private: - void hops(const EdgeCandGroup& from, const EdgeCandGroup& to, - CostMatrix* rCosts, CostMatrix* dists, const RoutingAttrs& rAttrs, - const RoutingOpts& rOpts, const osm::Restrictor& rest, - HopCache* hopCache, uint32_t maxCost) const; + mutable std::vector _cache; + bool _caching; + HopBand getHopBand(const EdgeCandGroup& a, const EdgeCandGroup& b, + const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& rest) const; - void hopsFast(const EdgeCandGroup& from, const EdgeCandGroup& to, - const LayerCostsDAG& initCosts, CostMatrix* rCosts, - const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, - const osm::Restrictor& rest, + void hops(trgraph::Edge* from, const std::set& froms, + const std::set to, const trgraph::StatGroup* tgGrp, + const std::unordered_map& edgesRet, + std::unordered_map* rCosts, + const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& rest, HopBand hopB) const; - HopCache* hopCache, uint32_t maxCost) const; + std::set getCachedHops( + 
trgraph::Edge* from, const std::set& to, + const std::unordered_map& edgesRet, + std::unordered_map* rCosts, + const RoutingAttrs& rAttrs) const; - bool connected(const EdgeCand& from, const EdgeCandGroup& tos) const; - bool connected(const EdgeCandGroup& froms, const EdgeCand& to) const; + void cache(trgraph::Edge* from, trgraph::Edge* to, const EdgeCost& c, + EdgeList* edges, const RoutingAttrs& rAttrs) const; - bool cacheDrop( + void nestedCache(const EdgeList* el, const std::set& froms, + const CostFunc& cost, const RoutingAttrs& rAttrs) const; - HopCache* hopCache, const std::set& froms, - const trgraph::Edge* to, uint32_t maxCost) const; - - uint32_t addNonOverflow(uint32_t a, uint32_t b) const; + bool compConned(const EdgeCandGroup& a, const EdgeCandGroup& b) const; }; - -#include "pfaedle/router/Router.tpp" } // namespace router } // namespace pfaedle diff --git a/src/pfaedle/router/Router.tpp b/src/pfaedle/router/Router.tpp deleted file mode 100644 index f9b2a7b..0000000 --- a/src/pfaedle/router/Router.tpp +++ /dev/null @@ -1,629 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Authors: Patrick Brosi - -#ifdef _OPENMP -#include -#else -#define omp_get_thread_num() 0 -#define omp_get_num_procs() 1 -#endif - -#include -#include -#include -#include -#include -#include -#include - -using util::graph::EDijkstra; - -// _____________________________________________________________________________ -template -std::map RouterImpl::route( - const TripTrie* trie, const EdgeCandMap& ecm, - const RoutingOpts& rOpts, const osm::Restrictor& rest, HopCache* hopCache, - bool noFastHops) const { - std::map ret; - - // the current node costs in our DAG - CostsDAG costsDAG(trie->getNds().size()); - PredeDAG predeDAG(trie->getNds().size()); - std::vector maxCosts(trie->getNds().size()); - - // skip the root node, init all to inf - for (size_t nid = 1; nid < trie->getNds().size(); nid++) { - costsDAG[nid].resize(ecm.at(nid).size(), DBL_INF); - predeDAG[nid].resize(ecm.at(nid).size(), NO_PREDE); - } - - std::stack st; - - // init cost of all first childs - for (size_t cnid : trie->getNd(0).childs) { - st.push(cnid); - for (size_t frId = 0; frId < ecm.at(cnid).size(); frId++) { - costsDAG[cnid][frId] = ecm.at(cnid)[frId].pen; - } - } - - while (!st.empty()) { - size_t frTrNid = st.top(); - st.pop(); - const auto& frTrNd = trie->getNd(frTrNid); - - // determine the max speed for this hop - double maxSpeed = 0; - for (size_t nid = 0; nid < ecm.at(frTrNid).size(); nid++) { - if (!ecm.at(frTrNid)[nid].e) continue; - if (ecm.at(frTrNid)[nid].e->getFrom()->pl().getComp().maxSpeed > maxSpeed) - maxSpeed = ecm.at(frTrNid)[nid].e->getFrom()->pl().getComp().maxSpeed; - } - - for (size_t toTrNid : trie->getNd(frTrNid).childs) { - CostMatrix costM, dists; - const auto& toTrNd = trie->getNd(toTrNid); - - if (frTrNd.arr && !toTrNd.arr) { - for (size_t toId = 0; toId < costsDAG[toTrNid].size(); toId++) { - auto toCand = ecm.at(toTrNid)[toId]; - for (size_t frId : toCand.depPrede) { - double newC = costsDAG[frTrNid][frId] + ecm.at(toTrNid)[toId].pen; - if (newC < 
costsDAG[toTrNid][toId]) { - costsDAG[toTrNid][toId] = newC; - predeDAG[toTrNid][toId] = frId; - } - } - } - st.push(toTrNid); - continue; - } - - const double avgDepT = frTrNd.accTime / frTrNd.trips; - const double avgArrT = toTrNd.accTime / toTrNd.trips; - - double hopDist = 0; - - hopDist = util::geo::haversine(frTrNd.lat, frTrNd.lng, toTrNd.lat, - toTrNd.lng); - - double minTime = hopDist / maxSpeed; - double hopTime = avgArrT - avgDepT; - - if (hopTime < minTime) hopTime = minTime; - - uint32_t newMaxCost = TW::maxCost(hopTime, rOpts); - uint32_t maxCost = newMaxCost; - - bool found = false; - int step = 0; - - while (!found && step <= MAX_ROUTE_COST_DOUBLING_STEPS) { - maxCosts[toTrNid] = newMaxCost; - maxCost = newMaxCost; - - // calculate n x n hops between layers - if (noFastHops || !TW::ALLOWS_FAST_ROUTE) { - hops(ecm.at(frTrNid), ecm.at(toTrNid), &costM, &dists, toTrNd.rAttrs, - rOpts, rest, hopCache, maxCost); - } else { - hopsFast(ecm.at(frTrNid), ecm.at(toTrNid), costsDAG[frTrNid], &costM, - toTrNd.rAttrs, rOpts, rest, hopCache, maxCost); - } - - for (size_t matrixI = 0; matrixI < costM.size(); matrixI++) { - const auto& mVal = costM[matrixI]; - const size_t frId = mVal.first.first; - const size_t toId = mVal.first.second; - const uint32_t c = mVal.second; - - double mDist = 0; - - // the dists and the costM matrices have entries at exactly the same - // loc - if (TW::NEED_DIST) mDist = dists[matrixI].second; - - // calculate the transition weights - const double depT = ecm.at(frTrNid)[frId].time; - const double arrT = ecm.at(toTrNid)[toId].time; - const double w = TW::weight(c, mDist, arrT - depT, hopDist, rOpts); - - // update costs to successors in next layer - double newC = costsDAG[frTrNid][frId] + ecm.at(toTrNid)[toId].pen + w; - if (newC < costsDAG[toTrNid][toId]) { - costsDAG[toTrNid][toId] = newC; - predeDAG[toTrNid][toId] = frId; - found = true; - } - } - - if (newMaxCost <= std::numeric_limits::max() / 2) - newMaxCost *= 2; - else - 
newMaxCost = std::numeric_limits::max(); - - if (newMaxCost == maxCost) break; - step++; - } - - if (!found) { - // write the cost for the NULL candidates as a fallback - for (size_t frNid = 0; frNid < ecm.at(frTrNid).size(); frNid++) { - double newC = costsDAG[frTrNid][frNid] + maxCost * 100; - // in the time expanded case, there might be multiple null cands - size_t nullCId = 0; - while (nullCId < ecm.at(toTrNid).size() && - !ecm.at(toTrNid)[nullCId].e) { - if (newC < costsDAG[toTrNid][nullCId]) { - predeDAG[toTrNid][nullCId] = frNid; - costsDAG[toTrNid][nullCId] = newC; - } - nullCId++; - } - } - - // for the remaining, write dummy edges - for (size_t frNid = 0; frNid < ecm.at(frTrNid).size(); frNid++) { - // skip NULL candidates - size_t toNid = 1; - while (toNid < ecm.at(toTrNid).size() && !ecm.at(toTrNid)[toNid].e) - toNid++; - for (; toNid < ecm.at(toTrNid).size(); toNid++) { - double newC = costsDAG[frTrNid][frNid] + ecm.at(toTrNid)[toNid].pen; - if (newC < costsDAG[toTrNid][toNid]) { - predeDAG[toTrNid][toNid] = frNid; - costsDAG[toTrNid][toNid] = newC; - } - } - } - } - - st.push(toTrNid); - } - } - - // update sink costs - std::unordered_map sinkCosts; - std::unordered_map frontIds; - for (auto leaf : trie->getNdTrips()) { - sinkCosts[leaf.first] = DBL_INF; - frontIds[leaf.first] = 0; - - for (size_t lastId = 0; lastId < ecm.at(leaf.first).size(); lastId++) { - double nCost = costsDAG[leaf.first][lastId]; - if (nCost < sinkCosts[leaf.first]) { - frontIds[leaf.first] = lastId; - sinkCosts[leaf.first] = nCost; - } - } - } - - // retrieve edges - for (auto leaf : trie->getNdTrips()) { - const auto leafNid = leaf.first; - auto curTrieNid = leafNid; - - while (predeDAG[curTrieNid][frontIds[leafNid]] != NO_PREDE) { - const auto curTrieParNid = trie->getNd(curTrieNid).parent; - const auto frId = predeDAG[curTrieNid][frontIds[leafNid]]; - const auto toId = frontIds[leafNid]; - - const auto frTrNd = trie->getNd(curTrieParNid); - const auto toTrNd = 
trie->getNd(curTrieNid); - - // skip in-node hops - if (frTrNd.arr && !toTrNd.arr) { - frontIds[leafNid] = frId; - curTrieNid = curTrieParNid; - continue; - } - - std::vector edgs; - - const auto& fr = ecm.at(curTrieParNid)[frId]; - const auto& to = ecm.at(curTrieNid)[toId]; - - // for subtracting and adding progression costs - typename TW::CostFunc costPr(toTrNd.rAttrs, rOpts, rest, ROUTE_INF); - - if (fr.e && to.e) { - // account for max progression start offset, do this exactly like - // in the hops calculation to ensure that we can find the path again - double maxProgrStart = 0; - for (const auto& fr : ecm.at(curTrieParNid)) { - if (!fr.e) continue; - double progrStart = 0; - if (fr.progr > 0) progrStart = costPr(fr.e, 0, 0) * fr.progr; - if (progrStart > maxProgrStart) maxProgrStart = progrStart; - } - - const double maxCostRt = maxCosts[curTrieNid] + maxProgrStart; - uint32_t maxCostRtInt = maxCostRt; - - // avoid overflow - if (maxCostRt >= std::numeric_limits::max()) { - maxCostRtInt = std::numeric_limits::max(); - } - - typename TW::CostFunc cost(toTrNd.rAttrs, rOpts, rest, maxCostRtInt); - typename TW::DistHeur distH(fr.e->getFrom()->pl().getComp().maxSpeed, - rOpts, {to.e}); - - const double c = - EDijkstra::shortestPath(fr.e, to.e, cost, distH, &edgs); - - if (c < maxCostRtInt) { - // a path was found, use it - ret[leafNid].push_back( - {edgs, fr.e, to.e, fr.progr, to.progr, {}, {}}); - } else { - // no path was found, which is marked by an empty edge list - ret[leafNid].push_back({{}, fr.e, to.e, fr.progr, to.progr, {}, {}}); - } - } else { - // fallback to the position given in candidate - if (fr.e) { - ret[leafNid].push_back({edgs, fr.e, 0, fr.progr, 0, {}, to.point}); - } else if (to.e) { - ret[leafNid].push_back({edgs, 0, to.e, 0, to.progr, fr.point, {}}); - } else { - ret[leafNid].push_back({edgs, 0, 0, 0, 0, fr.point, to.point}); - } - } - frontIds[leafNid] = frId; - curTrieNid = curTrieParNid; - } - } - - return ret; -} - -// 
_____________________________________________________________________________ -template -void RouterImpl::hops(const EdgeCandGroup& froms, const EdgeCandGroup& tos, - CostMatrix* rCosts, CostMatrix* dists, - const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, - const osm::Restrictor& rest, HopCache* hopCache, - uint32_t maxCost) const { - // standard 1 -> n approach - std::set eFrs; - for (const auto& from : froms) { - if (!from.e) continue; - eFrs.insert(from.e); - } - - std::set eTos; - for (const auto& to : tos) { - if (!to.e) continue; - eTos.insert(to.e); - } - - EdgeCostMatrix ecm; - EdgeDistMatrix ecmDist; - - // account for max progression start offset - double maxProgrStart = 0; - typename TW::CostFunc cost(rAttrs, rOpts, rest, ROUTE_INF); - for (const auto& fr : froms) { - if (!fr.e) continue; - double progrStart = 0; - if (fr.progr > 0) progrStart = cost(fr.e, 0, 0) * fr.progr; - if (progrStart > maxProgrStart) maxProgrStart = progrStart; - } - - maxCost = addNonOverflow(maxCost, maxProgrStart); - typename TW::CostFunc costF(rAttrs, rOpts, rest, maxCost); - - for (trgraph::Edge* eFrom : eFrs) { - std::set remTos; - for (trgraph::Edge* eTo : eTos) { - // init ecmDist - ecmDist[eFrom][eTo] = ROUTE_INF; - - std::pair cached = {0, 0}; - if (hopCache) cached = hopCache->get(eFrom, eTo); - - // shortcut: if the nodes lie in two different connected components, - // the distance between them is trivially infinite - if (eFrom->getFrom()->pl().getCompId() != - eTo->getTo()->pl().getCompId()) { - ecm[eFrom][eTo] = costF.inf(); - } else if (cached.second >= costF.inf()) { - ecm[eFrom][eTo] = costF.inf(); - } else if (!TW::NEED_DIST && cached.second) { - ecm[eFrom][eTo] = cached.first; - } else { - remTos.insert(eTo); - } - } - - if (remTos.size()) { - typename TW::DistHeur distH(eFrom->getFrom()->pl().getComp().maxSpeed, - rOpts, remTos); - - std::unordered_map paths; - std::unordered_map pathPtrs; - for (auto to : tos) pathPtrs[to.e] = &paths[to.e]; - - const 
auto& costs = - EDijkstra::shortestPath(eFrom, remTos, costF, distH, pathPtrs); - - for (const auto& c : costs) { - ecm[eFrom][c.first] = c.second; - - if (paths[c.first].size() == 0) { - if (hopCache) hopCache->setMin(eFrom, c.first, maxCost); - continue; // no path found - } - - if (hopCache) hopCache->setEx(eFrom, c.first, c.second); - } - - if (TW::NEED_DIST) { - for (const auto& c : costs) { - if (!paths[c.first].size()) continue; - double d = 0; - // don't count last edge - for (size_t i = paths[c.first].size() - 1; i > 0; i--) { - d += paths[c.first][i]->pl().getLength(); - } - ecmDist[eFrom][c.first] = d; - } - } - } - } - - // build return costs - for (size_t frId = 0; frId < froms.size(); frId++) { - auto fr = froms[frId]; - if (!fr.e) continue; - auto costFr = costF(fr.e, 0, 0); - for (size_t toId = 0; toId < tos.size(); toId++) { - auto to = tos[toId]; - if (!to.e) continue; - - uint32_t c = ecm[fr.e][to.e]; - - if (c >= maxCost) continue; - - double dist = 0; - if (TW::NEED_DIST) dist = ecmDist[fr.e][to.e]; - - if (fr.e == to.e) { - if (fr.progr <= to.progr) { - auto costTo = costF(to.e, 0, 0); - const uint32_t progrCFr = costFr * fr.progr; - const uint32_t progrCTo = costTo * to.progr; - - // calculate this in one step to avoid uint32_t underflow below - c += progrCTo - progrCFr; - } else { - // trivial case we can ignore - continue; - } - - } else { - // subtract progression cost on first edge - if (fr.progr > 0) { - const uint32_t progrCFr = costFr * fr.progr; - c -= progrCFr; - if (TW::NEED_DIST) dist -= fr.e->pl().getLength() * fr.progr; - } - - // add progression cost on last edge - if (to.progr > 0) { - const auto costTo = costF(to.e, 0, 0); - const uint32_t progrCTo = costTo * to.progr; - c += progrCTo; - if (TW::NEED_DIST) dist += to.e->pl().getLength() * to.progr; - } - } - - if (c < maxCost - maxProgrStart) { - rCosts->push_back({{frId, toId}, c}); - if (TW::NEED_DIST) - dists->push_back({{frId, toId}, static_cast(dist)}); - } - } - } -} - 
-// _____________________________________________________________________________ -template -void RouterImpl::hopsFast(const EdgeCandGroup& froms, - const EdgeCandGroup& tos, - const LayerCostsDAG& rawInitCosts, - CostMatrix* rCosts, const RoutingAttrs& rAttrs, - const RoutingOpts& rOpts, - const osm::Restrictor& restr, HopCache* hopCache, - uint32_t maxCost) const { - std::unordered_map initCosts; - - std::set eFrs, eTos; - std::map> eFrCands, eToCands; - - double maxSpeed = 0; - for (size_t frId = 0; frId < froms.size(); frId++) { - if (rawInitCosts[frId] >= DBL_INF || !connected(froms[frId], tos)) continue; - - eFrs.insert(froms[frId].e); - eFrCands[froms[frId].e].push_back(frId); - - if (froms[frId].e->getFrom()->pl().getComp().maxSpeed > maxSpeed) - maxSpeed = froms[frId].e->getFrom()->pl().getComp().maxSpeed; - } - - for (size_t toId = 0; toId < tos.size(); toId++) { - if (!connected(froms, tos[toId])) - continue; // skip nodes not conn'ed to any - - if (hopCache && cacheDrop(hopCache, eFrs, tos[toId].e, maxCost)) - continue; // skip nodes we have already encountered at higher cost - - eTos.insert(tos[toId].e); - eToCands[tos[toId].e].push_back(toId); - } - - if (eFrs.size() == 0 || eTos.size() == 0) return; - - // account for max progression start offset - double maxProgrStart = 0; - typename TW::CostFunc progrCostF(rAttrs, rOpts, restr, ROUTE_INF); - for (const auto& fr : froms) { - if (!fr.e) continue; - double progrStart = 0; - if (fr.progr > 0) progrStart = progrCostF(fr.e, 0, 0) * fr.progr; - if (progrStart > maxProgrStart) maxProgrStart = progrStart; - } - - // initialize init doubles - LayerCostsDAG prepInitCosts(froms.size()); - for (size_t frId = 0; frId < froms.size(); frId++) { - if (!froms[frId].e || rawInitCosts[frId] >= DBL_INF) continue; - const auto& fr = froms[frId]; - // offset by progr start - double progrStart = progrCostF(fr.e, 0, 0) * fr.progr; - prepInitCosts[frId] = - TW::invWeight(rawInitCosts[frId], rOpts) + maxProgrStart - 
progrStart; - } - - // all init costs are inf - for (const auto& fr : froms) initCosts[fr.e] = ROUTE_INF; - - // now chose the best offset cost - for (size_t frId = 0; frId < froms.size(); frId++) { - if (!froms[frId].e || rawInitCosts[frId] >= DBL_INF) continue; - const auto& fr = froms[frId]; - if (prepInitCosts[frId] < initCosts[fr.e]) - initCosts[fr.e] = prepInitCosts[frId]; - } - - // get max init costs - uint32_t maxInit = 0; - uint32_t minInit = ROUTE_INF; - for (const auto& c : initCosts) { - if (!eFrs.count(c.first)) continue; - if (c.second != ROUTE_INF && c.second > maxInit) maxInit = c.second; - if (c.second < minInit) minInit = c.second; - } - - for (auto& c : initCosts) c.second = c.second - minInit; - - // account for start offsets - maxCost = addNonOverflow(maxCost, maxProgrStart); - - typename TW::CostFunc costF(rAttrs, rOpts, restr, - maxCost + (maxInit - minInit)); - - std::unordered_map paths; - std::unordered_map pathPtrs; - for (const auto& to : tos) pathPtrs[to.e] = &paths[to.e]; - - typename TW::DistHeur distH(maxSpeed, rOpts, eTos); - - const auto& costs = - EDijkstra::shortestPath(eFrs, eTos, initCosts, maxCost, costF, distH); - - for (const auto& c : costs) { - auto toEdg = c.first; - if (c.second.second >= costF.inf()) { - if (hopCache) hopCache->setMin(eFrs, toEdg, maxCost); - continue; // no path found - } - auto fromEdg = c.second.first; - uint32_t cost = c.second.second - initCosts[fromEdg]; - - if (cost >= maxCost) continue; - - for (size_t frId : eFrCands.find(fromEdg)->second) { - const auto& fr = froms[frId]; - auto costFr = costF(fr.e, 0, 0); - - for (size_t toId : eToCands.find(toEdg)->second) { - const auto& to = tos[toId]; - uint32_t wrCost = cost; - - if (fr.e == to.e) { - if (fr.progr <= to.progr) { - const auto costTo = costF(to.e, 0, 0); - const uint32_t progrCFr = costFr * fr.progr; - const uint32_t progrCTo = costTo * to.progr; - - // calculate this in one step to avoid uint32_t underflow below - wrCost += progrCTo - 
progrCFr; - } else { - // trivial case we can ignore - continue; - } - } else { - // subtract progression cost on first edge - if (fr.progr > 0) { - const uint32_t progrCFr = costFr * fr.progr; - wrCost -= progrCFr; - } - - // add progression cost on last edge - if (to.progr > 0) { - const auto costTo = costF(to.e, 0, 0); - const uint32_t progrCTo = costTo * to.progr; - wrCost += progrCTo; - } - } - - if (wrCost < maxCost - maxProgrStart) { - rCosts->push_back({{frId, toId}, wrCost}); - } - } - } - } -} - -// _____________________________________________________________________________ -template -bool RouterImpl::connected(const EdgeCand& fr, - const EdgeCandGroup& tos) const { - if (!fr.e) return false; - for (const auto& to : tos) { - if (!to.e) continue; - if (fr.e->getFrom()->pl().getCompId() == to.e->getFrom()->pl().getCompId()) - return true; - } - return false; -} - -// _____________________________________________________________________________ -template -bool RouterImpl::connected(const EdgeCandGroup& froms, - const EdgeCand& to) const { - if (!to.e) return false; - for (const auto& fr : froms) { - if (!fr.e) continue; - if (fr.e->getFrom()->pl().getCompId() == to.e->getFrom()->pl().getCompId()) - return true; - } - return false; -} - -// _____________________________________________________________________________ -template -bool RouterImpl::cacheDrop(HopCache* hopCache, - const std::set& froms, - const trgraph::Edge* to, - uint32_t maxCost) const { - for (auto fr : froms) - if (hopCache->get(fr, to).first <= maxCost) return false; - - return true; -} - -// _____________________________________________________________________________ -template -uint32_t RouterImpl::addNonOverflow(uint32_t a, uint32_t b) const { - if (a == std::numeric_limits::max() || - b == std::numeric_limits::max()) - return std::numeric_limits::max(); - uint32_t res = a + b; - if (res >= a && res >= b) return res; - return std::numeric_limits::max(); -} diff --git 
a/src/pfaedle/router/RoutingAttrs.h b/src/pfaedle/router/RoutingAttrs.h index 3f7965b..11a5cdb 100644 --- a/src/pfaedle/router/RoutingAttrs.h +++ b/src/pfaedle/router/RoutingAttrs.h @@ -5,10 +5,8 @@ #ifndef PFAEDLE_ROUTER_ROUTINGATTRS_H_ #define PFAEDLE_ROUTER_ROUTINGATTRS_H_ +#include #include -#include -#include -#include "pfaedle/statsimi-classifier/StatsimiClassifier.h" #include "pfaedle/trgraph/EdgePL.h" using pfaedle::trgraph::TransitEdgeLine; @@ -16,81 +14,40 @@ using pfaedle::trgraph::TransitEdgeLine; namespace pfaedle { namespace router { -struct LineSimilarity { - bool nameSimilar : 1; - bool fromSimilar : 1; - bool toSimilar : 1; -}; - -inline bool operator<(const LineSimilarity& a, const LineSimilarity& b) { - return (a.nameSimilar + a.fromSimilar + a.toSimilar) < - (b.nameSimilar + b.fromSimilar + b.toSimilar); -} - struct RoutingAttrs { - RoutingAttrs() - : lineFrom(""), lineTo(), shortName(""), classifier(0), _simiCache() {} - RoutingAttrs(const std::string& shortName, const std::string& lineFrom, - const std::string& lineTo) - : lineFrom(lineFrom), - lineTo({lineTo}), - shortName(shortName), - classifier(0), - _simiCache() {} - std::string lineFrom; - std::vector lineTo; + RoutingAttrs() : fromString(""), toString(""), shortName(""), _simiCache() {} + std::string fromString; + std::string toString; std::string shortName; - const pfaedle::statsimiclassifier::StatsimiClassifier* classifier; - - mutable std::unordered_map _simiCache; - - LineSimilarity simi(const TransitEdgeLine* line) const { - // shortcut, if we don't have a line information, classify as similar - if (line->shortName.empty() && line->toStr.empty() && line->fromStr.empty()) - return {true, true, true}; + mutable std::map _simiCache; + // carfull: lower return value = higher similarity + double simi(const TransitEdgeLine* line) const { auto i = _simiCache.find(line); if (i != _simiCache.end()) return i->second; - LineSimilarity ret{false, false, false}; - + double cur = 1; if 
(shortName.empty() || router::lineSimi(line->shortName, shortName) > 0.5) - ret.nameSimilar = true; + cur -= 0.333333333; - if (lineTo.size() == 0) { - ret.toSimilar = true; - } else { - for (const auto& lTo : lineTo) { - if (lTo.empty() || classifier->similar(line->toStr, lTo)) { - ret.toSimilar = true; - break; - } - } - } + if (toString.empty() || line->toStr.empty() || + router::statSimi(line->toStr, toString) > 0.5) + cur -= 0.333333333; - if (lineFrom.empty() || classifier->similar(line->fromStr, lineFrom)) - ret.fromSimilar = true; + if (fromString.empty() || line->fromStr.empty() || + router::statSimi(line->fromStr, fromString) > 0.5) + cur -= 0.333333333; - _simiCache[line] = ret; + _simiCache[line] = cur; - return ret; - } - - void merge(const RoutingAttrs& other) { - assert(other.lineFrom == lineFrom); - assert(other.shortName == shortName); - - for (const auto& l : other.lineTo) { - auto i = std::lower_bound(lineTo.begin(), lineTo.end(), l); - if (i != lineTo.end() && (*i) == l) continue; // already present - lineTo.insert(i, l); - } + return cur; } }; inline bool operator==(const RoutingAttrs& a, const RoutingAttrs& b) { - return a.shortName == b.shortName && a.lineFrom == b.lineFrom; + return a.shortName == b.shortName && a.toString == b.toString && + a.fromString == b.fromString; } inline bool operator!=(const RoutingAttrs& a, const RoutingAttrs& b) { @@ -98,8 +55,10 @@ inline bool operator!=(const RoutingAttrs& a, const RoutingAttrs& b) { } inline bool operator<(const RoutingAttrs& a, const RoutingAttrs& b) { - return a.lineFrom < b.lineFrom || - (a.lineFrom == b.lineFrom && a.shortName < b.shortName); + return a.fromString < b.fromString || + (a.fromString == b.fromString && a.toString < b.toString) || + (a.fromString == b.fromString && a.toString == b.toString && + a.shortName < b.shortName); } } // namespace router diff --git a/src/pfaedle/router/ShapeBuilder.cpp b/src/pfaedle/router/ShapeBuilder.cpp index bf5857c..cd5c85b 100644 --- 
a/src/pfaedle/router/ShapeBuilder.cpp +++ b/src/pfaedle/router/ShapeBuilder.cpp @@ -2,24 +2,27 @@ // Chair of Algorithms and Data Structures. // Authors: Patrick Brosi -#include -#include -#include +#ifdef _OPENMP +#include +#else +#define omp_get_thread_num() 0 +#define omp_get_num_procs() 1 +#endif + +#include #include #include -#include #include -#include #include #include - #include "ad/cppgtfs/gtfs/Feed.h" #include "pfaedle/Def.h" +#include "pfaedle/eval/Collector.h" #include "pfaedle/gtfs/Feed.h" #include "pfaedle/gtfs/StopTime.h" #include "pfaedle/osm/OsmBuilder.h" #include "pfaedle/router/ShapeBuilder.h" -#include "pfaedle/statsimi-classifier/StatsimiClassifier.h" +#include "pfaedle/trgraph/StatGroup.h" #include "util/geo/Geo.h" #include "util/geo/output/GeoGraphJsonOutput.h" #include "util/geo/output/GeoJsonOutput.h" @@ -30,523 +33,255 @@ using util::geo::DBox; using util::geo::DPoint; using util::geo::extendBox; using util::geo::minbox; -using util::geo::PolyLine; -using ad::cppgtfs::gtfs::NO_COLOR; using ad::cppgtfs::gtfs::ShapePoint; using ad::cppgtfs::gtfs::Stop; using pfaedle::gtfs::Feed; using pfaedle::gtfs::StopTime; using pfaedle::gtfs::Trip; using pfaedle::osm::BBoxIdx; -using pfaedle::router::EdgeCandGroup; -using pfaedle::router::EdgeCandMap; +using pfaedle::router::Clusters; using pfaedle::router::EdgeListHops; using pfaedle::router::FeedStops; +using pfaedle::router::NodeCandGroup; +using pfaedle::router::NodeCandRoute; using pfaedle::router::RoutingAttrs; using pfaedle::router::ShapeBuilder; -using pfaedle::router::Stats; -using pfaedle::router::TripForests; -using pfaedle::router::TripTrie; -using pfaedle::trgraph::EdgeGrid; -using pfaedle::trgraph::NodeGrid; using util::geo::latLngToWebMerc; -using util::geo::M_PER_DEG; +using util::geo::webMercMeterDist; +using util::geo::webMercToLatLng; using util::geo::output::GeoGraphJsonOutput; // _____________________________________________________________________________ 
-ShapeBuilder::ShapeBuilder( - Feed* feed, MOTs mots, const config::MotConfig& motCfg, - pfaedle::trgraph::Graph* g, router::FeedStops* fStops, - osm::Restrictor* restr, - const pfaedle::statsimiclassifier::StatsimiClassifier* classifier, - router::Router* router, const config::Config& cfg) +ShapeBuilder::ShapeBuilder(Feed* feed, ad::cppgtfs::gtfs::Feed* evalFeed, + MOTs mots, const config::MotConfig& motCfg, + eval::Collector* ecoll, pfaedle::trgraph::Graph* g, + router::FeedStops* fStops, osm::Restrictor* restr, + const config::Config& cfg) : _feed(feed), + _evalFeed(evalFeed), _mots(mots), _motCfg(motCfg), + _ecoll(ecoll), _cfg(cfg), _g(g), + _crouter(omp_get_num_procs(), cfg.useCaching), _stops(fStops), _curShpCnt(0), - _restr(restr), - _classifier(classifier), - _router(router) { - pfaedle::osm::BBoxIdx box(BOX_PADDING); - ShapeBuilder::getGtfsBox(feed, mots, cfg.shapeTripId, cfg.dropShapes, &box, - _motCfg.osmBuildOpts.maxSpeed, 0, cfg.verbosity); - - _eGrid = EdgeGrid(cfg.gridSize, cfg.gridSize, box.getFullBox(), false); - _nGrid = NodeGrid(cfg.gridSize, cfg.gridSize, box.getFullBox(), false); - - LOG(DEBUG) << "Grid size of " << _nGrid.getXWidth() << "x" - << _nGrid.getYHeight(); - - buildIndex(); + _restr(restr) { + _numThreads = _crouter.getCacheNumber(); } // _____________________________________________________________________________ -void ShapeBuilder::buildIndex() { - for (auto* n : _g->getNds()) { - for (auto* e : n->getAdjListOut()) { - if (e->pl().lvl() > _motCfg.osmBuildOpts.maxSnapLevel) continue; - // don't snap to one way edges - if (e->pl().oneWay() == 2) continue; - - _eGrid.add(*e->pl().getGeom(), e); - } - } - - for (auto* n : _g->getNds()) { - // only station nodes - if (n->pl().getSI()) { - _nGrid.add(*n->pl().getGeom(), n); - } - } +const NodeCandGroup& ShapeBuilder::getNodeCands(const Stop* s) const { + if (_stops->find(s) == _stops->end() || _stops->at(s) == 0) return _emptyNCG; + return 
_stops->at(s)->pl().getSI()->getGroup()->getNodeCands(s); } // _____________________________________________________________________________ -void ShapeBuilder::buildCandCache(const TripForests& forests) { - std::set stops; - size_t count = 0; - - for (const auto& forest : forests) { - for (const auto& trie : forest.second) { - for (const auto& trips : trie.getNdTrips()) { - for (const auto& st : trips.second[0]->getStopTimes()) { - stops.insert(st.getStop()); - } - } - } - } - - size_t numThreads = std::thread::hardware_concurrency(); - std::vector thrds(numThreads); - std::vector caches(numThreads); - std::vector> threadStops(numThreads); - - size_t i = 0; - for (auto stop : stops) { - threadStops[i].push_back(stop); - if (++i == numThreads) i = 0; - } - - i = 0; - for (auto& t : thrds) { - t = std::thread(&ShapeBuilder::edgCandWorker, this, &threadStops[i], - &caches[i]); - i++; - } - - for (auto& thr : thrds) thr.join(); - - // merge - for (size_t i = 0; i < numThreads; i++) { - for (const auto& c : caches[i]) { - _grpCache[c.first] = c.second; - count += c.second.size(); - } - } - - if (_grpCache.size()) - LOG(DEBUG) << "Average candidate set size: " - << ((count * 1.0) / _grpCache.size()); -} - -// _____________________________________________________________________________ -EdgeCandGroup ShapeBuilder::getEdgCands(const Stop* s) const { - auto cached = _grpCache.find(s); - if (cached != _grpCache.end()) return cached->second; - - EdgeCandGroup ret; - - const auto& snormzer = _motCfg.osmBuildOpts.statNormzer; - auto normedName = snormzer.norm(s->getName()); - - // the first cand is a placeholder for the stop position itself, it is chosen - // when no candidate yielded a feasible route - auto pos = POINT(s->getLng(), s->getLat()); - ret.push_back({0, 0, 0, pos, 0, {}}); - - double maxMDist = _motCfg.osmBuildOpts.maxStationCandDistance; - - double distor = util::geo::latLngDistFactor(pos); - - if (_cfg.gaussianNoise > 0) { - unsigned seed = 
std::chrono::system_clock::now().time_since_epoch().count(); - std::default_random_engine gen(seed); - - // the standard dev is given in meters, convert (roughly...) to degrees - double standardDev = (_cfg.gaussianNoise / M_PER_DEG) / distor; - - // mean 0 (no movement), standard dev according to config - std::normal_distribution dist(0.0, standardDev); - - // add gaussian noise - pos.setX(pos.getX() + dist(gen)); - pos.setY(pos.getY() + dist(gen)); - } - - std::set frNIdx; - _nGrid.get(util::geo::pad(util::geo::getBoundingBox(pos), - (maxMDist / M_PER_DEG) / distor), - &frNIdx); - - if (_motCfg.routingOpts.useStations) { - for (auto nd : frNIdx) { - assert(nd->pl().getSI()); - - double mDist = util::geo::haversine(pos, *nd->pl().getGeom()); - if (mDist > maxMDist) continue; - - double nameMatchPunish = 0; - double trackMatchPunish = 0; - - if (!_classifier->similar(normedName, pos, nd->pl().getSI()->getName(), - *nd->pl().getGeom())) { - // stations do not match, punish - nameMatchPunish = _motCfg.routingOpts.stationUnmatchedPen; - } - std::string platform = s->getPlatformCode(); - - if (!platform.empty() && !nd->pl().getSI()->getTrack().empty() && - nd->pl().getSI()->getTrack() == platform) { - trackMatchPunish = _motCfg.routingOpts.platformUnmatchedPen; - } - - for (auto* e : nd->getAdjListOut()) { - // don't snap to one way edges - if (e->pl().oneWay() == 2) continue; - ret.push_back({e, - emWeight(mDist) + nameMatchPunish + trackMatchPunish, - 0, - {}, - 0, - {}}); - } - } - } - - maxMDist = _motCfg.osmBuildOpts.maxSnapDistance; - - std::set frEIdx; - _eGrid.get(util::geo::pad(util::geo::getBoundingBox(pos), - (maxMDist / M_PER_DEG) / distor), - &frEIdx); - - std::set selected; - std::map scores; - std::map progrs; - - for (auto edg : frEIdx) { - if (selected.count(edg)) continue; - - auto reach = deg2reachable(edg, selected); - - double mDist = dist(pos, *edg->pl().getGeom()) * distor * M_PER_DEG; - - if (mDist > maxMDist) continue; - - if (!reach || mDist < 
scores[reach]) { - if (reach) { - selected.erase(selected.find(reach)); - scores.erase(scores.find(reach)); - } - util::geo::PolyLine pl(*edg->pl().getGeom()); - auto lp = pl.projectOn(pos); - double progr = lp.totalPos; - if (edg->pl().isRev()) progr = 1 - progr; - selected.insert(edg); - scores[edg] = mDist; - progrs[edg] = progr; - } - } - - for (auto e : selected) { - ret.push_back({e, - emWeight(scores[e]) + _motCfg.routingOpts.nonStationPen, - progrs[e], - {}, - 0, - {}}); - } - - if (ret.size() == 1 && _cfg.verbosity) { - LOG(WARN) << "No snapping candidate found for stop '" << s->getName() - << "' (" << s->getId() << ")"; - } - - return ret; -} - -// _____________________________________________________________________________ -pfaedle::trgraph::Edge* ShapeBuilder::deg2reachable( - trgraph::Edge* e, std::set edgs) const { - trgraph::Edge* cur = e; - - // forward - while (cur->getTo()->getDeg() == 2) { - // dont allow backtracking on reverse edge - auto next = e->getTo()->getAdjListOut().front()->getTo() == e->getFrom() - ? e->getTo()->getAdjListOut().back() - : e->getTo()->getAdjListOut().front(); - if (next == e || next == cur) break; // avoid circles - if (next->pl().oneWay() == 2) break; // dont follow one way edges - if (edgs.count(next)) return next; - cur = next; - } - - // backward - while (cur->getFrom()->getDeg() == 2) { - // dont allow backtracking on reverse edge - auto next = e->getFrom()->getAdjListIn().front()->getFrom() == e->getTo() - ? 
e->getFrom()->getAdjListIn().back() - : e->getFrom()->getAdjListIn().front(); - if (next == e || next == cur) break; // avoid circles - if (next->pl().oneWay() == 2) break; // dont follow one way edges - if (edgs.count(cur)) return next; - cur = next; - } - - return 0; -} - -// _____________________________________________________________________________ -std::pair, Stats> ShapeBuilder::shapeL(Trip* trip) { - Stats stats; +LINE ShapeBuilder::shapeL(const router::NodeCandRoute& ncr, + const router::RoutingAttrs& rAttrs) { try { - T_START(t); - EDijkstra::ITERS = 0; - auto hops = shapeify(trip); - stats.solveTime = T_STOP(t); - stats.numTries = 1; - stats.numTrieLeafs = 1; - stats.totNumTrips = 1; - stats.dijkstraIters = EDijkstra::ITERS; - std::map colors; - LOG(INFO) << "Matched 1 trip in " << std::fixed << std::setprecision(2) - << stats.solveTime << " ms."; - // print to line - return {getGeom(hops, getRAttrs(trip), &colors, trip, 1), stats}; + const router::EdgeListHops& res = route(ncr, rAttrs); + + LINE l; + for (const auto& hop : res) { + const trgraph::Node* last = hop.start; + if (hop.edges.size() == 0) { + l.push_back(*hop.start->pl().getGeom()); + l.push_back(*hop.end->pl().getGeom()); + } + for (auto i = hop.edges.rbegin(); i != hop.edges.rend(); i++) { + const auto* e = *i; + if ((e->getFrom() == last) ^ e->pl().isRev()) { + l.insert(l.end(), e->pl().getGeom()->begin(), + e->pl().getGeom()->end()); + } else { + l.insert(l.end(), e->pl().getGeom()->rbegin(), + e->pl().getGeom()->rend()); + } + last = e->getOtherNd(last); + } + } + + return l; } catch (const std::runtime_error& e) { LOG(ERROR) << e.what(); - return {std::vector(), stats}; + return LINE(); } } // _____________________________________________________________________________ -std::map ShapeBuilder::route( - const TripTrie* trie, const EdgeCandMap& ecm, - HopCache* hopCache) const { - return _router->route(trie, ecm, _motCfg.routingOpts, *_restr, hopCache, - _cfg.noFastHops); +LINE 
ShapeBuilder::shapeL(Trip* trip) { + return shapeL(getNCR(trip), getRAttrs(trip)); } // _____________________________________________________________________________ -std::map ShapeBuilder::shapeify( - const TripTrie* trie, HopCache* hopCache) const { - LOG(VDEBUG) << "Map-matching trie " << trie; +EdgeListHops ShapeBuilder::route(const router::NodeCandRoute& ncr, + const router::RoutingAttrs& rAttrs) const { + router::Graph g; - assert(trie->getNdTrips().size()); - assert(trie->getNdTrips().begin()->second.size()); - RoutingAttrs rAttrs = getRAttrs(trie->getNdTrips().begin()->second[0]); + if (_cfg.solveMethod == "global") { + const router::EdgeListHops& ret = + _crouter.route(ncr, rAttrs, _motCfg.routingOpts, *_restr, &g); - std::map ret; + // write combination graph + if (!_cfg.shapeTripId.empty() && _cfg.writeCombGraph) { + LOG(INFO) << "Outputting combgraph.json..."; + std::ofstream pstr(_cfg.dbgOutputPath + "/combgraph.json"); + GeoGraphJsonOutput o; + o.printLatLng(g, pstr); + } - const auto& routes = route(trie, getECM(trie), hopCache); - - for (const auto& route : routes) { - ret[route.first] = route.second; + return ret; + } else if (_cfg.solveMethod == "greedy") { + return _crouter.routeGreedy(ncr, rAttrs, _motCfg.routingOpts, *_restr); + } else if (_cfg.solveMethod == "greedy2") { + return _crouter.routeGreedy2(ncr, rAttrs, _motCfg.routingOpts, *_restr); + } else { + LOG(ERROR) << "Unknown solution method " << _cfg.solveMethod; + exit(1); } - LOG(VDEBUG) << "Finished map-matching for trie " << trie; - - return ret; + return EdgeListHops(); } // _____________________________________________________________________________ -EdgeListHops ShapeBuilder::shapeify(Trip* trip) { +pfaedle::router::Shape ShapeBuilder::shape(Trip* trip) const { LOG(VDEBUG) << "Map-matching shape for trip #" << trip->getId() << " of mot " << trip->getRoute()->getType() << "(sn=" << trip->getShortname() << ", rsn=" << trip->getRoute()->getShortName() << ", rln=" << 
trip->getRoute()->getLongName() << ")"; - TripTrie trie; - trie.addTrip(trip, getRAttrs(trip), - _motCfg.routingOpts.transPenMethod == "timenorm", false); - const auto& routes = route(&trie, getECM(&trie), 0); + Shape ret; + ret.hops = route(getNCR(trip), getRAttrs(trip)); + ret.avgHopDist = avgHopDist(trip); - return routes.begin()->second; + LOG(VDEBUG) << "Finished map-matching for #" << trip->getId(); + + return ret; } // _____________________________________________________________________________ -Stats ShapeBuilder::shapeify(pfaedle::netgraph::Graph* outNg) { - Stats stats; - EDijkstra::ITERS = 0; +pfaedle::router::Shape ShapeBuilder::shape(Trip* trip) { + LOG(VDEBUG) << "Map-matching shape for trip #" << trip->getId() << " of mot " + << trip->getRoute()->getType() << "(sn=" << trip->getShortname() + << ", rsn=" << trip->getRoute()->getShortName() + << ", rln=" << trip->getRoute()->getLongName() << ")"; + + Shape ret; + ret.hops = route(getNCR(trip), getRAttrs(trip)); + ret.avgHopDist = avgHopDist(trip); + + LOG(VDEBUG) << "Finished map-matching for #" << trip->getId(); + + return ret; +} + +// _____________________________________________________________________________ +void ShapeBuilder::shape(pfaedle::netgraph::Graph* ng) { + TrGraphEdgs gtfsGraph; - T_START(cluster); LOG(DEBUG) << "Clustering trips..."; - const TripForests& forests = clusterTrips(_feed, _mots); - for (const auto& forest : forests) { - for (const auto& trie : forest.second) { - stats.numTries++; - stats.numTrieLeafs += trie.getNdTrips().size(); - } - } - LOG(DEBUG) << "Clustered trips into " << stats.numTries - << " tries with a total of " << stats.numTrieLeafs << " leafs in " - << T_STOP(cluster) << "ms"; - - LOG(DEBUG) << "Building candidate cache..."; - buildCandCache(forests); - LOG(DEBUG) << "Done."; - - std::map shpUse; - RouteRefColors refColors; + Clusters clusters = clusterTrips(_feed, _mots); + LOG(DEBUG) << "Clustered trips into " << clusters.size() << " clusters."; + std::map 
shpUsage; for (auto t : _feed->getTrips()) { - if (!t.getShape().empty()) shpUse[t.getShape()]++; - - // write the colors of trips we won't touch, but whose route we might - if (t.getStopTimes().size() < 2) continue; - if (!_mots.count(t.getRoute()->getType()) || - !_motCfg.mots.count(t.getRoute()->getType())) - continue; - - if (!t.getShape().empty() && !_cfg.dropShapes) { - refColors[t.getRoute()][t.getRoute()->getColor()].push_back(&t); - } + if (!t.getShape().empty()) shpUsage[t.getShape()]++; } - // we implicitely cluster by routing attrs here. This ensures that now two - // threads will access the same routing attrs later on, which safes us an - // expensive locking mechanism later on for the hop cache - std::vector tries; - for (const auto& forest : forests) { - tries.push_back(&(forest.second)); - for (const auto& trie : forest.second) { - for (const auto& trips : trie.getNdTrips()) { - stats.totNumTrips += trips.second.size(); + // to avoid unfair load balance on threads + std::random_shuffle(clusters.begin(), clusters.end()); + + size_t iters = EDijkstra::ITERS; + size_t totiters = EDijkstra::ITERS; + size_t oiters = EDijkstra::ITERS; + size_t j = 0; + + auto t1 = TIME(); + auto t2 = TIME(); + double totAvgDist = 0; + size_t totNumTrips = 0; + +#pragma omp parallel for num_threads(_numThreads) + for (size_t i = 0; i < clusters.size(); i++) { + j++; + + if (j % 10 == 0) { +#pragma omp critical + { + LOG(INFO) << "@ " << j << " / " << clusters.size() << " (" + << (static_cast((j * 1.0) / clusters.size() * 100)) + << "%, " << (EDijkstra::ITERS - oiters) << " iters, " + << "matching " << (10.0 / (TOOK(t1, TIME()) / 1000)) + << " trips/sec)"; + + oiters = EDijkstra::ITERS; + t1 = TIME(); } } - } - auto tStart = TIME(); - std::atomic at(0); + // explicitly call const version of shape here for thread safety + const Shape& cshp = + const_cast(*this).shape(clusters[i][0]); + totAvgDist += cshp.avgHopDist; - size_t numThreads = std::thread::hardware_concurrency(); 
- std::vector thrds(numThreads); - std::vector colors(numThreads); - std::vector gtfsGraphs(numThreads); + if (_cfg.buildTransitGraph) { +#pragma omp critical + { writeTransitGraph(cshp, >fsGraph, clusters[i]); } + } - size_t i = 0; - for (auto& t : thrds) { - t = std::thread(&ShapeBuilder::shapeWorker, this, &tries, &at, &shpUse, - &colors[i], >fsGraphs[i]); - i++; - } + std::vector distances; + const ad::cppgtfs::gtfs::Shape& shp = + getGtfsShape(cshp, clusters[i][0], &distances); - for (auto& thr : thrds) thr.join(); + LOG(VDEBUG) << "Took " << EDijkstra::ITERS - iters << " iterations."; + iters = EDijkstra::ITERS; - stats.solveTime = TOOK(tStart, TIME()); + totNumTrips += clusters[i].size(); - LOG(INFO) << "Matched " << stats.totNumTrips << " trips in " << std::fixed - << std::setprecision(2) << stats.solveTime << " ms."; - - // merge colors - for (auto& cols : colors) { - for (auto& route : cols) { - for (auto& col : route.second) { - refColors[route.first][col.first].insert( - refColors[route.first][col.first].end(), col.second.begin(), - col.second.end()); + for (auto t : clusters[i]) { + if (_cfg.evaluate && _evalFeed && _ecoll) { + std::lock_guard guard(_shpMutex); + _ecoll->add(t, _evalFeed->getShapes().get(t->getShape()), shp, + distances); } + + if (!t->getShape().empty() && shpUsage[t->getShape()] > 0) { + shpUsage[t->getShape()]--; + if (shpUsage[t->getShape()] == 0) { + std::lock_guard guard(_shpMutex); + _feed->getShapes().remove(t->getShape()); + } + } + setShape(t, shp, distances); } } - // update them in the routes, split routes if necessary - updateRouteColors(refColors); + LOG(INFO) << "Matched " << totNumTrips << " trips in " << clusters.size() + << " clusters."; + LOG(DEBUG) << "Took " << (EDijkstra::ITERS - totiters) + << " iterations in total."; + LOG(DEBUG) << "Took " << TOOK(t2, TIME()) << " ms in total."; + LOG(DEBUG) << "Total avg. 
tput " + << (static_cast(EDijkstra::ITERS - totiters)) / + TOOK(t2, TIME()) + << " iters/sec"; + LOG(DEBUG) << "Total avg. trip tput " + << (clusters.size() / (TOOK(t2, TIME()) / 1000)) << " trips/sec"; + LOG(DEBUG) << "Avg hop distance was " + << (totAvgDist / static_cast(clusters.size())) + << " meters"; if (_cfg.buildTransitGraph) { - LOG(DEBUG) << "Building transit network graph..."; - - // merge gtfsgraph from threads - TrGraphEdgs gtfsGraph; - - for (auto& g : gtfsGraphs) { - for (auto& ePair : g) { - gtfsGraph[ePair.first].insert(gtfsGraph[ePair.first].begin(), - ePair.second.begin(), ePair.second.end()); - } - } - buildNetGraph(>fsGraph, outNg); - } - - stats.dijkstraIters = EDijkstra::ITERS; - - return stats; -} - -// _____________________________________________________________________________ -void ShapeBuilder::updateRouteColors(const RouteRefColors& refColors) { - for (auto& route : refColors) { - if (route.second.size() == 1) { - // only one color found for this route, great! - // update inplace... - route.first->setColor(route.second.begin()->first); - if (route.first->getColor() != NO_COLOR) - route.first->setTextColor(getTextColor(route.first->getColor())); - } else { - // are there fare rules using this route? - std::vector< - std::pair*, - ad::cppgtfs::gtfs::FareRule>> - rules; - - for (auto& f : _feed->getFares()) { - for (auto r : f.second->getFareRules()) { - if (r.getRoute() == route.first) { - rules.push_back({f.second, r}); - } - } - } - - // add new routes... 
- for (auto& c : route.second) { - // keep the original one intact - if (c.first == route.first->getColor()) continue; - - auto routeCp = *route.first; - - // find free id - std::string newId = route.first->getId() + "::1"; - size_t i = 1; - while (_feed->getRoutes().get(newId)) { - i++; - newId = route.first->getId() + "::" + std::to_string(i); - } - - routeCp.setId(newId); - routeCp.setColor(c.first); - routeCp.setTextColor(getTextColor(routeCp.getColor())); - - auto newRoute = _feed->getRoutes().add(routeCp); - - // update trips to use that route - for (auto& t : c.second) t->setRoute(newRoute); - - // add new fare rules - for (auto a : rules) { - auto rule = a.second; - rule.setRoute(newRoute); - a.first->addFareRule(rule); - } - } - } + LOG(INFO) << "Building transit network graph..."; + buildTrGraph(>fsGraph, ng); } } // _____________________________________________________________________________ void ShapeBuilder::setShape(Trip* t, const ad::cppgtfs::gtfs::Shape& s, - const std::vector& distances) { + const std::vector& distances) { assert(distances.size() == t->getStopTimes().size()); // set distances size_t i = 0; @@ -556,45 +291,91 @@ void ShapeBuilder::setShape(Trip* t, const ad::cppgtfs::gtfs::Shape& s, } std::lock_guard guard(_shpMutex); - auto gtfsShp = _feed->getShapes().add(s); - t->setShape(gtfsShp); + t->setShape(_feed->getShapes().add(s)); } // _____________________________________________________________________________ ad::cppgtfs::gtfs::Shape ShapeBuilder::getGtfsShape( - const EdgeListHops& hops, Trip* t, size_t numOthers, - const RoutingAttrs& rAttrs, std::vector* hopDists, - uint32_t* bestColor) { + const Shape& shp, Trip* t, std::vector* hopDists) { ad::cppgtfs::gtfs::Shape ret(getFreeShapeId(t)); - assert(hops.size() == t->getStopTimes().size() - 1); - - std::map colors; - - const std::vector& gl = getGeom(hops, rAttrs, &colors, t, numOthers); - const std::vector& measures = getMeasure(gl); + assert(shp.hops.size() == 
t->getStopTimes().size() - 1); size_t seq = 0; + double dist = -1; + double lastDist = -1; hopDists->push_back(0); - for (size_t i = 0; i < gl.size(); i++) { - for (size_t j = 0; j < gl[i].size(); j++) { - ret.addPoint( - ShapePoint(gl[i][j].getY(), gl[i][j].getX(), measures[seq], seq)); - seq++; - } - hopDists->push_back(measures[seq - 1]); - } + POINT last(0, 0); + for (const auto& hop : shp.hops) { + const trgraph::Node* l = hop.start; + if (hop.edges.size() == 0) { + POINT ll = webMercToLatLng( + hop.start->pl().getGeom()->getX(), hop.start->pl().getGeom()->getY()); - // get most likely color - double best = 0; - *bestColor = NO_COLOR; - for (const auto& c : colors) { - double progr = c.second / measures.back(); - // TODO(patrick): make threshold configurable - if (progr > 0.9 && progr > best) { - best = progr; - *bestColor = c.first; + if (dist > -0.5) + dist += webMercMeterDist(last, *hop.start->pl().getGeom()); + else + dist = 0; + + last = *hop.start->pl().getGeom(); + + if (dist - lastDist > 0.01) { + ret.addPoint(ShapePoint(ll.getY(), ll.getX(), dist, seq)); + seq++; + lastDist = dist; + } + + dist += webMercMeterDist(last, *hop.end->pl().getGeom()); + last = *hop.end->pl().getGeom(); + + if (dist - lastDist > 0.01) { + ll = webMercToLatLng( + hop.end->pl().getGeom()->getX(), hop.end->pl().getGeom()->getY()); + ret.addPoint(ShapePoint(ll.getY(), ll.getX(), dist, seq)); + seq++; + lastDist = dist; + } } + + for (auto i = hop.edges.rbegin(); i != hop.edges.rend(); i++) { + const auto* e = *i; + if ((e->getFrom() == l) ^ e->pl().isRev()) { + for (size_t i = 0; i < e->pl().getGeom()->size(); i++) { + const POINT& cur = (*e->pl().getGeom())[i]; + if (dist > -0.5) + dist += webMercMeterDist(last, cur); + else + dist = 0; + last = cur; + if (dist - lastDist > 0.01) { + POINT ll = + webMercToLatLng(cur.getX(), cur.getY()); + ret.addPoint(ShapePoint(ll.getY(), ll.getX(), dist, seq)); + seq++; + lastDist = dist; + } + } + } else { + for (int64_t i = 
e->pl().getGeom()->size() - 1; i >= 0; i--) { + const POINT& cur = (*e->pl().getGeom())[i]; + if (dist > -0.5) + dist += webMercMeterDist(last, cur); + else + dist = 0; + last = cur; + if (dist - lastDist > 0.01) { + POINT ll = + webMercToLatLng(cur.getX(), cur.getY()); + ret.addPoint(ShapePoint(ll.getY(), ll.getX(), dist, seq)); + seq++; + lastDist = dist; + } + } + } + l = e->getOtherNd(l); + } + + hopDists->push_back(lastDist); } return ret; @@ -604,7 +385,7 @@ ad::cppgtfs::gtfs::Shape ShapeBuilder::getGtfsShape( std::string ShapeBuilder::getFreeShapeId(Trip* trip) { std::string ret; std::lock_guard guard(_shpMutex); - while (!ret.size() || _feed->getShapes().has(ret)) { + while (!ret.size() || _feed->getShapes().get(ret)) { _curShpCnt++; ret = "shp_"; ret += std::to_string(trip->getRoute()->getType()); @@ -621,24 +402,21 @@ const RoutingAttrs& ShapeBuilder::getRAttrs(const Trip* trip) { if (i == _rAttrs.end()) { router::RoutingAttrs ret; - ret.classifier = _classifier; - const auto& lnormzer = _motCfg.osmBuildOpts.lineNormzer; - const auto& snormzer = _motCfg.osmBuildOpts.statNormzer; ret.shortName = lnormzer.norm(trip->getRoute()->getShortName()); - ret.lineFrom = - snormzer.norm(trip->getStopTimes().front().getStop()->getName()); - ret.lineTo = { - snormzer.norm(trip->getStopTimes().back().getStop()->getName())}; - // fallbacks for line name if (ret.shortName.empty()) ret.shortName = lnormzer.norm(trip->getShortname()); if (ret.shortName.empty()) ret.shortName = lnormzer.norm(trip->getRoute()->getLongName()); + ret.fromString = _motCfg.osmBuildOpts.statNormzer.norm( + trip->getStopTimes().begin()->getStop()->getName()); + ret.toString = _motCfg.osmBuildOpts.statNormzer.norm( + (--trip->getStopTimes().end())->getStop()->getName()); + return _rAttrs .insert(std::pair(trip, ret)) .first->second; @@ -655,9 +433,7 @@ const RoutingAttrs& ShapeBuilder::getRAttrs(const Trip* trip) const { // 
_____________________________________________________________________________ void ShapeBuilder::getGtfsBox(const Feed* feed, const MOTs& mots, const std::string& tid, bool dropShapes, - osm::BBoxIdx* box, double maxSpeed, - std::vector* hopDists, - uint8_t verbosity) { + osm::BBoxIdx* box) { for (const auto& t : feed->getTrips()) { if (!tid.empty() && t.getId() != tid) continue; if (tid.empty() && !t.getShape().empty() && !dropShapes) continue; @@ -665,60 +441,7 @@ void ShapeBuilder::getGtfsBox(const Feed* feed, const MOTs& mots, if (mots.count(t.getRoute()->getType())) { DBox cur; - for (size_t i = 0; i < t.getStopTimes().size(); i++) { - // skip outliers - const auto& st = t.getStopTimes()[i]; - - int toTime = std::numeric_limits::max(); - double toD = 0; - int fromTime = std::numeric_limits::max(); - double fromD = 0; - - if (i > 0) { - const auto& stPrev = t.getStopTimes()[i - 1]; - toTime = st.getArrivalTime().seconds() - - stPrev.getDepartureTime().seconds(); - toD = util::geo::haversine( - st.getStop()->getLat(), st.getStop()->getLng(), - stPrev.getStop()->getLat(), stPrev.getStop()->getLng()); - if (hopDists) hopDists->push_back(toD); - } - - if (i < t.getStopTimes().size() - 1) { - const auto& stNext = t.getStopTimes()[i + 1]; - fromTime = stNext.getArrivalTime().seconds() - - st.getDepartureTime().seconds(); - fromD = util::geo::haversine( - st.getStop()->getLat(), st.getStop()->getLng(), - stNext.getStop()->getLat(), stNext.getStop()->getLng()); - } - - const double reqToTime = toD / maxSpeed; - const double reqFromTime = fromD / maxSpeed; - - const double BUFFER = 5 * 60; - - if (reqToTime > (BUFFER + toTime) * 3 * MAX_ROUTE_COST_DOUBLING_STEPS && - reqFromTime > - (BUFFER + fromTime) * 3 * MAX_ROUTE_COST_DOUBLING_STEPS) { - if (verbosity) { - LOG(WARN) - << "Skipping station '" << st.getStop()->getName() << "' (" - << st.getStop()->getId() << ") @ " << st.getStop()->getLat() - << ", " << st.getStop()->getLng() - << " for bounding box as the vehicle 
cannot realistically " - "reach and leave it in the scheduled time"; - } else { - LOG(DEBUG) - << "Skipping station '" << st.getStop()->getName() << "' (" - << st.getStop()->getId() << ") @ " << st.getStop()->getLat() - << ", " << st.getStop()->getLng() - << " for bounding box as the vehicle cannot realistically " - "reach and leave it in the scheduled time"; - } - continue; - } - + for (const auto& st : t.getStopTimes()) { cur = extendBox(DPoint(st.getStop()->getLng(), st.getStop()->getLat()), cur); } @@ -728,224 +451,141 @@ void ShapeBuilder::getGtfsBox(const Feed* feed, const MOTs& mots, } // _____________________________________________________________________________ -std::vector ShapeBuilder::getTransTimes(Trip* trip) const { - std::vector ret; +NodeCandRoute ShapeBuilder::getNCR(Trip* trip) const { + router::NodeCandRoute ncr(trip->getStopTimes().size()); - for (size_t i = 0; i < trip->getStopTimes().size() - 1; i++) { - auto cur = trip->getStopTimes()[i]; - auto next = trip->getStopTimes()[i + 1]; + size_t i = 0; - int depTime = cur.getDepartureTime().seconds(); - int arrTime = next.getArrivalTime().seconds(); - - int diff = arrTime - depTime; - if (diff < 1) diff = 1; - - ret.push_back(diff); - assert(ret.back() >= 0); - } - - return ret; -} - -// _____________________________________________________________________________ -std::vector ShapeBuilder::getTransDists(Trip* trip) const { - std::vector ret; - - for (size_t i = 0; i < trip->getStopTimes().size() - 1; i++) { - auto cur = trip->getStopTimes()[i]; - auto next = trip->getStopTimes()[i + 1]; - - double dist = util::geo::haversine( - cur.getStop()->getLat(), cur.getStop()->getLng(), - next.getStop()->getLat(), next.getStop()->getLng()); - - ret.push_back(dist); - } - - return ret; -} - -// _____________________________________________________________________________ -EdgeCandMap ShapeBuilder::getECM( - const TripTrie* trie) const { - EdgeCandMap ecm(trie->getNds().size()); - - for (size_t nid = 1; nid 
< trie->getNds().size(); nid++) { - auto trNd = trie->getNds()[nid]; - auto parentTrNd = trie->getNds()[trNd.parent]; - - if (nid != 1 && !trNd.arr) continue; - - double avgT = 0; - - if (trNd.trips) avgT = trNd.accTime / trNd.trips; - - const auto& cands = getEdgCands(trNd.reprStop); - ecm[nid].reserve(cands.size()); - - for (auto& cand : cands) { - const auto& timeExpCands = timeExpand(cand, avgT); - assert(timeExpCands.size()); - - for (size_t depChildId : trNd.childs) { - if (nid == 1) break; - auto chldTrNd = trie->getNds()[depChildId]; - double avgChildT = 0; - if (chldTrNd.trips) avgChildT = chldTrNd.accTime / chldTrNd.trips; - - double timeDiff = avgChildT - avgT; - if (timeDiff < 0) timeDiff = 0; - - for (size_t candId = 0; candId < timeExpCands.size(); candId++) { - const auto& cand = timeExpCands[candId]; - ecm[depChildId].push_back(cand); - ecm[depChildId].back().time += timeDiff; - - ecm[depChildId].back().pen = timePen(cand.time, avgChildT); - - for (size_t sucCandId = 0; sucCandId < timeExpCands.size(); - sucCandId++) { - if (timeExpCands[sucCandId].time <= ecm[depChildId].back().time) { - ecm[depChildId].back().depPrede.push_back(sucCandId + - ecm[nid].size()); - } - } - assert(ecm[depChildId].back().depPrede.size()); - } - } - ecm[nid].insert(ecm[nid].end(), timeExpCands.begin(), timeExpCands.end()); + for (const auto& st : trip->getStopTimes()) { + ncr[i] = getNodeCands(st.getStop()); + if (ncr[i].size() == 0) { + throw std::runtime_error("No node candidate found for station '" + + st.getStop()->getName() + "' on trip '" + + trip->getId() + "'"); } - - assert(ecm[nid].size() != 0); + i++; } - - return ecm; + return ncr; } // _____________________________________________________________________________ -double ShapeBuilder::timePen(int candTime, int schedTime) const { - // standard deviation of normal distribution - double standarddev = 5 * 60; +double ShapeBuilder::avgHopDist(Trip* trip) const { + size_t i = 0; + double sum = 0; - int diff = 
abs(candTime - schedTime); + const Stop* prev = 0; - double cNorm = diff / standarddev; - return cNorm * cNorm; + for (const auto& st : trip->getStopTimes()) { + if (!prev) { + prev = st.getStop(); + continue; + } + auto a = util::geo::latLngToWebMerc(prev->getLat(), + prev->getLng()); + auto b = util::geo::latLngToWebMerc( + st.getStop()->getLat(), st.getStop()->getLng()); + sum += util::geo::webMercMeterDist(a, b); + + prev = st.getStop(); + i++; + } + return sum / static_cast(i); } // _____________________________________________________________________________ -EdgeCandGroup ShapeBuilder::timeExpand(const EdgeCand& ec, int time) const { - EdgeCandGroup ret; - // TODO(patrick): heuristic for time expansion variance, currently - // unused - for (int i = 0; i < 1; i++) { - EdgeCand ecNew = ec; - // in 30 sec steps - ecNew.time = time + i * 30; - ecNew.pen = ecNew.pen + timePen(ecNew.time, time); - ret.push_back(ecNew); - } +Clusters ShapeBuilder::clusterTrips(Feed* f, MOTs mots) { + // building an index [start station, end station] -> [cluster] - return ret; -} + std::map> clusterIdx; -// _____________________________________________________________________________ -TripForests ShapeBuilder::clusterTrips(Feed* f, MOTs mots) { - TripForests forest; - std::map> trips; - - // warm the stop name normalizer caches so a - // multithreaded access later on will never write to the underlying cache - for (auto& stop : f->getStops()) { - const auto& snormzer = _motCfg.osmBuildOpts.statNormzer; - auto normedName = snormzer.norm(stop.getName()); - } - - // cluster by routing attr for parallization later on + Clusters ret; for (auto& trip : f->getTrips()) { - if (!_cfg.dropShapes && !trip.getShape().empty()) continue; + if (!trip.getShape().empty() && !_cfg.dropShapes) continue; if (trip.getStopTimes().size() < 2) continue; if (!mots.count(trip.getRoute()->getType()) || !_motCfg.mots.count(trip.getRoute()->getType())) continue; - // important: we are building the routing 
attributes here, so a - // multithreaded access later on will never write to the underlying cache - const auto& rAttrs = getRAttrs(&trip); + bool found = false; + auto spair = StopPair(trip.getStopTimes().begin()->getStop(), + trip.getStopTimes().rbegin()->getStop()); + const auto& c = clusterIdx[spair]; - trips[rAttrs].push_back(&trip); - forest[rAttrs] = {}; + for (size_t i = 0; i < c.size(); i++) { + if (routingEqual(ret[c[i]][0], &trip)) { + ret[c[i]].push_back(&trip); + found = true; + break; + } + } + if (!found) { + ret.push_back(Cluster{&trip}); + // explicit call to write render attrs to cache + getRAttrs(&trip); + clusterIdx[spair].push_back(ret.size() - 1); + } } - size_t numThreads = std::thread::hardware_concurrency(); - std::vector thrds(numThreads); - std::vector> attrs(numThreads); - - size_t i = 0; - for (auto it : trips) { - attrs[i].push_back(it.first); - if (++i == numThreads) i = 0; - } - - i = 0; - for (auto& t : thrds) { - t = std::thread(&ShapeBuilder::clusterWorker, this, &attrs[i], &trips, - &forest); - i++; - } - - for (auto& thr : thrds) thr.join(); - - return forest; + return ret; } // _____________________________________________________________________________ -void ShapeBuilder::clusterWorker( - const std::vector* rAttrsVec, - const std::map>* trips, - TripForests* forest) { - for (const auto& rAttrs : *rAttrsVec) { - for (auto& trip : trips->at(rAttrs)) { - bool ins = false; - auto& subForest = forest->at(rAttrs); - for (auto& trie : subForest) { - if (trie.addTrip(trip, rAttrs, - _motCfg.routingOpts.transPenMethod == "timenorm", - _cfg.noTrie)) { - ins = true; - break; - } - } +bool ShapeBuilder::routingEqual(const Stop* a, const Stop* b) { + if (a == b) return true; // trivial - if (!ins) { - subForest.resize(subForest.size() + 1); - subForest.back().addTrip( - trip, rAttrs, _motCfg.routingOpts.transPenMethod == "timenorm", - false); - } + auto namea = _motCfg.osmBuildOpts.statNormzer.norm(a->getName()); + auto nameb = 
_motCfg.osmBuildOpts.statNormzer.norm(b->getName()); + if (namea != nameb) return false; + + auto tracka = _motCfg.osmBuildOpts.trackNormzer.norm(a->getPlatformCode()); + auto trackb = _motCfg.osmBuildOpts.trackNormzer.norm(b->getPlatformCode()); + if (tracka != trackb) return false; + + POINT ap = + util::geo::latLngToWebMerc(a->getLat(), a->getLng()); + POINT bp = + util::geo::latLngToWebMerc(b->getLat(), b->getLng()); + + double d = util::geo::webMercMeterDist(ap, bp); + + if (d > 1) return false; + + return true; +} + +// _____________________________________________________________________________ +bool ShapeBuilder::routingEqual(Trip* a, Trip* b) { + if (a->getStopTimes().size() != b->getStopTimes().size()) return false; + if (getRAttrs(a) != getRAttrs(b)) return false; + + auto stb = b->getStopTimes().begin(); + for (const auto& sta : a->getStopTimes()) { + if (!routingEqual(sta.getStop(), stb->getStop())) { + return false; } + stb++; } + + return true; } // _____________________________________________________________________________ const pfaedle::trgraph::Graph* ShapeBuilder::getGraph() const { return _g; } // _____________________________________________________________________________ -void ShapeBuilder::writeTransitGraph( - const router::EdgeListHops& hops, TrGraphEdgs* edgs, - const std::vector& trips) const { - for (const auto& hop : hops) { +void ShapeBuilder::writeTransitGraph(const Shape& shp, TrGraphEdgs* edgs, + const Cluster& cluster) const { + for (auto hop : shp.hops) { for (const auto* e : hop.edges) { if (e->pl().isRev()) e = _g->getEdg(e->getTo(), e->getFrom()); - (*edgs)[e].insert((*edgs)[e].begin(), trips.begin(), trips.end()); + (*edgs)[e].insert(cluster.begin(), cluster.end()); } } } // _____________________________________________________________________________ -void ShapeBuilder::buildNetGraph(TrGraphEdgs* edgs, - pfaedle::netgraph::Graph* ng) const { +void ShapeBuilder::buildTrGraph(TrGraphEdgs* edgs, + pfaedle::netgraph::Graph* 
ng) const { std::unordered_map nodes; for (auto ep : *edgs) { @@ -967,308 +607,3 @@ void ShapeBuilder::buildNetGraph(TrGraphEdgs* edgs, pfaedle::netgraph::EdgePL(*e->pl().getGeom(), ep.second)); } } - -// _____________________________________________________________________________ -std::vector ShapeBuilder::getGeom(const EdgeListHops& hops, - const RoutingAttrs& rAttrs, - std::map* colors, - Trip* t, size_t numOthers) const { - std::vector ret; - - for (size_t i = hops.size(); i > 0; i--) { - const auto& hop = hops[i - 1]; - if (!hop.start || !hop.end) { - // no hop was found, use the fallback geometry - - if (_cfg.verbosity) { - const auto stopFr = t->getStopTimes()[hops.size() - i].getStop(); - const auto stopTo = t->getStopTimes()[hops.size() - i + 1].getStop(); - - LOG(WARN) << "No viable hop found between stops '" << stopFr->getName() - << "' (" << stopFr->getId() << ") and '" << stopTo->getName() - << "' (" << stopTo->getId() << ") for trip " << t->getId() - << " of type '" - << ad::cppgtfs::gtfs::flat::Route::getTypeString( - t->getRoute()->getType()) - << "'" - << (numOthers > 1 ? 
" (and " + std::to_string(numOthers) + - " similar trips)" - : "") - << ", falling back to straight line"; - } - - if (hop.start) { - if (hop.progrStart > 0) { - auto l = getLine(hop.start); - PolyLine pl(l); - const auto& seg = pl.getSegment(hop.progrStart, 1); - ret.push_back({seg.getLine().front(), hop.pointEnd}); - } else { - ret.push_back({*hop.start->getFrom()->pl().getGeom(), hop.pointEnd}); - } - } else if (hop.end) { - if (hop.progrEnd > 0) { - auto l = getLine(hop.end); - PolyLine pl(l); - const auto& seg = pl.getSegment(0, hop.progrEnd); - ret.push_back({hop.pointStart, seg.getLine().back()}); - } else { - ret.push_back({hop.pointStart, *hop.end->getFrom()->pl().getGeom()}); - } - } else { - ret.push_back({hop.pointStart, hop.pointEnd}); - } - } else { - const auto& l = getLine(hop, rAttrs, colors); - ret.push_back(l); - } - } - - return ret; -} - -// _____________________________________________________________________________ -LINE ShapeBuilder::getLine(const EdgeListHop& hop, const RoutingAttrs& rAttrs, - std::map* colors) const { - LINE l; - - const auto& curL = getLine(hop.start); - - if (hop.edges.size() == 0) { - // draw direct line between positions on edges - if (hop.progrStart > 0) { - PolyLine pl(curL); - const auto& seg = pl.getSegment(hop.progrStart, 1); - l.push_back(seg.front()); - } else { - l.push_back(curL.front()); - } - - if (hop.progrEnd > 0) { - PolyLine pl(getLine(hop.end)); - const auto& seg = pl.getSegment(0, hop.progrEnd); - l.push_back(seg.back()); - } else { - l.push_back(*hop.end->getFrom()->pl().getGeom()); - } - - return l; - } - - // special case: start and end are on the same edge! 
- if (hop.edges.size() == 1 && hop.start == hop.end) { - PolyLine pl(curL); - const auto& seg = pl.getSegment(hop.progrStart, hop.progrEnd); - l.insert(l.end(), seg.getLine().begin(), seg.getLine().end()); - - for (const auto& color : getColorMatch(hop.start, rAttrs)) { - (*colors)[color] += hop.start->pl().getLength(); - } - - return l; - } - - auto from = hop.start->getFrom(); - - if (hop.progrStart > 0) { - PolyLine pl(curL); - const auto& seg = pl.getSegment(hop.progrStart, 1); - l.insert(l.end(), seg.getLine().begin(), seg.getLine().end()); - - double l = hop.start->pl().getLength() * (1 - hop.progrStart); - for (const auto& color : getColorMatch(hop.start, rAttrs)) { - (*colors)[color] += l; - } - } else { - l.insert(l.end(), curL.begin(), curL.end()); - - double l = hop.start->pl().getLength(); - for (const auto& color : getColorMatch(hop.start, rAttrs)) { - (*colors)[color] += l; - } - } - - from = hop.start->getOtherNd(from); - - if (hop.edges.size() > 1) { - for (size_t j = hop.edges.size() - 2; j > 0; j--) { - const auto* e = hop.edges[j]; - const auto& curL = getLine(e); - l.insert(l.end(), curL.begin(), curL.end()); - from = e->getOtherNd(from); - - double l = e->pl().getLength(); - for (const auto& color : getColorMatch(e, rAttrs)) { - (*colors)[color] += l; - } - } - } - - if (hop.progrEnd > 0) { - PolyLine pl(getLine(hop.end)); - const auto& seg = pl.getSegment(0, hop.progrEnd); - l.insert(l.end(), seg.getLine().begin(), seg.getLine().end()); - - double l = hop.end->pl().getLength() * hop.progrEnd; - for (const auto& color : getColorMatch(hop.end, rAttrs)) { - (*colors)[color] += l; - } - } - - if (l.size() > 1) return util::geo::simplify(l, 0.5 / M_PER_DEG); - return l; -} - -// _____________________________________________________________________________ -LINE ShapeBuilder::getLine(const trgraph::Edge* e) const { - LINE l; - if (!e->pl().getGeom() || e->pl().getGeom()->size() == 0) - return {*e->getFrom()->pl().getGeom(), 
*e->getTo()->pl().getGeom()}; - if (e->pl().isRev()) { - l.insert(l.end(), e->pl().getGeom()->rbegin(), e->pl().getGeom()->rend()); - } else { - l.insert(l.end(), e->pl().getGeom()->begin(), e->pl().getGeom()->end()); - } - return l; -} - -// _____________________________________________________________________________ -std::vector ShapeBuilder::getMeasure( - const std::vector& lines) const { - assert(lines.size()); - assert(lines.front().size()); - std::vector ret; - POINT last = lines.front().front(); - - for (const auto& l : lines) { - for (size_t i = 0; i < l.size(); i++) { - if (ret.size() == 0) { - ret.push_back(0); - } else { - float v = ret.back() + util::geo::haversine(last, l[i]); - assert(v >= ret.back()); // required by GTFS standard! - ret.push_back(v); - } - last = l[i]; - } - } - - return ret; -} - -// _____________________________________________________________________________ -void ShapeBuilder::shapeWorker( - const std::vector* tries, std::atomic* at, - std::map* shpUse, - std::map>>* routeColors, - TrGraphEdgs* gtfsGraph) { - while (1) { - size_t j = (*at)++; - if (j >= tries->size()) return; - - int step = tries->size() < 10 ? 
tries->size() : 10; - - if (j % (tries->size() / step) == 0) { - LOG(INFO) << "@ " << (static_cast((j * 1.0) / tries->size() * 100)) - << "%"; - LOG(DEBUG) << "(@ trie forest " << j << "/" << tries->size() << ")"; - } - - const auto& forest = *((*tries)[j]); - - // hop cache per forest, thus per routing attributes - HopCache hopCacheLoc; - HopCache* hopCache = 0; - - if (!_cfg.noHopCache) hopCache = &hopCacheLoc; - - for (size_t i = 0; i < forest.size(); i++) { - const TripTrie* trie = &(forest[i]); - const auto& hops = shapeify(trie, hopCache); - - for (const auto& leaf : trie->getNdTrips()) { - std::vector distances; - const RoutingAttrs& rAttrs = trie->getNd(leaf.first).rAttrs; - - uint32_t color; - - const ad::cppgtfs::gtfs::Shape& shp = - getGtfsShape(hops.at(leaf.first), leaf.second[0], - leaf.second.size(), rAttrs, &distances, &color); - - if (_cfg.buildTransitGraph) { - writeTransitGraph(hops.at(leaf.first), gtfsGraph, leaf.second); - } - - for (auto t : leaf.second) { - if (_cfg.writeColors && color != NO_COLOR && - t->getRoute()->getColor() == NO_COLOR && - t->getRoute()->getTextColor() == NO_COLOR) { - (*routeColors)[t->getRoute()][color].push_back(t); - } else { - // else, use the original route color - (*routeColors)[t->getRoute()][t->getRoute()->getColor()].push_back( - t); - } - - if (!t->getShape().empty() && (*shpUse)[t->getShape()] > 0) { - (*shpUse)[t->getShape()]--; - if ((*shpUse)[t->getShape()] == 0) { - std::lock_guard guard(_shpMutex); - _feed->getShapes().remove(t->getShape()); - } - } - setShape(t, shp, distances); - } - } - } - } -} - -// _____________________________________________________________________________ -void ShapeBuilder::edgCandWorker(std::vector* stops, - GrpCache* cache) { - for (auto stop : *stops) { - (*cache)[stop] = getEdgCands(stop); - } -} - -// _____________________________________________________________________________ -std::set ShapeBuilder::getColorMatch( - const trgraph::Edge* e, const RoutingAttrs& rAttrs) 
const { - std::set ret; - for (const auto* l : e->pl().getLines()) { - auto simi = rAttrs.simi(l); - if (simi.nameSimilar && l->color != NO_COLOR) ret.insert(l->color); - } - - return ret; -} - -// _____________________________________________________________________________ -uint32_t ShapeBuilder::getTextColor(uint32_t c) const { - double r = (c & 0x00FF0000) >> 16; - double g = (c & 0x0000FF00) >> 8; - double b = (c & 0x000000FF); - - // gray value approx - double a = sqrt((r * r + g * g + b * b) / 3); - - // below a certain gray value, use white, else black - if (a < 140) return 0x00FFFFFF; - return 0; -} - -// _____________________________________________________________________________ -double ShapeBuilder::emWeight(double mDist) const { - if (_motCfg.routingOpts.emPenMethod == "exp") { - return mDist * _motCfg.routingOpts.stationDistPenFactor; - } - - if (_motCfg.routingOpts.emPenMethod == "norm") { - double s = mDist * _motCfg.routingOpts.stationDistPenFactor; - return 0.5 * s * s; - } - - return mDist; -} diff --git a/src/pfaedle/router/ShapeBuilder.h b/src/pfaedle/router/ShapeBuilder.h index c0309e2..560d1b2 100644 --- a/src/pfaedle/router/ShapeBuilder.h +++ b/src/pfaedle/router/ShapeBuilder.h @@ -5,46 +5,43 @@ #ifndef PFAEDLE_ROUTER_SHAPEBUILDER_H_ #define PFAEDLE_ROUTER_SHAPEBUILDER_H_ -#include #include #include #include #include #include #include - #include "ad/cppgtfs/gtfs/Feed.h" #include "pfaedle/Def.h" #include "pfaedle/config/MotConfig.h" #include "pfaedle/config/PfaedleConfig.h" +#include "pfaedle/eval/Collector.h" #include "pfaedle/gtfs/Feed.h" #include "pfaedle/netgraph/Graph.h" #include "pfaedle/osm/Restrictor.h" #include "pfaedle/router/Misc.h" #include "pfaedle/router/Router.h" -#include "pfaedle/router/Stats.h" -#include "pfaedle/router/TripTrie.h" -#include "pfaedle/statsimi-classifier/StatsimiClassifier.h" #include "pfaedle/trgraph/Graph.h" #include "util/geo/Geo.h" namespace pfaedle { namespace router { -typedef std::vector> TripForest; 
-typedef std::map TripForests; -typedef std::pair - StopPair; -typedef std::unordered_map - TripRAttrs; -typedef std::unordered_map> +using ad::cppgtfs::gtfs::Stop; +using pfaedle::gtfs::Trip; +using pfaedle::gtfs::Feed; + +struct Shape { + router::EdgeListHops hops; + double avgHopDist; +}; + +typedef std::vector Cluster; +typedef std::vector Clusters; +typedef std::pair StopPair; +typedef std::unordered_map TripRAttrs; +typedef std::unordered_map> TrGraphEdgs; -typedef std::map>> - RouteRefColors; -typedef std::unordered_map - GrpCache; /* * Layer class for the router. Provides an interface for direct usage with @@ -52,121 +49,76 @@ typedef std::unordered_map */ class ShapeBuilder { public: - ShapeBuilder( - pfaedle::gtfs::Feed* feed, MOTs mots, const config::MotConfig& motCfg, - trgraph::Graph* g, router::FeedStops* stops, osm::Restrictor* restr, - const pfaedle::statsimiclassifier::StatsimiClassifier* classifier, - router::Router* router, const config::Config& cfg); + ShapeBuilder(Feed* feed, ad::cppgtfs::gtfs::Feed* evalFeed, MOTs mots, + const config::MotConfig& motCfg, eval::Collector* ecoll, + trgraph::Graph* g, router::FeedStops* stops, + osm::Restrictor* restr, const config::Config& cfg); - Stats shapeify(pfaedle::netgraph::Graph* outNg); + void shape(pfaedle::netgraph::Graph* ng); router::FeedStops* getFeedStops(); - // shape single trip - std::pair, Stats> shapeL(pfaedle::gtfs::Trip* trip); + const NodeCandGroup& getNodeCands(const Stop* s) const; - std::map shapeify( - const TripTrie* trie, HopCache* hopCache) const; - EdgeListHops shapeify(pfaedle::gtfs::Trip* trip); + LINE shapeL(const router::NodeCandRoute& ncr, + const router::RoutingAttrs& rAttrs); + LINE shapeL(Trip* trip); + + pfaedle::router::Shape shape(Trip* trip) const; + pfaedle::router::Shape shape(Trip* trip); const trgraph::Graph* getGraph() const; - static void getGtfsBox(const pfaedle::gtfs::Feed* feed, const MOTs& mots, + static void getGtfsBox(const Feed* feed, const MOTs& mots, 
const std::string& tid, bool dropShapes, - osm::BBoxIdx* box, double maxSpeed, - std::vector* hopDists, uint8_t verbosity); + osm::BBoxIdx* box); private: - pfaedle::gtfs::Feed* _feed; + Feed* _feed; + ad::cppgtfs::gtfs::Feed* _evalFeed; MOTs _mots; config::MotConfig _motCfg; + eval::Collector* _ecoll; config::Config _cfg; trgraph::Graph* _g; + router::Router _crouter; + router::FeedStops* _stops; - EdgeCandGroup _emptyNCG; + NodeCandGroup _emptyNCG; - size_t _curShpCnt; + size_t _curShpCnt, _numThreads; std::mutex _shpMutex; TripRAttrs _rAttrs; osm::Restrictor* _restr; - const pfaedle::statsimiclassifier::StatsimiClassifier* _classifier; - GrpCache _grpCache; - router::Router* _router; + void buildGraph(router::FeedStops* fStops); - TripForests clusterTrips(pfaedle::gtfs::Feed* f, MOTs mots); - void buildNetGraph(TrGraphEdgs* edgs, pfaedle::netgraph::Graph* ng) const; + Clusters clusterTrips(Feed* f, MOTs mots); + void writeTransitGraph(const Shape& shp, TrGraphEdgs* edgs, + const Cluster& cluster) const; + void buildTrGraph(TrGraphEdgs* edgs, pfaedle::netgraph::Graph* ng) const; - std::string getFreeShapeId(pfaedle::gtfs::Trip* t); - ad::cppgtfs::gtfs::Shape getGtfsShape(const EdgeListHops& shp, - pfaedle::gtfs::Trip* t, - size_t numOthers, - const RoutingAttrs& rAttrs, - std::vector* hopDists, - uint32_t* bestColor); + std::string getFreeShapeId(Trip* t); - void setShape(pfaedle::gtfs::Trip* t, const ad::cppgtfs::gtfs::Shape& s, - const std::vector& dists); + ad::cppgtfs::gtfs::Shape getGtfsShape(const Shape& shp, Trip* t, + std::vector* hopDists); - EdgeCandGroup getEdgCands(const ad::cppgtfs::gtfs::Stop* s) const; + void setShape(Trip* t, const ad::cppgtfs::gtfs::Shape& s, + const std::vector& dists); - router::EdgeCandMap getECM(const TripTrie* trie) const; - std::vector getTransTimes(pfaedle::gtfs::Trip* trip) const; - std::vector getTransDists(pfaedle::gtfs::Trip* trip) const; - const router::RoutingAttrs& getRAttrs(const pfaedle::gtfs::Trip* trip) const; - 
const router::RoutingAttrs& getRAttrs(const pfaedle::gtfs::Trip* trip); - std::map route( - const TripTrie* trie, const EdgeCandMap& ecm, - HopCache* hopCache) const; - double emWeight(double mDist) const; - - void buildCandCache(const TripForests& clusters); - void buildIndex(); - - std::vector getGeom(const EdgeListHops& shp, const RoutingAttrs& rAttrs, - std::map* colors, Trip* t, - size_t numOthers) const; - double timePen(int candTime, int schedTime) const; - - LINE getLine(const EdgeListHop& hop, const RoutingAttrs&, - std::map* colMap) const; - LINE getLine(const trgraph::Edge* edg) const; - std::vector getMeasure(const std::vector& lines) const; - - trgraph::Edge* deg2reachable(trgraph::Edge* e, - std::set edgs) const; - - EdgeCandGroup timeExpand(const EdgeCand& ec, int time) const; - - std::set getColorMatch(const trgraph::Edge* e, - const RoutingAttrs& rAttrs) const; - - void updateRouteColors(const RouteRefColors& c); - - uint32_t getTextColor(uint32_t c) const; - - void writeTransitGraph(const router::EdgeListHops& shp, TrGraphEdgs* edgs, - const std::vector& trips) const; - - void shapeWorker( - const std::vector* tries, std::atomic* at, - std::map* shpUsage, - std::map>>*, - TrGraphEdgs* gtfsGraph); - - void edgCandWorker(std::vector* stops, GrpCache* cache); - void clusterWorker(const std::vector* rAttrs, - const std::map>* trips, - TripForests* forest); - - pfaedle::trgraph::EdgeGrid _eGrid; - pfaedle::trgraph::NodeGrid _nGrid; + router::NodeCandRoute getNCR(Trip* trip) const; + double avgHopDist(Trip* trip) const; + const router::RoutingAttrs& getRAttrs(const Trip* trip) const; + const router::RoutingAttrs& getRAttrs(const Trip* trip); + bool routingEqual(Trip* a, Trip* b); + bool routingEqual(const Stop* a, const Stop* b); + router::EdgeListHops route(const router::NodeCandRoute& ncr, + const router::RoutingAttrs& rAttrs) const; }; - } // namespace router } // namespace pfaedle diff --git a/src/pfaedle/router/Stats.h b/src/pfaedle/router/Stats.h 
deleted file mode 100644 index fa3659a..0000000 --- a/src/pfaedle/router/Stats.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#ifndef PFAEDLE_ROUTER_STATS_H_ -#define PFAEDLE_ROUTER_STATS_H_ - -#include -#include -#include -#include "util/String.h" - -namespace pfaedle { -namespace router { - -struct Stats { - Stats() - : totNumTrips(0), - numTries(0), - numTrieLeafs(0), - solveTime(0), - dijkstraIters(0) {} - size_t totNumTrips; - size_t numTries; - size_t numTrieLeafs; - double solveTime; - size_t dijkstraIters; -}; - -inline Stats operator+ (const Stats& c1, const Stats& c2) { - Stats ret = c1; - ret.totNumTrips += c2.totNumTrips; - ret.numTries += c2.numTries; - ret.numTrieLeafs += c2.numTrieLeafs; - ret.solveTime += c2.solveTime; - ret.dijkstraIters += c2.dijkstraIters; - return ret; -} - -inline Stats& operator+= (Stats& c1, const Stats& c2) { - c1 = c1 + c2; - return c1; -} - -} // namespace router -} // namespace pfaedle - -#endif // PFAEDLE_ROUTER_STATS_H_ diff --git a/src/pfaedle/router/TripTrie.h b/src/pfaedle/router/TripTrie.h deleted file mode 100644 index dfcfde5..0000000 --- a/src/pfaedle/router/TripTrie.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Authors: Patrick Brosi - -#ifndef PFAEDLE_ROUTER_TRIPTRIE_H_ -#define PFAEDLE_ROUTER_TRIPTRIE_H_ - -#include -#include -#include -#include "ad/cppgtfs/gtfs/Feed.h" -#include "pfaedle/gtfs/Feed.h" -#include "pfaedle/gtfs/StopTime.h" -#include "pfaedle/router/RoutingAttrs.h" - -namespace pfaedle { -namespace router { - -struct TripTrieNd { - const ad::cppgtfs::gtfs::Stop* reprStop; - std::string stopName; // the stop name at this node - std::string platform; // the platform of node - POINT pos; // the position of this node - double lat, lng; - int time; - bool arr; - int accTime; - size_t trips; - size_t parent; - std::vector childs; - RoutingAttrs rAttrs; -}; - -template -class TripTrie { - public: - // init node 0, this is the first decision node - TripTrie() : _nds(1) {} - bool addTrip(TRIP* trip, const RoutingAttrs& rAttrs, - bool timeEx, bool degen); - - const std::vector& getNds() const; - const TripTrieNd& getNd(size_t nid) const; - - void toDot(std::ostream& os, const std::string& rootName, size_t gid) const; - const std::map>& getNdTrips() const; - - private: - std::vector _nds; - std::map _tripNds; - std::map> _ndTrips; - - bool add(TRIP* trip, const RoutingAttrs& rAttrs, bool timeEx); - size_t get(TRIP* trip, bool timeEx); - - size_t getMatchChild(size_t parentNid, const std::string& stopName, - const std::string& platform, POINT pos, int time, - bool timeEx) const; - size_t insert(const ad::cppgtfs::gtfs::Stop* stop, const RoutingAttrs& rAttrs, - const POINT& pos, int time, bool arr, size_t parent); -}; - -#include "pfaedle/router/TripTrie.tpp" -} // namespace router -} // namespace pfaedle - -#endif // PFAEDLE_ROUTER_TRIPTRIE_H_ diff --git a/src/pfaedle/router/TripTrie.tpp b/src/pfaedle/router/TripTrie.tpp deleted file mode 100644 index 9f78915..0000000 --- a/src/pfaedle/router/TripTrie.tpp +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Authors: Patrick Brosi - -#include -#include -#include - -#include "TripTrie.h" -#include "ad/cppgtfs/gtfs/Feed.h" -#include "pfaedle/gtfs/Feed.h" -#include "pfaedle/gtfs/StopTime.h" - -using pfaedle::gtfs::Trip; -using pfaedle::router::TripTrie; - -// _____________________________________________________________________________ -template -bool TripTrie::addTrip(TRIP* trip, const RoutingAttrs& rAttrs, - bool timeEx, bool degen) { - if (!degen) return add(trip, rAttrs, timeEx); - - // check if trip is already fully and uniquely contained, if not, fail - size_t existing = get(trip, timeEx); - if (existing && _nds[existing].childs.size() == 0) { - _tripNds[trip] = existing; - _ndTrips[existing].push_back(trip); - return true; - } else { - return false; - } -} - -// _____________________________________________________________________________ -template -bool TripTrie::add(TRIP* trip, const RoutingAttrs& rAttrs, bool timeEx) { - if (trip->getStopTimes().size() == 0) return false; - - int startSecs = 0; - - if (!trip->getStopTimes().front().getDepartureTime().empty()) { - startSecs = trip->getStopTimes().front().getDepartureTime().seconds(); - } - - size_t curNdId = 0; - for (size_t stId = 0; stId < trip->getStopTimes().size(); stId++) { - const auto st = trip->getStopTimes()[stId]; - - std::string name = st.getStop()->getName(); - std::string platform = st.getStop()->getPlatformCode(); - POINT pos = util::geo::latLngToWebMerc(st.getStop()->getLat(), - st.getStop()->getLng()); - - if (stId > 0) { - int arrTime = startSecs; - - if (!st.getArrivalTime().empty()) { - arrTime = st.getArrivalTime().seconds() - startSecs; - } - - size_t arrChild = - getMatchChild(curNdId, name, platform, pos, arrTime, timeEx); - - if (arrChild) { - curNdId = arrChild; - - _nds[arrChild].accTime += arrTime; - _nds[arrChild].trips += 1; - - _nds[arrChild].rAttrs.merge(rAttrs); - } else { - curNdId = insert(st.getStop(), rAttrs, pos, arrTime, true, curNdId); - } - } - - if (stId < 
trip->getStopTimes().size() - 1) { - int depTime = startSecs; - - if (!st.getDepartureTime().empty()) { - depTime = st.getDepartureTime().seconds() - startSecs; - } - - size_t depChild = - getMatchChild(curNdId, name, platform, pos, depTime, timeEx); - - if (depChild) { - curNdId = depChild; - - _nds[depChild].accTime += depTime; - _nds[depChild].trips += 1; - - _nds[depChild].rAttrs.merge(rAttrs); - } else { - if (stId == 0 && _tripNds.size() > 0) return false; - curNdId = insert(st.getStop(), rAttrs, pos, depTime, false, curNdId); - } - } - } - - // curNdId is now the last matching node, insert the trip here - _tripNds[trip] = curNdId; - _ndTrips[curNdId].push_back(trip); - - return true; -} - -// _____________________________________________________________________________ -template -size_t TripTrie::get(TRIP* trip, bool timeEx) { - if (trip->getStopTimes().size() == 0) return false; - - int startSecs = trip->getStopTimes().front().getDepartureTime().seconds(); - - size_t curNdId = 0; - for (size_t stId = 0; stId < trip->getStopTimes().size(); stId++) { - const auto st = trip->getStopTimes()[stId]; - - std::string name = st.getStop()->getName(); - std::string platform = st.getStop()->getPlatformCode(); - POINT pos = util::geo::latLngToWebMerc(st.getStop()->getLat(), - st.getStop()->getLng()); - - if (stId > 0) { - int arrTime = startSecs; - - if (!st.getArrivalTime().empty()) { - arrTime = st.getArrivalTime().seconds() - startSecs; - } - - size_t arrChild = - getMatchChild(curNdId, name, platform, pos, arrTime, timeEx); - - if (arrChild) { - curNdId = arrChild; - } else { - return 0; - } - } - - if (stId < trip->getStopTimes().size() - 1) { - int depTime = startSecs; - - if (!st.getDepartureTime().empty()) { - depTime = st.getDepartureTime().seconds() - startSecs; - } - - size_t depChild = - getMatchChild(curNdId, name, platform, pos, depTime, timeEx); - - if (depChild) { - curNdId = depChild; - } else { - return 0; - } - } - } - - return curNdId; -} - -// 
_____________________________________________________________________________ -template -size_t TripTrie::insert(const ad::cppgtfs::gtfs::Stop* stop, - const RoutingAttrs& rAttrs, const POINT& pos, - int time, bool arr, size_t parent) { - _nds.emplace_back(TripTrieNd{stop, - stop->getName(), - stop->getPlatformCode(), - pos, - stop->getLat(), - stop->getLng(), - time, - arr, - time, - 1, - parent, - {}, - rAttrs}); - _nds[parent].childs.push_back(_nds.size() - 1); - return _nds.size() - 1; -} - -// _____________________________________________________________________________ -template -const std::vector& TripTrie::getNds() const { - return _nds; -} - -// _____________________________________________________________________________ -template -size_t TripTrie::getMatchChild(size_t parentNid, - const std::string& stopName, - const std::string& platform, POINT pos, - int time, bool timeEx) const { - for (size_t child : _nds[parentNid].childs) { - if (_nds[child].stopName == stopName && _nds[child].platform == platform && - util::geo::dist(_nds[child].pos, pos) < 1 && - (!timeEx || _nds[child].time == time)) { - return child; - } - } - - return 0; -} - -// _____________________________________________________________________________ -template -void TripTrie::toDot(std::ostream& os, const std::string& rootName, - size_t gid) const { - os << "digraph triptrie" << gid << " {"; - - for (size_t nid = 0; nid < _nds.size(); nid++) { - std::string color = "white"; - if (_ndTrips.count(nid)) color = "red"; - if (nid == 0) { - os << "\"" << gid << ":0\" [label=\"" << rootName << "\"];\n"; - } else { - os << "\"" << gid << ":" << nid - << "\" [shape=\"box\" style=\"filled\" fillcolor=\"" << color - << "\" label=\"#" << nid << ", " << _nds[nid].stopName << "@" - << util::geo::getWKT(_nds[nid].pos) << " t=" << _nds[nid].time - << "\"];\n"; - } - } - - for (size_t nid = 0; nid < _nds.size(); nid++) { - for (size_t child : _nds[nid].childs) { - os << "\"" << gid << ":" << nid << "\" 
-> \"" << gid << ":" << child - << "\";\n"; - } - } - - os << "}"; -} - -// _____________________________________________________________________________ -template -const std::map>& TripTrie::getNdTrips() const { - return _ndTrips; -} - -// _____________________________________________________________________________ -template -const pfaedle::router::TripTrieNd& TripTrie::getNd(size_t nid) const { - return _nds[nid]; -} diff --git a/src/pfaedle/router/Weights.cpp b/src/pfaedle/router/Weights.cpp deleted file mode 100644 index 4b96918..0000000 --- a/src/pfaedle/router/Weights.cpp +++ /dev/null @@ -1,259 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#include -#include "pfaedle/router/Weights.h" - -using pfaedle::router::DistDiffTransWeight; -using pfaedle::router::ExpoTransWeight; -using pfaedle::router::LineSimilarity; -using pfaedle::router::NormDistrTransWeight; -using util::geo::haversine; - -// _____________________________________________________________________________ -ExpoTransWeight::DistHeur::DistHeur(double maxV, const RoutingOpts& rOpts, - const std::set& tos) - : _rOpts(rOpts), _maxV(maxV), _maxCentD(0), _lastE(0) { - size_t c = 0; - double x = 0, y = 0; - - for (const auto to : tos) { - x += to->getFrom()->pl().getGeom()->getX(); - y += to->getFrom()->pl().getGeom()->getY(); - c++; - } - - x /= c; - y /= c; - - _center = POINT{x, y}; - - for (const auto to : tos) { - const double cur = haversine(*to->getFrom()->pl().getGeom(), _center); - if (cur > _maxCentD) _maxCentD = cur; - } - - _maxCentD /= _maxV; -} - -// _____________________________________________________________________________ -uint32_t ExpoTransWeight::DistHeur::operator()( - const trgraph::Edge* a, const std::set& b) const { - UNUSED(b); - - // avoid repeated calculation for the same edge over and over again - if (a == _lastE) return _lastC; - - _lastE = a; - - const double d = 
haversine(*a->getFrom()->pl().getGeom(), _center); - const double heur = fmax(0, (d / _maxV - _maxCentD) * 10); - - // avoid overflow - if (heur > std::numeric_limits::max()) { - _lastC = std::numeric_limits::max(); - ; - return _lastC; - } - - _lastC = heur; - return heur; -} - -// _____________________________________________________________________________ -uint32_t ExpoTransWeight::CostFunc::operator()(const trgraph::Edge* from, - const trgraph::Node* n, - const trgraph::Edge* to) const { - if (!from) return 0; - - uint32_t c = from->pl().getCost(); - - if (c == std::numeric_limits::max()) return c; - - if (from == _lastFrom) { - // the transit line simi calculation is independent of the "to" edge, so if - // the last "from" edge was the same, skip it! - c = _lastC; - } else if (!_noLineSimiPen) { - const auto& simi = transitLineSimi(from); - - if (!simi.nameSimilar) { - if (_rOpts.lineUnmatchedPunishFact < 1) { - c = std::ceil(static_cast(c) * _rOpts.lineUnmatchedPunishFact); - } else if (_rOpts.lineUnmatchedPunishFact > 1) { - double a = - std::round(static_cast(c) * _rOpts.lineUnmatchedPunishFact); - if (a > std::numeric_limits::max()) - return std::numeric_limits::max(); - c = a; - } - } - - if (!simi.fromSimilar) { - if (_rOpts.lineNameFromUnmatchedPunishFact < 1) { - c = std::ceil(static_cast(c) * - _rOpts.lineNameFromUnmatchedPunishFact); - } else if (_rOpts.lineNameFromUnmatchedPunishFact > 1) { - double a = std::round(static_cast(c) * - _rOpts.lineNameFromUnmatchedPunishFact); - if (a > std::numeric_limits::max()) - return std::numeric_limits::max(); - c = a; - } - } - - if (!simi.toSimilar) { - if (_rOpts.lineNameToUnmatchedPunishFact < 1) { - c = std::ceil(static_cast(c) * - _rOpts.lineNameToUnmatchedPunishFact); - } else if (_rOpts.lineNameToUnmatchedPunishFact > 1) { - double a = std::round(static_cast(c) * - _rOpts.lineNameToUnmatchedPunishFact); - if (a > std::numeric_limits::max()) - return std::numeric_limits::max(); - c = a; - } - } - - _lastC 
= c; - _lastFrom = from; - } - - uint32_t overflowCheck = c; - - if (n && !n->pl().isTurnCycle()) { - if (_rOpts.fullTurnPunishFac != 0 && from->getFrom() == to->getTo() && - from->getTo() == to->getFrom()) { - // trivial full turn - c += _rOpts.fullTurnPunishFac; - - if (c <= overflowCheck) return std::numeric_limits::max(); - overflowCheck = c; - } else if (_rOpts.fullTurnPunishFac != 0 && n->getDeg() > 2) { - // otherwise, only intersection angles will be punished - - double ang = util::geo::innerProd( - *n->pl().getGeom(), from->pl().backHop(), to->pl().frontHop()); - - if (ang < _rOpts.fullTurnAngle) { - c += _rOpts.fullTurnPunishFac; - if (c <= overflowCheck) return std::numeric_limits::max(); - overflowCheck = c; - } - } - - // turn restriction cost - if (_rOpts.turnRestrCost > 0 && from->pl().isRestricted() && - !_res.may(from, to, n)) { - c += _rOpts.turnRestrCost; - if (c <= overflowCheck) return std::numeric_limits::max(); - } - } - - return c; -} - -// _____________________________________________________________________________ -LineSimilarity ExpoTransWeight::CostFunc::transitLineSimi( - const trgraph::Edge* e) const { - if (_rAttrs.shortName.empty() && _rAttrs.lineFrom.empty() && - _rAttrs.lineTo.empty()) - return {true, true, true}; - - LineSimilarity best = {false, false, false}; - for (const auto* l : e->pl().getLines()) { - auto simi = _rAttrs.simi(l); - if (simi.nameSimilar && simi.toSimilar && simi.fromSimilar) return simi; - if (best < simi) best = simi; - } - - return best; -} - -// _____________________________________________________________________________ -double ExpoTransWeight::weight(uint32_t c, double d, double t0, double d0, - const RoutingOpts& rOpts) { - UNUSED(t0); - UNUSED(d); - UNUSED(d0); - return rOpts.transitionPen * static_cast(c) / 10.0; -} - -// _____________________________________________________________________________ -uint32_t ExpoTransWeight::invWeight(double c, const RoutingOpts& rOpts) { - return std::round((c / 
rOpts.transitionPen) * 10.0); -} - -// _____________________________________________________________________________ -uint32_t ExpoTransWeight::maxCost(double tTime, const RoutingOpts& rOpts) { - // abort after 3 times the scheduled time, but assume a min time of - // 1 minute! - return std::ceil(fmax(tTime, 60) * 3.0 * rOpts.lineUnmatchedPunishFact * - rOpts.lineNameToUnmatchedPunishFact * - rOpts.lineNameFromUnmatchedPunishFact * 10); -} - -// _____________________________________________________________________________ - -// _____________________________________________________________________________ -double NormDistrTransWeight::weight(uint32_t cs, double d, double t0, double d0, - const RoutingOpts& rOpts) { - UNUSED(d); - UNUSED(d0); - UNUSED(rOpts); - - double t = static_cast(cs) / 10.0; - - // standard deviation of normal distribution - double standarddev = 1; - - // no backwards time travel! - if (t0 < 0) return std::numeric_limits::infinity(); - - // always assume it takes at least 10 seconds to travel - t0 = fmax(10, t0); - - double cNorm = (t / t0 - 1) / standarddev; - double normWeight = cNorm * cNorm; - - double expWeight = ExpoTransWeight::weight(cs, d, t0, d0, rOpts); - - return normWeight + expWeight; -} - -// _____________________________________________________________________________ -uint32_t NormDistrTransWeight::invWeight(double c, const RoutingOpts& rOpts) { - UNUSED(rOpts); - UNUSED(c); - - throw(std::runtime_error("Cannot apply inv weight to DistDiffTransWeight")); -} - -// _____________________________________________________________________________ - -// _____________________________________________________________________________ -double DistDiffTransWeight::weight(uint32_t c, double d, double t0, double d0, - const RoutingOpts& rOpts) { - UNUSED(t0); - UNUSED(c); - - double w = fabs(d - d0); - - return rOpts.transitionPen * w; -} - -// _____________________________________________________________________________ -uint32_t 
DistDiffTransWeight::invWeight(double c, const RoutingOpts& rOpts) { - UNUSED(rOpts); - UNUSED(c); - - throw(std::runtime_error("Cannot apply inv weight to DistDiffTransWeight")); -} - -// _____________________________________________________________________________ -uint32_t DistDiffTransWeight::maxCost(double tTime, const RoutingOpts& rOpts) { - UNUSED(tTime); - UNUSED(rOpts); - return std::numeric_limits::max(); -} diff --git a/src/pfaedle/router/Weights.h b/src/pfaedle/router/Weights.h deleted file mode 100644 index 1a616f5..0000000 --- a/src/pfaedle/router/Weights.h +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#ifndef PFAEDLE_ROUTER_WEIGHTS_H_ -#define PFAEDLE_ROUTER_WEIGHTS_H_ - -#include -#include "pfaedle/osm/Restrictor.h" -#include "pfaedle/router/Misc.h" -#include "pfaedle/router/RoutingAttrs.h" -#include "pfaedle/trgraph/Graph.h" -#include "util/graph/EDijkstra.h" - -namespace pfaedle { -namespace router { - -typedef util::graph::EDijkstra::CostFunc - RCostFunc; -typedef util::graph::EDijkstra::HeurFunc - RHeurFunc; - -class ExpoTransWeight { - public: - struct CostFunc : public RCostFunc { - CostFunc(const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, - const osm::Restrictor& res, uint32_t max) - : _rAttrs(rAttrs), - _rOpts(rOpts), - _res(res), - _inf(max), - _noLineSimiPen(false), - _lastFrom(0) { - if (_rAttrs.lineFrom.empty() && _rAttrs.lineTo.empty() && - _rAttrs.shortName.empty()) { - _noLineSimiPen = true; - } - if (_rOpts.lineUnmatchedPunishFact == 1) { - _noLineSimiPen = true; - } - } - - const RoutingAttrs& _rAttrs; - const RoutingOpts& _rOpts; - const osm::Restrictor& _res; - uint32_t _inf; - bool _noLineSimiPen; - mutable const trgraph::Edge* _lastFrom; - mutable uint32_t _lastC = 0; - - uint32_t operator()(const trgraph::Edge* from, const trgraph::Node* n, - const trgraph::Edge* to) const; - uint32_t inf() const { return _inf; } - - 
LineSimilarity transitLineSimi(const trgraph::Edge* e) const; - }; - - struct DistHeur : RHeurFunc { - DistHeur(double maxV, const RoutingOpts& rOpts, - const std::set& tos); - - const RoutingOpts& _rOpts; - double _maxV; - POINT _center; - double _maxCentD; - uint32_t operator()(const trgraph::Edge* a, - const std::set& b) const; - mutable const trgraph::Edge* _lastE; - mutable uint32_t _lastC = 0; - }; - - static uint32_t maxCost(double tTime, const RoutingOpts& rOpts); - static double weight(uint32_t c, double d, double t0, double d0, - const RoutingOpts& rOpts); - static uint32_t invWeight(double cost, const RoutingOpts& rOpts); - static const bool ALLOWS_FAST_ROUTE = true; - static const bool NEED_DIST = false; -}; - -class ExpoTransWeightNoHeur : public ExpoTransWeight { - public: - struct DistHeur : RHeurFunc { - DistHeur(double maxV, const RoutingOpts& rOpts, - const std::set& tos) { - UNUSED(maxV); - UNUSED(rOpts); - UNUSED(tos); - } - - uint32_t operator()(const trgraph::Edge* a, - const std::set& b) const { - UNUSED(a); - UNUSED(b); - return 0; - } - }; -}; - -class NormDistrTransWeight : public ExpoTransWeight { - public: - static double weight(uint32_t c, double d, double t0, double d0, - const RoutingOpts& rOpts); - static uint32_t invWeight(double cost, const RoutingOpts& rOpts); - static const bool ALLOWS_FAST_ROUTE = false; - static const bool NEED_DIST = false; -}; - -class NormDistrTransWeightNoHeur : public NormDistrTransWeight { - public: - struct DistHeur : RHeurFunc { - DistHeur(double maxV, const RoutingOpts& rOpts, - const std::set& tos) { - UNUSED(maxV); - UNUSED(rOpts); - UNUSED(tos); - } - - uint32_t operator()(const trgraph::Edge* a, - const std::set& b) const { - UNUSED(a); - UNUSED(b); - return 0; - } - }; -}; - -class DistDiffTransWeight : public ExpoTransWeight { - public: - static uint32_t maxCost(double tTime, const RoutingOpts& rOpts); - static double weight(uint32_t c, double d, double t0, double d0, - const RoutingOpts& rOpts); 
- static uint32_t invWeight(double cost, const RoutingOpts& rOpts); - static const bool ALLOWS_FAST_ROUTE = false; - static const bool NEED_DIST = true; -}; - -class DistDiffTransWeightNoHeur : public DistDiffTransWeight { - public: - struct DistHeur : RHeurFunc { - DistHeur(double maxV, const RoutingOpts& rOpts, - const std::set& tos) { - UNUSED(maxV); - UNUSED(rOpts); - UNUSED(tos); - } - - uint32_t operator()(const trgraph::Edge* a, - const std::set& b) const { - UNUSED(a); - UNUSED(b); - return 0; - } - }; -}; - -} // namespace router -} // namespace pfaedle - -#endif // PFAEDLE_ROUTER_WEIGHTS_H_ diff --git a/src/pfaedle/statsimi-classifier/StatsimiClassifier.cpp b/src/pfaedle/statsimi-classifier/StatsimiClassifier.cpp deleted file mode 100644 index ca90e39..0000000 --- a/src/pfaedle/statsimi-classifier/StatsimiClassifier.cpp +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#include -#include -#include -#include "pfaedle/Def.h" -#include "pfaedle/statsimi-classifier/StatsimiClassifier.h" -#include "util/geo/Geo.h" - -using pfaedle::statsimiclassifier::BTSClassifier; -using pfaedle::statsimiclassifier::EDClassifier; -using pfaedle::statsimiclassifier::JaccardClassifier; -using pfaedle::statsimiclassifier::JaccardGeodistClassifier; -using pfaedle::statsimiclassifier::PEDClassifier; - -// _____________________________________________________________________________ -bool JaccardGeodistClassifier::similar(const std::string& nameA, - const POINT& posA, - const std::string& nameB, - const POINT& posB) const { - const double THRES_M = - 0.00815467271246994481; // ln 2/85 from statsimi evaluation - const double THRES_JACC = .5; // from statsimi evaluation - - const double m = exp(-THRES_M * util::geo::haversine(posA, posB)); - double jacc = util::jaccardSimi(nameA, nameB); - - if (jacc > THRES_JACC) - jacc = .5 + (jacc - THRES_JACC) / (2.0 * (1.0 - THRES_JACC)); - else 
- jacc = jacc / (2.0 * THRES_JACC); - - return ((m + jacc) / 2.0) > 0.5; -} - -// _____________________________________________________________________________ -bool JaccardGeodistClassifier::similar(const std::string& nameA, - const std::string& nameB) const { - return util::jaccardSimi(nameA, nameB) > 0.45; // 0.45 from statsimi paper -} - -// _____________________________________________________________________________ -bool JaccardClassifier::similar(const std::string& nameA, const POINT& posA, - const std::string& nameB, - const POINT& posB) const { - UNUSED(posA); - UNUSED(posB); - return similar(nameA, nameB); -} - -// _____________________________________________________________________________ -bool JaccardClassifier::similar(const std::string& nameA, - const std::string& nameB) const { - return util::jaccardSimi(nameA, nameB) > 0.45; // 0.45 from statsimi paper -} - -// _____________________________________________________________________________ -bool BTSClassifier::similar(const std::string& nameA, const POINT& posA, - const std::string& nameB, const POINT& posB) const { - UNUSED(posA); - UNUSED(posB); - return similar(nameA, nameB); -} - -// _____________________________________________________________________________ -bool BTSClassifier::similar(const std::string& nameA, - const std::string& nameB) const { - return util::btsSimi(nameA, nameB) > 0.85; // 0.85 from statsimi paper -} - -// _____________________________________________________________________________ -bool EDClassifier::similar(const std::string& nameA, const POINT& posA, - const std::string& nameB, const POINT& posB) const { - UNUSED(posA); - UNUSED(posB); - return similar(nameA, nameB); -} - -// _____________________________________________________________________________ -bool EDClassifier::similar(const std::string& nameA, - const std::string& nameB) const { - double edSimi = 1.0 - ((util::editDist(nameA, nameB) * 1.0) / - fmax(nameA.size(), nameB.size())); - return edSimi > 0.85; // 
0.85 from statsimi paper -} - -// _____________________________________________________________________________ -bool PEDClassifier::similar(const std::string& nameA, const POINT& posA, - const std::string& nameB, const POINT& posB) const { - UNUSED(posA); - UNUSED(posB); - return similar(nameA, nameB); -} - -// _____________________________________________________________________________ -bool PEDClassifier::similar(const std::string& nameA, - const std::string& nameB) const { - double a = (util::prefixEditDist(nameA, nameB) * 1.0) / (nameA.size() * 1.0); - double b = (util::prefixEditDist(nameB, nameA) * 1.0) / (nameB.size() * 1.0); - double pedSimi = 1.0 - fmin(a, b); - return pedSimi > 0.875; // 0.875 average of values from statsimi paper -} diff --git a/src/pfaedle/statsimi-classifier/StatsimiClassifier.h b/src/pfaedle/statsimi-classifier/StatsimiClassifier.h deleted file mode 100644 index cdde205..0000000 --- a/src/pfaedle/statsimi-classifier/StatsimiClassifier.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2020, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Authors: Patrick Brosi - -#ifndef PFAEDLE_STATSIMI_CLASSIFIER_STATSIMICLASSIFIER_H_ -#define PFAEDLE_STATSIMI_CLASSIFIER_STATSIMICLASSIFIER_H_ - -#include -#include "pfaedle/Def.h" -#include "util/geo/Geo.h" - -namespace pfaedle { -namespace statsimiclassifier { - -class StatsimiClassifier { - public: - virtual ~StatsimiClassifier() {} - virtual bool similar(const std::string& nameA, const POINT& posA, - const std::string& nameB, const POINT& posB) const = 0; - - virtual bool similar(const std::string& nameA, - const std::string& nameB) const = 0; -}; - -class JaccardClassifier : public StatsimiClassifier { - public: - virtual bool similar(const std::string& nameA, const POINT& posA, - const std::string& nameB, const POINT& posB) const; - virtual bool similar(const std::string& nameA, - const std::string& nameB) const; -}; - -class JaccardGeodistClassifier : public StatsimiClassifier { - public: - virtual bool similar(const std::string& nameA, const POINT& posA, - const std::string& nameB, const POINT& posB) const; - virtual bool similar(const std::string& nameA, - const std::string& nameB) const; -}; - -class BTSClassifier : public StatsimiClassifier { - public: - virtual bool similar(const std::string& nameA, const POINT& posA, - const std::string& nameB, const POINT& posB) const; - virtual bool similar(const std::string& nameA, - const std::string& nameB) const; -}; - -class EDClassifier : public StatsimiClassifier { - public: - virtual bool similar(const std::string& nameA, const POINT& posA, - const std::string& nameB, const POINT& posB) const; - virtual bool similar(const std::string& nameA, - const std::string& nameB) const; -}; - -class PEDClassifier : public StatsimiClassifier { - public: - virtual bool similar(const std::string& nameA, const POINT& posA, - const std::string& nameB, const POINT& posB) const; - virtual bool similar(const std::string& nameA, - const std::string& nameB) const; -}; - -} // namespace statsimiclassifier -} // namespace 
pfaedle - -#endif // PFAEDLE_STATSIMI_CLASSIFIER_STATSIMICLASSIFIER_H_ diff --git a/src/pfaedle/tests/CMakeLists.txt b/src/pfaedle/tests/CMakeLists.txt deleted file mode 100644 index deb3632..0000000 --- a/src/pfaedle/tests/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -add_executable(pfaedleTest TestMain.cpp) -target_link_libraries(pfaedleTest pfaedle_dep util) diff --git a/src/pfaedle/tests/TestMain.cpp b/src/pfaedle/tests/TestMain.cpp deleted file mode 100644 index 4187a9b..0000000 --- a/src/pfaedle/tests/TestMain.cpp +++ /dev/null @@ -1,329 +0,0 @@ -// Copyright 2020 -// Author: Patrick Brosi - -#include "pfaedle/osm/Restrictor.h" - -#define private public -#include "pfaedle/router/Router.h" -#undef private -#define private private - -using pfaedle::osm::Restrictor; -using pfaedle::router::CostMatrix; -using pfaedle::router::EdgeCandGroup; -using pfaedle::router::ExpoTransWeight; -using pfaedle::router::LayerCostsDAG; -using pfaedle::router::RouterImpl; -using pfaedle::router::RoutingAttrs; -using pfaedle::router::RoutingOpts; -using util::approx; - -// _____________________________________________________________________________ -uint32_t cmGet(const CostMatrix& m, size_t i, size_t j) { - for (const auto& e : m) { - if (e.first.first == i && e.first.second == j) return e.second; - } - - return -1; -} - -// _____________________________________________________________________________ -int main(int argc, char** argv) { - UNUSED(argc); - UNUSED(argv); - RouterImpl router; - - RoutingAttrs rAttrs; - RoutingOpts rOpts; - Restrictor restr; - LayerCostsDAG initCosts; - - // to make sure we always underestimate the cost in the heuristic for testing - pfaedle::trgraph::NodePL::comps.emplace_back( - pfaedle::trgraph::Component{9999999}); - - // build transit graph - pfaedle::trgraph::Graph g; - auto a = g.addNd(POINT{0, 0}); - auto b = g.addNd(POINT{0, 10}); - auto c = g.addNd(POINT{10, 0}); - auto d = g.addNd(POINT{20, 0}); - - a->pl().setComp(1); - b->pl().setComp(1); 
- c->pl().setComp(1); - d->pl().setComp(1); - - auto eA = g.addEdg(a, c); - auto eB = g.addEdg(b, c); - auto eC = g.addEdg(c, d); - - eA->pl().setCost(10); - eB->pl().setCost(6); - eC->pl().setCost(100); - - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - froms.push_back({eA, 0, 0, {}, 0, {}}); - froms.push_back({eB, 0, 0, {}, 0, {}}); - tos.push_back({eC, 0, 0, {}, 0, {}}); - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hops(froms, tos, &costM, &dists, rAttrs, rOpts, restr, &c, maxTime); - - TEST(cmGet(costM, 0, 0), ==, approx(10)); - TEST(cmGet(costM, 1, 0), ==, approx(6)); - } - - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - froms.push_back({eA, 0, 0, {}, 0, {}}); - froms.push_back({eB, 0, 0, {}, 0, {}}); - tos.push_back({eC, 0, 0.5, {}, 0, {}}); - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hops(froms, tos, &costM, &dists, rAttrs, rOpts, restr, &c, maxTime); - - TEST(cmGet(costM, 0, 0), ==, approx(50 + 10)); - TEST(cmGet(costM, 1, 0), ==, approx(50 + 6)); - } - - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - froms.push_back({eA, 0, 0.5, {}, 0, {}}); - froms.push_back({eB, 0, 2.0 / 3.0, {}, 0, {}}); - tos.push_back({eC, 0, 0, {}, 0, {}}); - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hops(froms, tos, &costM, &dists, rAttrs, rOpts, restr, &c, maxTime); - - TEST(cmGet(costM, 0, 0), ==, approx(5)); - TEST(cmGet(costM, 1, 0), ==, approx(2)); - } - - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - froms.push_back({eA, 0, 0.5, {}, 0, {}}); - froms.push_back({eB, 0, 2.0 / 3.0, {}, 0, {}}); - tos.push_back({eC, 0, 0.9, {}, 0, {}}); - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hops(froms, tos, &costM, &dists, rAttrs, rOpts, restr, &c, maxTime); - - TEST(cmGet(costM, 0, 0), ==, approx(90 + 5)); - TEST(cmGet(costM, 1, 0), ==, approx(90 + 2)); - } - - // with hopsfast - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - 
froms.push_back({eA, 0, 0, {}, 0, {}}); - froms.push_back({eB, 0, 0, {}, 0, {}}); - tos.push_back({eC, 0, 0, {}, 0, {}}); - - LayerCostsDAG initCost{0, 0}; - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, - maxTime); - - TEST(cmGet(costM, 0, 0), >=, maxTime); - TEST(cmGet(costM, 1, 0), ==, approx(6)); - } - - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - froms.push_back({eA, 0, 0, {}, 0, {}}); - froms.push_back({eB, 0, 0, {}, 0, {}}); - tos.push_back({eC, 0, 0.5, {}, 0, {}}); - - LayerCostsDAG initCost{0, 0}; - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, - maxTime); - - TEST(cmGet(costM, 0, 0), >=, maxTime); - TEST(cmGet(costM, 1, 0), ==, approx(50 + 6)); - } - - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - froms.push_back({eA, 0, 0.5, {}, 0, {}}); - froms.push_back({eB, 0, 2.0 / 3.0, {}, 0, {}}); - tos.push_back({eC, 0, 0, {}, 0, {}}); - - LayerCostsDAG initCost{0, 0}; - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, - maxTime); - - TEST(cmGet(costM, 0, 0), >=, maxTime); - TEST(cmGet(costM, 1, 0), ==, approx(2)); - } - - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - froms.push_back({eA, 0, 0.5, {}, 0, {}}); - froms.push_back({eB, 0, 2.0 / 3.0, {}, 0, {}}); - tos.push_back({eC, 0, 0.9, {}, 0, {}}); - - LayerCostsDAG initCost{0, 0}; - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, - maxTime); - - TEST(cmGet(costM, 0, 0), >=, maxTime); - TEST(cmGet(costM, 1, 0), ==, approx(90 + 2)); - } - - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - froms.push_back({eA, 0, 0.5, {}, 0, {}}); - froms.push_back({eB, 0, 0, {}, 0, {}}); - tos.push_back({eC, 0, 0, {}, 0, {}}); - - 
LayerCostsDAG initCost{0, 0}; - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, - maxTime); - - TEST(cmGet(costM, 0, 0), ==, approx(5)); - TEST(cmGet(costM, 1, 0), >=, maxTime); - } - - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - froms.push_back({eA, 0, 0.5, {}, 0, {}}); - froms.push_back({eB, 0, 0, {}, 0, {}}); - tos.push_back({eC, 0, 0, {}, 0, {}}); - - LayerCostsDAG initCost{9999, 0}; - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, - maxTime); - - TEST(cmGet(costM, 0, 0), ==, approx(5)); - TEST(cmGet(costM, 1, 0), >=, maxTime); - } - - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - froms.push_back({eA, 0, 0.5, {}, 0, {}}); - froms.push_back({eA, 0, 0, {}, 0, {}}); - froms.push_back({eB, 0, 0, {}, 0, {}}); - tos.push_back({eC, 0, 0, {}, 0, {}}); - - LayerCostsDAG initCost{6, 0, 20}; - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, - maxTime); - - // we also get this, because the edge is the same! - TEST(cmGet(costM, 0, 0), ==, approx(5)); - TEST(cmGet(costM, 1, 0), ==, approx(10)); - TEST(cmGet(costM, 2, 0), >=, maxTime); - } - - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - froms.push_back({eA, 0, 0.5, {}, 0, {}}); - froms.push_back({eA, 0, 0, {}, 0, {}}); - froms.push_back({eB, 0, 0, {}, 0, {}}); - tos.push_back({eC, 0, 1, {}, 0, {}}); - - LayerCostsDAG initCost{6, 0, 20}; - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, - maxTime); - - // we also get this, because the edge is the same! 
- TEST(cmGet(costM, 0, 0), ==, approx(5 + 100)); - TEST(cmGet(costM, 1, 0), ==, approx(10 + 100)); - TEST(cmGet(costM, 2, 0), >=, maxTime); - } - - { - EdgeCandGroup froms, tos; - CostMatrix costM, dists; - froms.push_back({eA, 0, 0.5, {}, 0, {}}); - froms.push_back({eA, 0, 0, {}, 0, {}}); - froms.push_back({eB, 0, 0, {}, 0, {}}); - - tos.push_back({eC, 0, 1, {}, 0, {}}); - tos.push_back({eC, 0, 0.5, {}, 0, {}}); - - LayerCostsDAG initCost{6, 0, 20}; - - double maxTime = 9999; - - pfaedle::router::HopCache c; - - router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, - maxTime); - - // we also get this, because the edge is the same! - TEST(cmGet(costM, 0, 0), ==, approx(5 + 100)); - TEST(cmGet(costM, 1, 0), ==, approx(10 + 100)); - TEST(cmGet(costM, 0, 1), ==, approx(5 + 50)); - TEST(cmGet(costM, 1, 1), ==, approx(10 + 50)); - TEST(cmGet(costM, 2, 0), >=, maxTime); - TEST(cmGet(costM, 2, 1), >=, maxTime); - } - - exit(0); -} diff --git a/src/pfaedle/trgraph/EdgePL.cpp b/src/pfaedle/trgraph/EdgePL.cpp index 7c5b613..cd6c241 100644 --- a/src/pfaedle/trgraph/EdgePL.cpp +++ b/src/pfaedle/trgraph/EdgePL.cpp @@ -11,12 +11,15 @@ using pfaedle::trgraph::EdgePL; using pfaedle::trgraph::TransitEdgeLine; + std::map EdgePL::_flines; std::map EdgePL::_tlines; // _____________________________________________________________________________ EdgePL::EdgePL() - : _oneWay(0), _hasRestr(false), _rev(false), _lvl(0), _cost(0), _l(0) { + : _length(0), _oneWay(0), _hasRestr(false), _rev(false), _lvl(0) { + _l = new LINE(); + _flines[_l] = 1; } // _____________________________________________________________________________ @@ -24,20 +27,17 @@ EdgePL::EdgePL(const EdgePL& pl) : EdgePL(pl, false) {} // _____________________________________________________________________________ EdgePL::EdgePL(const EdgePL& pl, bool geoflat) - : _oneWay(pl._oneWay), + : _length(pl._length), + _oneWay(pl._oneWay), _hasRestr(pl._hasRestr), _rev(pl._rev), - _lvl(pl._lvl), - _cost(pl._cost), 
- _l(0) { - if (pl._l) { - if (geoflat) { - _l = pl._l; - } else { - _l = new LINE(*pl._l); - } - _flines[_l]++; + _lvl(pl._lvl) { + if (geoflat) { + _l = pl._l; + } else { + _l = new LINE(*pl._l); } + _flines[_l]++; for (auto l : pl._lines) addLine(l); } @@ -75,23 +75,16 @@ EdgePL EdgePL::revCopy() const { } // _____________________________________________________________________________ -double EdgePL::getLength() const { - double len = 0; +void EdgePL::setLength(double d) { _length = d; } - for (size_t i = 1; i < _l->size(); i++) { - len += haversine((*_l)[i-1], (*_l)[i]); - } - - return len; -} +// _____________________________________________________________________________ +double EdgePL::getLength() const { return _length; } // _____________________________________________________________________________ void EdgePL::addLine(const TransitEdgeLine* l) { - auto lb = std::lower_bound(_lines.begin(), _lines.end(), l); - if (lb == _lines.end() || *lb != l) { + if (std::find(_lines.begin(), _lines.end(), l) == _lines.end()) { _lines.reserve(_lines.size() + 1); - lb = std::lower_bound(_lines.begin(), _lines.end(), l); - _lines.insert(lb, l); + _lines.push_back(l); if (_tlines.count(l)) _tlines[l]++; else @@ -110,13 +103,7 @@ const std::vector& EdgePL::getLines() const { } // _____________________________________________________________________________ -void EdgePL::addPoint(const POINT& p) { - if (!_l) { - _l = new LINE(); - _flines[_l] = 1; - } - _l->push_back(p); -} +void EdgePL::addPoint(const POINT& p) { _l->push_back(p); } // _____________________________________________________________________________ const LINE* EdgePL::getGeom() const { return _l; } @@ -127,9 +114,8 @@ LINE* EdgePL::getGeom() { return _l; } // _____________________________________________________________________________ util::json::Dict EdgePL::getAttrs() const { util::json::Dict obj; - obj["m_length"] = std::to_string(getLength()); + obj["m_length"] = std::to_string(_length); obj["oneway"] 
= std::to_string(static_cast(_oneWay)); - obj["cost"] = std::to_string(static_cast(_cost) / 10.0); obj["level"] = std::to_string(_lvl); obj["restriction"] = isRestricted() ? "yes" : "no"; @@ -166,10 +152,10 @@ void EdgePL::setOneWay(uint8_t dir) { _oneWay = dir; } void EdgePL::setOneWay() { _oneWay = 1; } // _____________________________________________________________________________ -uint32_t EdgePL::getCost() const { return _cost; } +void EdgePL::setLvl(uint8_t lvl) { _lvl = lvl; } // _____________________________________________________________________________ -void EdgePL::setCost(uint32_t c) { _cost = c; } +uint8_t EdgePL::lvl() const { return _lvl; } // _____________________________________________________________________________ void EdgePL::setRev() { _rev = true; } diff --git a/src/pfaedle/trgraph/EdgePL.h b/src/pfaedle/trgraph/EdgePL.h index 3c71fea..368ac2f 100644 --- a/src/pfaedle/trgraph/EdgePL.h +++ b/src/pfaedle/trgraph/EdgePL.h @@ -16,6 +16,8 @@ using util::geograph::GeoEdgePL; + + namespace pfaedle { namespace trgraph { @@ -26,17 +28,14 @@ struct TransitEdgeLine { std::string fromStr; std::string toStr; std::string shortName; - uint32_t color; }; inline bool operator==(const TransitEdgeLine& a, const TransitEdgeLine& b) { - // ignoring color here! return a.fromStr == b.fromStr && a.toStr == b.toStr && a.shortName == b.shortName; } inline bool operator<(const TransitEdgeLine& a, const TransitEdgeLine& b) { - // ignoring color here! 
return a.fromStr < b.fromStr || (a.fromStr == b.fromStr && a.toStr < b.toStr) || (a.fromStr == b.fromStr && a.toStr == b.toStr && @@ -66,20 +65,14 @@ class EdgePL { // Return the length in meters stored for this edge payload double getLength() const; + // Set the length in meters for this edge payload + void setLength(double d); + // Set this edge as a one way node, either in the default direction of // the edge (no arg), or the direction specified in dir void setOneWay(); void setOneWay(uint8_t dir); - void setLvl(uint8_t lvl) { assert(lvl < 9); _lvl = lvl; } - uint8_t lvl() const { return _lvl; } - - // Return the cost for this edge payload - uint32_t getCost() const; - - // Set the cost for this edge payload - void setCost(uint32_t d); - // Mark this payload' edge as having some restrictions void setRestricted(); @@ -92,6 +85,12 @@ class EdgePL { // True if this edge is restricted bool isRestricted() const; + // Set the level of this edge. + void setLvl(uint8_t lvl); + + // Return the level of this edge. + uint8_t lvl() const; + // Return the one-way code stored for this edge. 
uint8_t oneWay() const; @@ -116,11 +115,11 @@ class EdgePL { EdgePL revCopy() const; private: + float _length; uint8_t _oneWay : 2; bool _hasRestr : 1; bool _rev : 1; - uint8_t _lvl: 4; - uint32_t _cost; // costs in 1/10th seconds + uint8_t _lvl : 3; LINE* _l; diff --git a/src/pfaedle/trgraph/Graph.h b/src/pfaedle/trgraph/Graph.h index 7f7d50a..7e3baba 100644 --- a/src/pfaedle/trgraph/Graph.h +++ b/src/pfaedle/trgraph/Graph.h @@ -24,8 +24,8 @@ namespace trgraph { typedef util::graph::Edge Edge; typedef util::graph::Node Node; typedef util::graph::DirGraph Graph; -typedef Grid NodeGrid; -typedef Grid EdgeGrid; +typedef Grid NodeGrid; +typedef Grid EdgeGrid; } // namespace trgraph } // namespace pfaedle diff --git a/src/pfaedle/trgraph/NodePL.cpp b/src/pfaedle/trgraph/NodePL.cpp index b4bba8b..a0aec0a 100644 --- a/src/pfaedle/trgraph/NodePL.cpp +++ b/src/pfaedle/trgraph/NodePL.cpp @@ -3,19 +3,22 @@ // Authors: Patrick Brosi #include -#include -#include #include #include "pfaedle/trgraph/NodePL.h" +#include "pfaedle/trgraph/StatGroup.h" #include "pfaedle/trgraph/StatInfo.h" #include "util/String.h" -using pfaedle::trgraph::Component; -using pfaedle::trgraph::NodePL; using pfaedle::trgraph::StatInfo; +using pfaedle::trgraph::NodePL; +using pfaedle::trgraph::Component; -std::vector NodePL::comps; -std::vector NodePL::_statInfos; +// we use the adress of this dummy station info as a special value +// of this node, meaning "is a station block". 
Re-using the _si field here +// saves some memory +StatInfo NodePL::_blockerSI = StatInfo(); + +std::unordered_map NodePL::_comps; // _____________________________________________________________________________ NodePL::NodePL() @@ -29,6 +32,19 @@ NodePL::NodePL() { } +// _____________________________________________________________________________ +NodePL::NodePL(const NodePL& pl) + : _geom(pl._geom), + _si(0), + _component(pl._component) +#ifdef PFAEDLE_DBG + , + _vis(pl._vis) +#endif +{ + if (pl._si) setSI(*(pl._si)); +} + // _____________________________________________________________________________ NodePL::NodePL(const POINT& geom) : _geom(geom), @@ -54,6 +70,18 @@ NodePL::NodePL(const POINT& geom, const StatInfo& si) setSI(si); } +// _____________________________________________________________________________ +NodePL::~NodePL() { + if (getSI()) delete _si; + if (_component) { + _comps[_component]--; + if (_comps[_component] == 0) { + delete _component; + _comps.erase(_comps.find(_component)); + } + } +} + // _____________________________________________________________________________ void NodePL::setVisited() const { #ifdef PFAEDLE_DBG @@ -65,14 +93,18 @@ void NodePL::setVisited() const { void NodePL::setNoStat() { _si = 0; } // _____________________________________________________________________________ -const Component& NodePL::getComp() const { return comps[_component - 1]; } +const Component* NodePL::getComp() const { return _component; } // _____________________________________________________________________________ -uint32_t NodePL::getCompId() const { return _component; } +void NodePL::setComp(const Component* c) { + if (_component == c) return; + _component = c; -// _____________________________________________________________________________ -void NodePL::setComp(uint32_t id) { - _component = id; + // NOT thread safe! 
+ if (!_comps.count(c)) + _comps[c] = 1; + else + _comps[c]++; } // _____________________________________________________________________________ @@ -84,59 +116,54 @@ void NodePL::setGeom(const POINT& geom) { _geom = geom; } // _____________________________________________________________________________ util::json::Dict NodePL::getAttrs() const { util::json::Dict obj; - obj["component"] = std::to_string(_component); + obj["component"] = std::to_string(reinterpret_cast(_component)); #ifdef PFAEDLE_DBG obj["dijkstra_vis"] = _vis ? "yes" : "no"; #endif if (getSI()) { obj["station_info_ptr"] = util::toString(_si); - obj["station_name"] = getSI()->getName(); - obj["station_alt_names"] = - util::implode(getSI()->getAltNames(), ","); - obj["station_platform"] = getSI()->getTrack(); + obj["station_name"] = _si->getName(); + obj["station_alt_names"] = util::implode(_si->getAltNames(), ","); + obj["from_osm"] = _si->isFromOsm() ? "yes" : "no"; + obj["station_platform"] = _si->getTrack(); + obj["station_group"] = + std::to_string(reinterpret_cast(_si->getGroup())); #ifdef PFAEDLE_STATION_IDS // only print this in debug mode - obj["station_id"] = getSI()->getId(); + obj["station_id"] = _si->getId(); #endif + + + std::stringstream gtfsIds; + if (_si->getGroup()) { + for (auto* s : _si->getGroup()->getStops()) { + gtfsIds << s->getId() << " (" << s->getName() << "),"; + } + } + + obj["station_group_stops"] = gtfsIds.str(); } return obj; } // _____________________________________________________________________________ -void NodePL::setSI(const StatInfo& si) { - _statInfos.emplace_back(si); - _si = _statInfos.size(); -} +void NodePL::setSI(const StatInfo& si) { _si = new StatInfo(si); } // _____________________________________________________________________________ const StatInfo* NodePL::getSI() const { if (isBlocker()) return 0; - if (isTurnCycle()) return 0; - if (_si == 0) return 0; - return &_statInfos[_si - 1]; + return _si; } // 
_____________________________________________________________________________ StatInfo* NodePL::getSI() { if (isBlocker()) return 0; - if (isTurnCycle()) return 0; - if (_si == 0) return 0; - return &_statInfos[_si - 1]; + return _si; } // _____________________________________________________________________________ -void NodePL::setTurnCycle() { _si = std::numeric_limits::max() - 1; } +void NodePL::setBlocker() { _si = &_blockerSI; } // _____________________________________________________________________________ -bool NodePL::isTurnCycle() const { - return _si == (std::numeric_limits::max() - 1); -} - -// _____________________________________________________________________________ -void NodePL::setBlocker() { _si = std::numeric_limits::max(); } - -// _____________________________________________________________________________ -bool NodePL::isBlocker() const { - return _si == std::numeric_limits::max(); -} +bool NodePL::isBlocker() const { return _si == &_blockerSI; } diff --git a/src/pfaedle/trgraph/NodePL.h b/src/pfaedle/trgraph/NodePL.h index 8d4efa9..7c1cdb0 100644 --- a/src/pfaedle/trgraph/NodePL.h +++ b/src/pfaedle/trgraph/NodePL.h @@ -8,7 +8,6 @@ #include #include #include -#include #include "ad/cppgtfs/gtfs/Feed.h" #include "pfaedle/Def.h" #include "pfaedle/trgraph/StatInfo.h" @@ -21,7 +20,7 @@ namespace pfaedle { namespace trgraph { struct Component { - float maxSpeed; + uint8_t minEdgeLvl : 3; }; /* @@ -30,8 +29,10 @@ struct Component { class NodePL { public: NodePL(); + NodePL(const NodePL& pl); // NOLINT NodePL(const POINT& geom); // NOLINT NodePL(const POINT& geom, const StatInfo& si); + ~NodePL(); // Return the geometry of this node. 
const POINT* getGeom() const; @@ -51,13 +52,10 @@ class NodePL { void setNoStat(); // Get the component of this node - const Component& getComp() const; - - // Get the component of this node - uint32_t getCompId() const; + const Component* getComp() const; // Set the component of this node - void setComp(uint32_t c); + void setComp(const Component* c); // Make this node a blocker void setBlocker(); @@ -65,27 +63,21 @@ class NodePL { // Check if this node is a blocker bool isBlocker() const; - // Make this node a turning cycle - void setTurnCycle(); - - // Check if this node is a blocker - bool isTurnCycle() const; - // Mark this node as visited (usefull for counting search space in Dijkstra) // (only works for DEBUG build type) void setVisited() const; - static std::vector comps; - private: POINT _geom; - uint32_t _si; - uint32_t _component; + StatInfo* _si; + const Component* _component; #ifdef PFAEDLE_DBG mutable bool _vis; #endif - static std::vector _statInfos; + + static StatInfo _blockerSI; + static std::unordered_map _comps; }; } // namespace trgraph } // namespace pfaedle diff --git a/src/pfaedle/trgraph/Normalizer.cpp b/src/pfaedle/trgraph/Normalizer.cpp index 5daaa80..5e254a0 100644 --- a/src/pfaedle/trgraph/Normalizer.cpp +++ b/src/pfaedle/trgraph/Normalizer.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,17 @@ Normalizer& Normalizer::operator=(Normalizer other) { return *this; } +// _____________________________________________________________________________ +std::string Normalizer::operator()(std::string sn) const { + return normTS(sn); +} + +// _____________________________________________________________________________ +std::string Normalizer::normTS(const std::string& sn) const { + std::lock_guard lock(_mutex); + return norm(sn); +} + // _____________________________________________________________________________ std::string Normalizer::norm(const std::string& sn) const { auto i = _cache.find(sn); 
diff --git a/src/pfaedle/trgraph/Normalizer.h b/src/pfaedle/trgraph/Normalizer.h index ae02d5a..6546963 100644 --- a/src/pfaedle/trgraph/Normalizer.h +++ b/src/pfaedle/trgraph/Normalizer.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace pfaedle { namespace trgraph { @@ -36,13 +37,19 @@ class Normalizer { // Normalize sn, not thread safe std::string norm(const std::string& sn) const; + // Normalize sn, thread safe + std::string normTS(const std::string& sn) const; + // Normalize sn based on the rules of this normalizer, uses the thread safe + // version of norm() internally + std::string operator()(std::string sn) const; bool operator==(const Normalizer& b) const; private: ReplRulesComp _rules; ReplRules _rulesOrig; mutable std::unordered_map _cache; + mutable std::mutex _mutex; void buildRules(const ReplRules& rules); }; diff --git a/src/pfaedle/trgraph/StatGroup.cpp b/src/pfaedle/trgraph/StatGroup.cpp new file mode 100644 index 0000000..75b45d4 --- /dev/null +++ b/src/pfaedle/trgraph/StatGroup.cpp @@ -0,0 +1,94 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#include +#include "pfaedle/trgraph/StatGroup.h" +#include "util/geo/Geo.h" + +using pfaedle::trgraph::StatGroup; +using pfaedle::trgraph::Node; +using pfaedle::router::NodeCandGroup; +using ad::cppgtfs::gtfs::Stop; + +// _____________________________________________________________________________ +StatGroup::StatGroup() {} + +// _____________________________________________________________________________ +void StatGroup::addStop(const Stop* s) { _stops.insert(s); } + +// _____________________________________________________________________________ +void StatGroup::addNode(trgraph::Node* n) { _nodes.insert(n); } + +// _____________________________________________________________________________ +void StatGroup::merge(StatGroup* other) { + if (other == this) return; + + std::set nds = other->getNodes(); + std::set stops = other->getStops(); + + for (auto on : nds) { + on->pl().getSI()->setGroup(this); + addNode(on); + } + + for (auto* os : stops) { + addStop(os); + } +} + +// _____________________________________________________________________________ +const NodeCandGroup& StatGroup::getNodeCands(const Stop* s) const { + return _stopNodePens.at(s); +} + +// _____________________________________________________________________________ +const std::set& StatGroup::getNodes() const { return _nodes; } + +// _____________________________________________________________________________ +void StatGroup::remNode(trgraph::Node* n) { + auto it = _nodes.find(n); + if (it != _nodes.end()) _nodes.erase(it); +} + +// _____________________________________________________________________________ +std::set& StatGroup::getNodes() { return _nodes; } + +// _____________________________________________________________________________ +const std::set& StatGroup::getStops() const { return _stops; } + +// _____________________________________________________________________________ +double StatGroup::getPen(const Stop* s, trgraph::Node* n, + const 
trgraph::Normalizer& platformNorm, + double trackPen, double distPenFac, + double nonOsmPen) const { + POINT p = + util::geo::latLngToWebMerc(s->getLat(), s->getLng()); + + double distPen = util::geo::webMercMeterDist(p, *n->pl().getGeom()); + distPen *= distPenFac; + + std::string platform = platformNorm.norm(s->getPlatformCode()); + + if (!platform.empty() && !n->pl().getSI()->getTrack().empty() && + n->pl().getSI()->getTrack() == platform) { + trackPen = 0; + } + + if (n->pl().getSI()->isFromOsm()) nonOsmPen = 0; + + return distPen + trackPen + nonOsmPen; +} + +// _____________________________________________________________________________ +void StatGroup::writePens(const trgraph::Normalizer& platformNorm, + double trackPen, double distPenFac, + double nonOsmPen) { + if (_stopNodePens.size()) return; // already written + for (auto* s : _stops) { + for (auto* n : _nodes) { + _stopNodePens[s].push_back(router::NodeCand{ + n, getPen(s, n, platformNorm, trackPen, distPenFac, nonOsmPen)}); + } + } +} diff --git a/src/pfaedle/trgraph/StatGroup.h b/src/pfaedle/trgraph/StatGroup.h new file mode 100644 index 0000000..a3341af --- /dev/null +++ b/src/pfaedle/trgraph/StatGroup.h @@ -0,0 +1,72 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifndef PFAEDLE_TRGRAPH_STATGROUP_H_ +#define PFAEDLE_TRGRAPH_STATGROUP_H_ + +#include +#include +#include +#include "ad/cppgtfs/gtfs/Feed.h" +#include "pfaedle/router/Router.h" +#include "pfaedle/trgraph/Graph.h" +#include "pfaedle/trgraph/Normalizer.h" + +namespace pfaedle { +namespace trgraph { + +using ad::cppgtfs::gtfs::Stop; + +/* + * A group of stations that belong together semantically (for example, multiple + * stop points of a larger bus station) + */ +class StatGroup { + public: + StatGroup(); + StatGroup(const StatGroup& a) = delete; + + // Add a stop s to this station group + void addStop(const Stop* s); + + // Add a node n to this station group + void addNode(trgraph::Node* n); + + // Return all nodes contained in this group + const std::set& getNodes() const; + std::set& getNodes(); + + // Return all stops contained in this group + const std::set& getStops() const; + + // Remove a node from this group + void remNode(trgraph::Node* n); + + // All nodes in other will be in this group, their SI's updated, and the + // "other" group deleted. + void merge(StatGroup* other); + + // Return node candidates for stop s from this group + const router::NodeCandGroup& getNodeCands(const Stop* s) const; + + // Write the penalties for all stops contained in this group so far. 
+ void writePens(const trgraph::Normalizer& platformNorm, double trackPen, + double distPenFac, double nonOsmPen); + + private: + std::set _nodes; + std::set _stops; + + // for each stop in this group, a penalty for each of the nodes here, based on + // its distance and optionally the track number + std::unordered_map _stopNodePens; + + double getPen(const Stop* s, trgraph::Node* n, + const trgraph::Normalizer& norm, double trackPen, + double distPenFac, double nonOsmPen) const; +}; +} // namespace trgraph +} // namespace pfaedle + +#endif // PFAEDLE_TRGRAPH_STATGROUP_H_ diff --git a/src/pfaedle/trgraph/StatInfo.cpp b/src/pfaedle/trgraph/StatInfo.cpp index 132c985..0b642b6 100644 --- a/src/pfaedle/trgraph/StatInfo.cpp +++ b/src/pfaedle/trgraph/StatInfo.cpp @@ -3,24 +3,66 @@ // Authors: Patrick Brosi #include "pfaedle/router/Comp.h" +#include "pfaedle/trgraph/StatGroup.h" #include "pfaedle/trgraph/StatInfo.h" using pfaedle::trgraph::StatInfo; +using pfaedle::trgraph::StatGroup; + +std::unordered_map StatInfo::_groups; // _____________________________________________________________________________ -StatInfo::StatInfo() : _name(""), _track("") {} +StatInfo::StatInfo() : _name(""), _track(""), _fromOsm(false), _group(0) {} // _____________________________________________________________________________ StatInfo::StatInfo(const StatInfo& si) - : _name(si._name), _altNames(si._altNames), _track(si._track) { + : _name(si._name), + _altNames(si._altNames), + _track(si._track), + _fromOsm(si._fromOsm), + _group(0) { + setGroup(si._group); #ifdef PFAEDLE_STATION_IDS _id = si._id; #endif } // _____________________________________________________________________________ -StatInfo::StatInfo(const std::string& name, const std::string& track) - : _name(name), _track(track) {} +StatInfo::StatInfo(const std::string& name, const std::string& track, + bool fromOsm) + : _name(name), _track(track), _fromOsm(fromOsm), _group(0) {} + +// 
_____________________________________________________________________________ +StatInfo::~StatInfo() { unRefGroup(_group); } + +// _____________________________________________________________________________ +void StatInfo::unRefGroup(StatGroup* g) { + if (g) { + _groups[g]--; + if (_groups[g] == 0) { + // std::cout << "Deleting " << g << std::endl; + delete g; + _groups.erase(_groups.find(g)); + } + } +} + +// _____________________________________________________________________________ +void StatInfo::setGroup(StatGroup* g) { + if (_group == g) return; + unRefGroup(_group); + + _group = g; + + // NOT thread safe! + if (!_groups.count(g)) + _groups[g] = 1; + else + _groups[g]++; +} + +// _____________________________________________________________________________ +StatGroup* StatInfo::getGroup() const { return _group; } // _____________________________________________________________________________ const std::string& StatInfo::getName() const { return _name; } @@ -28,6 +70,12 @@ const std::string& StatInfo::getName() const { return _name; } // _____________________________________________________________________________ const std::string& StatInfo::getTrack() const { return _track; } +// _____________________________________________________________________________ +bool StatInfo::isFromOsm() const { return _fromOsm; } + +// _____________________________________________________________________________ +void StatInfo::setIsFromOsm(bool is) { _fromOsm = is; } + // _____________________________________________________________________________ double StatInfo::simi(const StatInfo* other) const { if (!other) return 0; diff --git a/src/pfaedle/trgraph/StatInfo.h b/src/pfaedle/trgraph/StatInfo.h index dcd5ba7..de0bbcf 100644 --- a/src/pfaedle/trgraph/StatInfo.h +++ b/src/pfaedle/trgraph/StatInfo.h @@ -6,20 +6,24 @@ #define PFAEDLE_TRGRAPH_STATINFO_H_ #include -#include #include +#include namespace pfaedle { namespace trgraph { +// forward declaration +class StatGroup; + 
/* - * Meta information (name, alternative names, track, ...) of a single stop + * Meta information (name, alternative names, track, group...) of a single stop */ class StatInfo { public: StatInfo(); StatInfo(const StatInfo& si); - StatInfo(const std::string& name, const std::string& track); + StatInfo(const std::string& name, const std::string& track, bool _fromOsm); + ~StatInfo(); // Return this stops names. const std::string& getName() const; @@ -39,6 +43,18 @@ class StatInfo { // Return the similarity between this stop and other double simi(const StatInfo* other) const; + // Set this stations group. + void setGroup(StatGroup* g); + + // Return this stations group. + StatGroup* getGroup() const; + + // True if this stop was from osm + bool isFromOsm() const; + + // Set this stop as coming from osm + void setIsFromOsm(bool is); + #ifdef PFAEDLE_STATION_IDS const std::string& getId() const { return _id; } void setId(const std::string& id) { _id = id; } @@ -48,12 +64,17 @@ class StatInfo { std::string _name; std::vector _altNames; std::string _track; + bool _fromOsm; + StatGroup* _group; #ifdef PFAEDLE_STATION_IDS // debug feature to store station ids from both OSM // and GTFS std::string _id; #endif + + static std::unordered_map _groups; + static void unRefGroup(StatGroup* g); }; } // namespace trgraph } // namespace pfaedle diff --git a/src/shapevl/CMakeLists.txt b/src/shapevl/CMakeLists.txt deleted file mode 100644 index bb55963..0000000 --- a/src/shapevl/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -file(GLOB_RECURSE shapevl_SRC *.cpp) - -set(shapevl_main ShapevlMain.cpp) - -list(REMOVE_ITEM shapevl_SRC ${shapevl_main}) - -include_directories( - ${PFAEDLE_INCLUDE_DIR} - SYSTEM ${LIBZIP_INCLUDE_DIR} - SYSTEM ${LIBZIP_CONF_INCLUDE_DIR} -) - -add_executable(shapevl ${shapevl_main}) -add_library(shapevl_dep ${shapevl_SRC}) - -include_directories(shapevl_dep PUBLIC ${PROJECT_SOURCE_DIR}/src/cppgtfs/src) -target_link_libraries(shapevl shapevl_dep util ad_cppgtfs 
-lpthread ${LIBZIP_LIBRARY}) diff --git a/src/shapevl/Collector.cpp b/src/shapevl/Collector.cpp deleted file mode 100644 index f75e940..0000000 --- a/src/shapevl/Collector.cpp +++ /dev/null @@ -1,485 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#include -#include -#include -#include -#include "ad/cppgtfs/gtfs/Feed.h" -#include "pfaedle/Def.h" -#include "shapevl/Collector.h" -#include "shapevl/Result.h" -#include "util/geo/Geo.h" -#include "util/geo/PolyLine.h" -#include "util/geo/output/GeoJsonOutput.h" -#include "util/log/Log.h" - -using util::geo::PolyLine; - -using ad::cppgtfs::gtfs::Shape; -using ad::cppgtfs::gtfs::Trip; -using pfaedle::eval::Collector; -using pfaedle::eval::Result; -using util::geo::output::GeoJsonOutput; - -// _____________________________________________________________________________ -double Collector::add(const Trip* oldT, const Shape* oldS, const Trip* newT, - const Shape* newS) { - // This adds a new trip with a new shape to our evaluation. - _trips++; - - if (!oldS) { - // If there is no original shape, we cannot compare them - abort! - _noOrigShp++; - return 0; - } - - for (auto st : oldT->getStopTimes()) { - if (st.getShapeDistanceTravelled() < 0) { - // we cannot safely compare trips without shape dist travelled - // information - abort! - _noOrigShp++; - return 0; - } - } - - for (auto st : newT->getStopTimes()) { - if (st.getShapeDistanceTravelled() < 0) { - // we cannot safely compare trips without shape dist travelled - // information - abort! - _noOrigShp++; - return 0; - } - } - - double fd = 0; - - // A "segment" is a path from station s_i to station s_{i+1} - - size_t unmatchedSegments; // number of unmatched segments - double unmatchedSegmentsLength; // total _an. 
length of unmatched segments - - std::vector oldDists; - LINE oldL = getLine(oldS, &oldDists); - - std::vector newDists; - LINE newL = getLine(newS, &newDists); - - // check dist between anchor points - - if ((util::geo::latLngLen(oldL) * 1.0) / (oldL.size() * 1.0) > 1000) { - // most likely input with a degenerated shape - dont compare - _noOrigShp++; - return 0; - } - - if ((util::geo::latLngLen(newL) * 1.0) / (newL.size() * 1.0) > 1000) { - // most likely input with a degenerated shape - dont compare - _noOrigShp++; - return 0; - } - - std::vector> newLenDists; - std::vector> oldLenDists; - - auto oldSegs = segmentize(oldT, oldL, oldDists, newLenDists); - auto newSegs = segmentize(newT, newL, newDists, oldLenDists); - - for (const auto& p : oldLenDists) { - _distDiffs.push_back(fabs(p.first - p.second)); - _hopDists.push_back(p.first); - } - - // new lines build from cleaned-up shapes - LINE oldLCut; - LINE newLCut; - - for (auto oldL : oldSegs) - oldLCut.insert(oldLCut.end(), oldL.begin(), oldL.end()); - - for (auto newL : newSegs) { - newLCut.insert(newLCut.end(), newL.begin(), newL.end()); - } - - // convert (roughly) to degrees - double SEGL = 25.0 / util::geo::M_PER_DEG; - - double f = util::geo::webMercDistFactor(oldLCut.front()); - - // roughly half a meter - auto oldLCutS = - util::geo::simplify(oldLCut, f * (0.5 / util::geo::M_PER_DEG)); - auto newLCutS = - util::geo::simplify(newLCut, f * (0.5 / util::geo::M_PER_DEG)); - - auto old = _dCache.find(oldLCutS); - if (old != _dCache.end()) { - auto match = old->second.find(newLCutS); - if (match != old->second.end()) { - fd = match->second; - } else { - fd = util::geo::accFrechetDistCHav(oldLCutS, newLCutS, SEGL); - _dCache[oldLCutS][newLCutS] = fd; - } - } else { - fd = util::geo::accFrechetDistCHav(oldLCutS, newLCutS, SEGL); - _dCache[oldLCutS][newLCutS] = fd; - } - - auto dA = getDa(oldSegs, newSegs); - unmatchedSegments = dA.first; - unmatchedSegmentsLength = dA.second; - - double totL = 0; - for (auto 
l : oldSegs) totL += util::geo::latLngLen(l); - - // filter out shapes with a length of under 5 meters - they are most likely - // artifacts - if (totL < 5) { - _noOrigShp++; - return 0; - } - - _fdSum += fd / totL; - _unmatchedSegSum += unmatchedSegments; - _unmatchedSegLengthSum += unmatchedSegmentsLength; - - double avgFd = fd / totL; - double AN = static_cast(unmatchedSegments) / - static_cast(oldSegs.size()); - double AL = unmatchedSegmentsLength / totL; - - _results.insert(Result(oldT, avgFd)); - - if (AN <= 0.0001) _an0++; - if (AN <= 0.05) _an5++; - if (AN <= 0.1) _an10++; - if (AN <= 0.2) _an20++; - if (AN <= 0.3) _an30++; - if (AN <= 0.5) _an50++; - if (AN <= 0.7) _an70++; - if (AN <= 0.9) _an90++; - - LOG(VDEBUG) << "This result (" << oldT->getId() - << "): A_N/N = " << unmatchedSegments << "/" << oldSegs.size() - << " = " << AN << " A_L/L = " << unmatchedSegmentsLength << "/" - << totL << " = " << AL << " d_f = " << avgFd; - - if (_reportOut) { - (*_reportOut) << std::fixed << std::setprecision(6); - (*_reportOut) << oldT->getId() << "\t" << AN << "\t" << AL << "\t" << avgFd - << "\t" << util::geo::getWKT(oldSegs) << "\t" - << util::geo::getWKT(newSegs) << "\t" << oldT->getRoute()->getShortName() << "\t"; - - for (const auto& st : oldT->getStopTimes()) { - (*_reportOut) << st.getStop()->getName() << "\t" - << st.getStop()->getLat() << "\t" - << st.getStop()->getLng() << "\t"; - } -(*_reportOut) << "\n"; - } - - return avgFd; -} - -// _____________________________________________________________________________ -std::vector Collector::segmentize( - const Trip* t, const LINE& shape, const std::vector& dists, - std::vector>& lenDist) { - std::vector ret; - - if (t->getStopTimes().size() < 2) return ret; - - POLYLINE pl(shape); - std::vector cuts; - - size_t i = 0; - for (const auto& st : t->getStopTimes()) { - cuts.push_back(st.getShapeDistanceTravelled()); - i++; - } - - - size_t to = std::upper_bound(dists.begin(), dists.end(), cuts[0]) - - 
dists.begin(); - - POINT lastP; - if (to >= dists.size()) { - lastP = shape.back(); - } else if (to == 0) { - lastP = shape.front(); - } else { - double progr = (cuts[0] - dists[to - 1]) / (dists[to] - dists[to - 1]); - lastP = shape[to - 1]; - lastP.setX(lastP.getX() + progr * (shape[to].getX() - shape[to-1].getX())); - lastP.setY(lastP.getY() + progr * (shape[to].getY() - shape[to-1].getY())); - } - - for (size_t i = 1; i < cuts.size(); i++) { - size_t to = std::upper_bound(dists.begin(), dists.end(), cuts[i]) - - dists.begin(); - - POINT curP; - if (to >= dists.size()) { - curP = shape.back(); - } else if (to == 0) { - curP = shape.front(); - } else { - curP = shape[to - 1]; - double progr = (cuts[i] - dists[to - 1]) / (dists[to] - dists[to - 1]); - curP.setX(curP.getX() + progr * (shape[to].getX() - shape[to-1].getX())); - curP.setY(curP.getY() + progr * (shape[to].getY() - shape[to-1].getY())); - } - - auto curL = pl.getSegment(lastP, curP).getLine(); - - double dist = - util::geo::haversine(t->getStopTimes()[i - 1].getStop()->getLat(), - t->getStopTimes()[i - 1].getStop()->getLng(), - t->getStopTimes()[i].getStop()->getLat(), - t->getStopTimes()[i].getStop()->getLng()); - double len = util::geo::latLngLen(curL); - lenDist.push_back({dist, len}); - - ret.push_back(curL); - lastP = curP; - } - - return ret; -} - -// _____________________________________________________________________________ -LINE Collector::getLine(const Shape* s, std::vector* dists) { - LINE ret; - - for (size_t i = 0; i < s->getPoints().size(); i++) { - ret.push_back({s->getPoints()[i].lng, s->getPoints()[i].lat}); - (*dists).push_back(s->getPoints()[i].travelDist); - } - return ret; -} - -// _____________________________________________________________________________ -const std::set& Collector::getResults() const { return _results; } - -// _____________________________________________________________________________ -double Collector::getAvgDist() const { return _fdSum / _results.size(); 
} - -// _____________________________________________________________________________ -void Collector::printCsv(std::ostream* os, - const std::set& result) const { - for (auto r : result) (*os) << r.getDist() << "\n"; -} - -// _____________________________________________________________________________ -double Collector::getAcc() const { - return static_cast(_an0) / static_cast(_results.size()); -} - -// _____________________________________________________________________________ -void Collector::printShortStats(std::ostream* os) const { - if (_results.size()) { - (*os) << (static_cast(_an0) / - static_cast(_results.size())) * - 100 - << ","; - (*os) << (static_cast(_an5) / - static_cast(_results.size())) * - 100 - << ","; - (*os) << (static_cast(_an10) / - static_cast(_results.size())) * - 100 - << ","; - (*os) << (static_cast(_an20) / - static_cast(_results.size())) * - 100 - << ","; - (*os) << (static_cast(_an30) / - static_cast(_results.size())) * - 100 - << ","; - (*os) << (static_cast(_an50) / - static_cast(_results.size())) * - 100 - << ","; - (*os) << (static_cast(_an70) / - static_cast(_results.size())) * - 100 - << ","; - (*os) << (static_cast(_an90) / - static_cast(_results.size())) * - 100; - } -} - -// _____________________________________________________________________________ -void Collector::printStats(std::ostream* os) const { - (*os) << std::setfill(' ') << std::setw(50) << " # of trips: " << _trips - << "\n"; - (*os) << std::setfill(' ') << std::setw(50) - << " # of trips new shapes were matched for: " << _results.size() - << "\n"; - (*os) << std::setw(50) << " # of trips without input shapes: " << _noOrigShp - << "\n"; - - if (_results.size()) { - (*os) << std::setw(50) << " highest avg frechet distance to input shapes: " - << (--_results.end())->getDist() << " (on trip #" - << (--_results.end())->getTrip()->getId() << ")\n"; - (*os) << std::setw(50) << " lowest distance to input shapes: " - << (_results.begin())->getDist() << " (on trip #" - 
<< (_results.begin())->getTrip()->getId() << ")\n"; - (*os) << std::setw(50) - << " averaged avg frechet distance: " << getAvgDist() << "\n"; - - (*os) << "\n"; - (*os) << " an-0: " - << (static_cast(_an0) / - static_cast(_results.size())) * - 100 - << " %" - << "\n"; - (*os) << " an-5: " - << (static_cast(_an5) / - static_cast(_results.size())) * - 100 - << " %" - << "\n"; - (*os) << " an-10: " - << (static_cast(_an10) / - static_cast(_results.size())) * - 100 - << " %" - << "\n"; - (*os) << " an-20: " - << (static_cast(_an20) / - static_cast(_results.size())) * - 100 - << " %" - << "\n"; - (*os) << " acc-30: " - << (static_cast(_an30) / - static_cast(_results.size())) * - 100 - << " %" - << "\n"; - (*os) << " acc-50: " - << (static_cast(_an50) / - static_cast(_results.size())) * - 100 - << " %" - << "\n"; - (*os) << " acc-70: " - << (static_cast(_an70) / - static_cast(_results.size())) * - 100 - << " %" - << "\n"; - (*os) << " acc-90: " - << (static_cast(_an90) / - static_cast(_results.size())) * - 100 - << " %" - << "\n"; - } - - (*os) << std::endl; -} - -// _____________________________________________________________________________ -std::map Collector::getStats() { - std::map stats; - - if (_distDiffs.size()) { - auto i = _distDiffs.begin() + _distDiffs.size() / 2; - - // std::nth_element makes a partial sort of the first n elements - std::nth_element(_distDiffs.begin(), i, _distDiffs.end()); - - stats["median-dist-diff"] = *i; - } else { - stats["median-dist-diff"] = -1; - } - - if (_hopDists.size()) { - double s = 0; - for (auto d : _hopDists) s += d; - - stats["avg-hop-dist"] = s / (_hopDists.size() * 1.0); - } else { - stats["avg-hop-dist"] = -1; - } - - stats["num-trips"] = _trips; - stats["num-trips-matched"] = _results.size(); - stats["num-trips-wo-shapes"] = _noOrigShp; - stats["avg-fr"] = getAvgDist(); - if (_results.size()) { - stats["max-avg-frech-dist"] = (--_results.end())->getDist(); - } else { - stats["max-avg-frech-dist"] = -1; - } - 
stats["an-0"] = - (static_cast(_an0) / static_cast(_results.size())) * 100; - stats["an-5"] = - (static_cast(_an5) / static_cast(_results.size())) * 100; - stats["an-10"] = - (static_cast(_an10) / static_cast(_results.size())) * 100; - stats["an-20"] = - (static_cast(_an20) / static_cast(_results.size())) * 100; - stats["an-30"] = - (static_cast(_an30) / static_cast(_results.size())) * 100; - stats["an-50"] = - (static_cast(_an50) / static_cast(_results.size())) * 100; - stats["an-70"] = - (static_cast(_an70) / static_cast(_results.size())) * 100; - stats["an-90"] = - (static_cast(_an90) / static_cast(_results.size())) * 100; - - return stats; -} - -// _____________________________________________________________________________ -std::pair Collector::getDa(const std::vector& a, - const std::vector& b) { - assert(a.size() == b.size()); - std::pair ret{0, 0}; - - // convert (roughly) to degrees - double SEGL = 25 / util::geo::M_PER_DEG; - - double MAX = 100; - - for (size_t i = 0; i < a.size(); i++) { - double fdMeter = 0; - - double f = util::geo::webMercDistFactor(a[i].front()); - - // roughly half a meter - auto aSimpl = util::geo::simplify(a[i], f * (0.5 / util::geo::M_PER_DEG)); - auto bSimpl = util::geo::simplify(b[i], f * (0.5 / util::geo::M_PER_DEG)); - - auto old = _dACache.find(aSimpl); - if (old != _dACache.end()) { - auto match = old->second.find(bSimpl); - if (match != old->second.end()) { - fdMeter = match->second; - } else { - fdMeter = util::geo::frechetDistHav(aSimpl, bSimpl, SEGL); - _dACache[aSimpl][bSimpl] = fdMeter; - } - } else { - fdMeter = util::geo::frechetDistHav(aSimpl, bSimpl, SEGL); - _dACache[aSimpl][bSimpl] = fdMeter; - } - - if (fdMeter >= MAX) { - ret.first++; - ret.second += util::geo::latLngLen(aSimpl); - } - } - - return ret; -} diff --git a/src/shapevl/Collector.h b/src/shapevl/Collector.h deleted file mode 100644 index 724f00d..0000000 --- a/src/shapevl/Collector.h +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2018, University of 
Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#ifndef PFAEDLE_EVAL_COLLECTOR_H_ -#define PFAEDLE_EVAL_COLLECTOR_H_ - -#include -#include -#include -#include -#include -#include -#include -#include "ad/cppgtfs/gtfs/Feed.h" -#include "pfaedle/Def.h" -#include "shapevl/Result.h" -#include "util/geo/Geo.h" -#include "util/json/Writer.h" - -using ad::cppgtfs::gtfs::Shape; -using ad::cppgtfs::gtfs::Trip; - -namespace pfaedle { -namespace eval { - -struct lineCmp { - bool operator()(const LINE& a, const LINE& b) const { - if (a.size() != b.size()) { - return a.size() < b.size(); - } - - for (size_t i = 0; i < a.size(); i++) { - if (util::geo::dist(a[i], b[i]) > .00001) { - return (a[i].getX() < b[i].getX()) || - (a[i].getX() == b[i].getX() && a[i].getY() < b[i].getY()); - ; - } - } - - return false; - } -}; - -/* - * Collects routing results for evaluation - */ -class Collector { - public: - Collector(std::ostream* reportOut) - : _trips(0), - _noOrigShp(0), - _fdSum(0), - _unmatchedSegSum(0), - _unmatchedSegLengthSum(0), - _an0(0), - _an5(0), - _an10(0), - _an30(0), - _an50(0), - _an70(0), - _an90(0), - _reportOut(reportOut) {} - - // Add a shape found by our tool newS for a trip t with newly calculated - // station dist values with the old shape oldS - double add(const Trip* oldT, const Shape* oldS, const Trip* newT, - const Shape* newS); - - // Return the set of all Result objects - const std::set& getResults() const; - - // Print general stats to os - void printStats(std::ostream* os) const; - - // Print general stats to os - void printShortStats(std::ostream* os) const; - - // Get JSON stats - std::map getStats(); - - // Print a CSV for the results to os - void printCsv(std::ostream* os, const std::set& result) const; - - // Return the averaged average frechet distance - double getAvgDist() const; - - static LINE getLine(const Shape* s, std::vector* dists); - - double getAcc() const; - - private: - std::set _results; - std::map, 
lineCmp> _dCache; - std::map, lineCmp> _dACache; - - size_t _trips; - size_t _noOrigShp; - - std::vector _distDiffs; - std::vector _hopDists; - - double _fdSum; - size_t _unmatchedSegSum; - double _unmatchedSegLengthSum; - - size_t _an0; - size_t _an5; - size_t _an10; - size_t _an20; - size_t _an30; - size_t _an50; - size_t _an70; - size_t _an90; - - std::ostream* _reportOut; - - std::pair getDa(const std::vector& a, - const std::vector& b); - - static std::vector segmentize( - const Trip* t, const LINE& shape, const std::vector& dists, - std::vector>& lenDist); -}; - -} // namespace eval -} // namespace pfaedle - -#endif // PFAEDLE_EVAL_COLLECTOR_H_ diff --git a/src/shapevl/ShapevlMain.cpp b/src/shapevl/ShapevlMain.cpp deleted file mode 100644 index bade8d9..0000000 --- a/src/shapevl/ShapevlMain.cpp +++ /dev/null @@ -1,276 +0,0 @@ -// Copyright 2020, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#include -#include -#include -#include -#include -#include -#include -#include "ad/cppgtfs/Parser.h" -#include "pfaedle/router/TripTrie.h" -#include "shapevl/Collector.h" -#include "util/Misc.h" -#include "util/json/Writer.h" -#include "util/log/Log.h" - -using pfaedle::router::TripTrie; - -std::atomic count(0); - -// _____________________________________________________________________________ -void printHelp(int argc, char** argv) { - UNUSED(argc); - std::cout << "Usage: " << argv[0] - << " [-f ] -g [-s] " - << "\n"; - std::cout - << "\nAllowed arguments:\n -g Ground truth GTFS file\n"; - std::cout << " -s Only output summary\n"; - std::cout << " --json Output JSON\n"; - std::cout << " --avg Take avg of all inputs (only for --json)\n"; - std::cout << " -f Output full reports (per feed) to \n"; - std::cout - << " -m MOTs to match (GTFS MOT or string, default: all)\n"; -} - -// _____________________________________________________________________________ -void eval(const std::vector* paths, - std::vector* colls, - const 
std::set* mots, - const ad::cppgtfs::gtfs::Feed* evalFeed, bool unique) { - while (1) { - int myFeed = count-- - 1; - if (myFeed < 0) return; - std::string path = (*paths)[myFeed]; - LOG(DEBUG) << "Reading eval feed " << path << " ..."; - ad::cppgtfs::gtfs::Feed feed; - - try { - ad::cppgtfs::Parser p(path); - p.parse(&feed); - } catch (const ad::cppgtfs::ParserException& ex) { - LOG(ERROR) << "Could not parse GTFS feed " << path << ", reason was:"; - std::cerr << ex.what() << std::endl; - exit(1); - } - - std::vector trips; - - if (unique) { - std::map>> - forest; - for (auto t : evalFeed->getTrips()) { - auto& subForest = forest[t.second->getRoute()]; - bool ins = false; - for (auto& trie : subForest) { - if (trie.addTrip(t.second, - pfaedle::router::RoutingAttrs{ - t.second->getRoute()->getId(), "", ""}, - false, false)) { - ins = true; - break; - } - } - - if (!ins) { - subForest.resize(subForest.size() + 1); - subForest.back().addTrip(t.second, - pfaedle::router::RoutingAttrs{ - t.second->getRoute()->getId(), "", ""}, - false, false); - } - } - for (auto f : forest) { - for (auto sf : f.second) { - for (auto leaf : sf.getNdTrips()) { - // only one reference node - trips.push_back(leaf.second.front()); - } - } - } - } else { - for (auto t : evalFeed->getTrips()) { - trips.push_back(t.second); - } - } - - LOG(DEBUG) << "Evaluating " << path << "..."; - size_t i = 0; - for (const auto& oldTrip : trips) { - LOG(DEBUG) << "@ " << ++i << "/" << trips.size(); - if (!mots->count(oldTrip->getRoute()->getType())) continue; - auto newTrip = feed.getTrips().get(oldTrip->getId()); - if (!newTrip) { - LOG(ERROR) << "Trip #" << oldTrip->getId() << " not present in " << path - << ", skipping..."; - continue; - } - (*colls)[myFeed].add(oldTrip, oldTrip->getShape(), newTrip, - newTrip->getShape()); - } - } -} - -// _____________________________________________________________________________ -int main(int argc, char** argv) { - // disable output buffering for standard output - 
setbuf(stdout, NULL); - - // initialize randomness - srand(time(NULL) + rand()); // NOLINT - - std::string groundTruthFeedPath, motStr; - motStr = "all"; - ad::cppgtfs::gtfs::Feed groundTruthFeed; - std::string fullReportPath = ""; - std::vector evlFeedPaths; - std::set evlFeedPathsUniq; - std::vector evalColls; - std::vector reportStreams; - bool summarize = false; - bool json = false; - bool avg = false; - bool unique = false; - - for (int i = 1; i < argc; i++) { - std::string cur = argv[i]; - if (cur == "-h" || cur == "--help") { - printHelp(argc, argv); - exit(0); - } else if (cur == "-g") { - if (++i >= argc) { - LOG(ERROR) << "Missing argument for ground truth (-g)."; - exit(1); - } - groundTruthFeedPath = argv[i]; - } else if (cur == "-s") { - summarize = true; - } else if (cur == "--json") { - json = true; - } else if (cur == "--unique") { - unique = true; - } else if (cur == "--avg") { - avg = true; - } else if (cur == "-f") { - if (++i >= argc) { - LOG(ERROR) << "Missing argument for full reports (-f)."; - exit(1); - } - fullReportPath = argv[i]; - } else if (cur == "-m") { - if (++i >= argc) { - LOG(ERROR) << "Missing argument for mot (-m)."; - exit(1); - } - motStr = argv[i]; - } else { - char fullPath[PATH_MAX + 1]; - if (!realpath(cur.c_str(), fullPath)) { - LOG(ERROR) << "Error while reading " << fullPath; - exit(1); - } - evlFeedPathsUniq.insert(fullPath); - } - } - - for (const auto& feedPath : evlFeedPathsUniq) { - evlFeedPaths.push_back(feedPath); - if (fullReportPath.size()) { - reportStreams.emplace_back(); - reportStreams.back().exceptions(std::ios::failbit | std::ios::badbit); - reportStreams.back().open(fullReportPath + "/" + - util::split(feedPath, '/').back() + - ".fullreport.tsv"); - evalColls.push_back({&reportStreams.back()}); - } else { - evalColls.push_back({0}); - } - count++; - } - - if (groundTruthFeedPath.size() == 0) { - LOG(ERROR) << "No ground truth feed path given (-g)."; - exit(1); - } - - std::set mots = - 
ad::cppgtfs::gtfs::flat::Route::getTypesFromString(util::trim(motStr)); - - std::vector evlFeeds(evlFeedPaths.size()); - - try { - LOG(DEBUG) << "Reading ground truth feed" << groundTruthFeedPath << " ..."; - ad::cppgtfs::Parser p(groundTruthFeedPath); - p.parse(&groundTruthFeed); - } catch (const ad::cppgtfs::ParserException& ex) { - LOG(ERROR) << "Could not parse input GTFS feed, reason was:"; - std::cerr << ex.what() << std::endl; - exit(1); - } - - size_t THREADS = std::thread::hardware_concurrency(); - - std::vector thrds(THREADS); - for (auto& thr : thrds) - thr = std::thread(&eval, &evlFeedPaths, &evalColls, &mots, &groundTruthFeed, - unique); - - for (auto& thr : thrds) thr.join(); - - if (json) { - util::json::Dict stats = {}; - - for (size_t i = 0; i < evalColls.size(); i++) { - util::json::Dict locStats = {}; - for (const auto& kv : evalColls[i].getStats()) { - locStats[kv.first] = kv.second; - } - stats[evlFeedPaths[i]] = locStats; - } - - util::json::Dict jsonStats; - - if (evalColls.size() == 1) { - jsonStats = {{"statistics", stats[evlFeedPaths[0]]}}; - } else { - if (avg) { - double count = evalColls.size(); - std::vector keys; - for (const auto& a : evalColls[0].getStats()) { - keys.push_back(a.first); - } - util::json::Dict avgStats; - for (const auto& k : keys) { - double sum = 0; - for (size_t i = 0; i < evalColls.size(); i++) { - sum += evalColls[i].getStats()[k]; - } - avgStats[k] = sum / count; - } - jsonStats = {{"statistics", avgStats}}; - } else { - jsonStats = {{"statistics", stats}}; - } - } - - util::json::Writer wr(&std::cout, 10, true); - wr.val(jsonStats); - wr.closeAll(); - } else { - for (size_t i = 0; i < evalColls.size(); i++) { - if (summarize) { - std::cout << evlFeedPaths[i] << ": "; - evalColls[i].printShortStats(&std::cout); - std::cout << std::endl; - } else { - std::cout << " == Evaluation results for " << evlFeedPaths[i] - << " ===" << std::endl; - evalColls[i].printStats(&std::cout); - } - } - } -} diff --git a/src/util 
b/src/util deleted file mode 160000 index d1c30e9..0000000 --- a/src/util +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d1c30e9ec4cb68803be073d35beb6af2b860bda4 diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt new file mode 100644 index 0000000..7888cd6 --- /dev/null +++ b/src/util/CMakeLists.txt @@ -0,0 +1,12 @@ +file(GLOB_RECURSE util_SRC *.cpp) +list(REMOVE_ITEM util_SRC TestMain.cpp) +add_library(util ${util_SRC}) + +find_package( ZLIB ) +if (ZLIB_FOUND) + include_directories( ${ZLIB_INCLUDE_DIRS} ) + target_link_libraries( util ${ZLIB_LIBRARIES} ) + add_definitions( -DZLIB_FOUND=${ZLIB_FOUND} ) +endif( ZLIB_FOUND ) + +add_subdirectory(tests) diff --git a/src/util/Misc.h b/src/util/Misc.h new file mode 100644 index 0000000..6bac94b --- /dev/null +++ b/src/util/Misc.h @@ -0,0 +1,136 @@ +// Copyright 2017, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#ifndef UTIL_MISC_H_ +#define UTIL_MISC_H_ + +#include +#include +#include +#include +#include +#include +#include + +#define UNUSED(expr) do { (void)(expr); } while (0) +#define TIME() std::chrono::high_resolution_clock::now() +#define TOOK(t1, t2) (std::chrono::duration_cast(t2 - t1).count() / 1000.0) +#define T_START(n) auto _tstart_##n = std::chrono::high_resolution_clock::now() +#define T_STOP(n) (std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - _tstart_##n).count() / 1000.0) + +namespace util { + +// cached first 10 powers of 10 +static int pow10[10] = { + 1, 10, 100, 1000, 10000, + 100000, 1000000, 10000000, 100000000, 1000000000}; + +// _____________________________________________________________________________ +inline uint64_t factorial(uint64_t n) { + if (n == 1) return n; + return n * factorial(n - 1); +} + +// _____________________________________________________________________________ +inline uint64_t atoul(const char* p) { + uint64_t ret = 0; + + while (*p) { + ret = ret * 10 + (*p++ - '0'); + } + + return ret; 
+} + +// _____________________________________________________________________________ +inline bool isFloatingPoint(const std::string& str) { + std::stringstream ss(str); + double f; + ss >> std::noskipws >> f; + return ss.eof() && ! ss.fail(); +} + +// _____________________________________________________________________________ +inline double atof(const char* p, uint8_t mn) { + // this atof implementation works only on "normal" float strings like + // 56.445 or -345.00, but should be faster than std::atof + double ret = 0.0; + bool neg = false; + if (*p == '-') { + neg = true; + p++; + } + + while (*p >= '0' && *p <= '9') { + ret = ret * 10.0 + (*p - '0'); + p++; + } + + if (*p == '.') { + p++; + double f = 0; + uint8_t n = 0; + + for (; n < mn && *p >= '0' && *p <= '9'; n++, p++) { + f = f * 10.0 + (*p - '0'); + } + + if (n < 10) + ret += f / pow10[n]; + else + ret += f / std::pow(10, n); + } + + if (neg) return -ret; + return ret; +} + +// _____________________________________________________________________________ +inline double atof(const char* p) { return atof(p, 38); } + +// _____________________________________________________________________________ +inline std::string getHomeDir() { + // parse implicit paths + const char* homedir = 0; + char* buf = 0; + + if ((homedir = getenv("HOME")) == 0) { + homedir = ""; + struct passwd pwd; + struct passwd* result; + size_t bufsize; + bufsize = sysconf(_SC_GETPW_R_SIZE_MAX); + if (bufsize == static_cast(-1)) bufsize = 0x4000; + buf = static_cast(malloc(bufsize)); + if (buf != 0) { + getpwuid_r(getuid(), &pwd, buf, bufsize, &result); + if (result != NULL) homedir = result->pw_dir; + } + } + + std::string ret(homedir); + if (buf) free(buf); + + return ret; +} + +// _____________________________________________________________________________ +inline std::string getTmpDir() { + // first, check if an env variable is set + const char* tmpdir = getenv("TMPDIR"); + if (tmpdir && std::strlen(tmpdir)) return 
std::string(tmpdir); + + // second, check if /tmp is writable + if (access("/tmp/", W_OK) == 0) return "/tmp"; + + // third, check if the cwd is writable + if (access(".", W_OK) == 0) return "."; + + // lastly, return the users home directory as a fallback + return getHomeDir(); +} + +} // namespace util + +#endif // UTIL_MISC_H_ diff --git a/src/util/Nullable.h b/src/util/Nullable.h new file mode 100644 index 0000000..069426f --- /dev/null +++ b/src/util/Nullable.h @@ -0,0 +1,116 @@ +// Copyright 2017, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#include + +#ifndef UTIL_NULLABLE_H_ +#define UTIL_NULLABLE_H_ + +namespace util { + +template +class Nullable { + public: + Nullable() + : val(), null(true) {} + Nullable(T* valPointer) + : val(), null(true) { + if (valPointer) { + assign(*valPointer); + } + } + Nullable(const T& value) + : val(value), null(false) {} + Nullable(const Nullable& other) + : val(other.val), null(other.isNull()) {} + + Nullable& operator=(const Nullable& other) { + if (!other.isNull()) val = other.get(); + null = other.isNull(); + return *this; + } + + T operator=(const T& other) { + assign(other); + return val; + } + + /** + * Passing through comparision operators + */ + + bool operator==(const Nullable& other) const { + return (other.isNull() && isNull()) || other.get() == get(); + } + + bool operator!=(const Nullable& other) const { + return !(*this == other); + } + + bool operator<(const Nullable& other) const { + return !other.isNull() && !isNull() && get() < other.get(); + } + + bool operator>(const Nullable& other) const { + return !(*this < other || *this == other); + } + + bool operator<=(const Nullable& other) const { + return *this < other || *this == other; + } + + bool operator>=(const Nullable& other) const { + return *this > other || *this == other; + } + + bool operator==(const T& other) const { + return !isNull() && other == get(); + } + + bool operator!=(const T& other) 
const { + return !(*this == other); + } + + bool operator<(const T& other) const { + return !isNull() && get() < other; + } + + bool operator>(const T& other) const { + return !(*this < other || *this == other); + } + + bool operator<=(const T& other) const { + return *this < other || *this == other; + } + + bool operator>=(const T& other) const { + return *this > other || *this == other; + } + + operator T() const { + return get(); + } + + bool isNull() const { + return null; + } + + T get() const { + if (!isNull()) return val; + else throw std::runtime_error("Trying to retrieve value of NULL object."); + } + +private: + void assign(T v) { + val = v; + null = false; + } + + T val; + bool null; +}; + +} + +#endif // UTIL_NULLABLE_H_ diff --git a/src/util/String.h b/src/util/String.h new file mode 100644 index 0000000..536b713 --- /dev/null +++ b/src/util/String.h @@ -0,0 +1,260 @@ +// Copyright 2017, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#ifndef UTIL_STRING_H_ +#define UTIL_STRING_H_ + +#include +#include +#include +#include +#include +#include + +namespace util { + +// _____________________________________________________________________________ +inline std::string urlDecode(const std::string& encoded) { + std::string decoded; + for (size_t i = 0; i < encoded.size(); ++i) { + char c = encoded[i]; + if (c == '%') { + std::string ah = encoded.substr(i + 1, 2); + char* nonProced = 0; + char hexVal = strtol(ah.c_str(), &nonProced, 16); + + if (ah.find_first_of("+-") > 1 && ah.size() - strlen(nonProced) == 2) { + c = hexVal; + i += 2; + } + } else if (c == '+') { + c = ' '; + } + decoded += c; + } + return decoded; +} + +// _____________________________________________________________________________ +inline std::string jsonStringEscape(const std::string& unesc) { + // modified code from + // http://stackoverflow.com/questions/7724448/simple-json-string-escape-for-c + std::ostringstream o; + for (auto c = 
unesc.cbegin(); c != unesc.cend(); c++) { + switch (*c) { + case '"': + o << "\\\""; + break; + case '\\': + o << "\\\\"; + break; + case '\b': + o << "\\b"; + break; + case '\f': + o << "\\f"; + break; + case '\n': + o << "\\n"; + break; + case '\r': + o << "\\r"; + break; + case '\t': + o << "\\t"; + break; + default: + if ('\x00' <= *c && *c <= '\x1f') { + o << "\\u" << std::hex << std::setw(4) << std::setfill('0') + << static_cast(*c); + } else { + o << *c; + } + } + } + return o.str(); +} + +// _____________________________________________________________________________ +inline bool replace(std::string& subj, const std::string& from, + const std::string& to) { + if (from.empty()) return false; + size_t start_pos = subj.find(from); + if (start_pos != std::string::npos) { + subj.replace(start_pos, from.length(), to); + return true; + } + + return false; +} + +// _____________________________________________________________________________ +inline bool replaceAll(std::string& subj, const std::string& from, + const std::string& to) { + if (from.empty()) return false; + bool found = false; + size_t s = subj.find(from, 0); + for (; s != std::string::npos; s = subj.find(from, s + to.length())) { + found = true; + subj.replace(s, from.length(), to); + } + + return found; +} + +// _____________________________________________________________________________ +inline std::string unixBasename(const std::string& pathname) { + return {std::find_if(pathname.rbegin(), pathname.rend(), + [](char c) { return c == '/'; }) + .base(), + pathname.end()}; +} + +// _____________________________________________________________________________ +template +inline std::string toString(T obj) { + std::stringstream ss; + ss << obj; + return ss.str(); +} + +// _____________________________________________________________________________ +inline std::vector split(std::string in, char sep) { + std::stringstream ss(in); + std::vector ret(1); + while (std::getline(ss, ret.back(), sep)) { + 
ret.push_back(""); + } + ret.pop_back(); + return ret; +} + +// _____________________________________________________________________________ +inline std::string ltrim(std::string str) { + str.erase(0, str.find_first_not_of(" \t\n\v\f\r")); + return str; +} + +// _____________________________________________________________________________ +inline std::string rtrim(std::string str) { + str.erase(str.find_last_not_of(" \t\n\v\f\r") + 1); + return str; +} + +// _____________________________________________________________________________ +inline std::string trim(std::string str) { return ltrim(rtrim(str)); } + +// _____________________________________________________________________________ +inline size_t editDist(const std::string& s1, const std::string& s2) { + // https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C++ + size_t len1 = s1.size(); + size_t len2 = s2.size(); + std::vector cur(len2 + 1); + std::vector prev(len2 + 1); + + for (size_t i = 0; i < prev.size(); i++) prev[i] = i; + + for (size_t i = 0; i < len1; i++) { + cur[0] = i + 1; + for (size_t j = 0; j < len2; j++) { + cur[j + 1] = + std::min(prev[1 + j] + 1, + std::min(cur[j] + 1, prev[j] + (s1[i] == s2[j] ? 
0 : 1))); + } + std::swap(cur, prev); + } + + return prev[len2]; +} + +// _____________________________________________________________________________ +inline size_t prefixEditDist(const std::string& prefix, const std::string& s, + size_t deltaMax) { + // https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C++ + size_t len1 = prefix.size(); + size_t len2 = std::min(s.size(), prefix.size() + deltaMax + 1); + std::vector d((len1 + 1) * (len2 + 1)); + + d[0] = 0; + for (size_t i = 1; i <= len1; ++i) d[i * (len2 + 1)] = i; + for (size_t i = 1; i <= len2; ++i) d[i] = i; + + for (size_t i = 1; i <= len1; i++) { + for (size_t j = 1; j <= len2; j++) { + d[i * (len2 + 1) + j] = std::min(std::min(d[(i - 1) * (len2 + 1) + j] + 1, + d[i * (len2 + 1) + j - 1] + 1), + d[(i - 1) * (len2 + 1) + j - 1] + + (prefix[i - 1] == s[j - 1] ? 0 : 1)); + } + } + + // take min of last row + size_t deltaMin = std::max(std::max(deltaMax + 1, prefix.size()), s.size()); + for (size_t i = 0; i <= len2; i++) { + if (d[len1 * (len2 + 1) + i] < deltaMin) + deltaMin = d[len1 * (len2 + 1) + i]; + } + + return deltaMin; +} + +// _____________________________________________________________________________ +inline size_t prefixEditDist(const std::string& prefix, const std::string& s) { + return prefixEditDist(prefix, s, s.size()); +} + +// _____________________________________________________________________________ +inline std::string toUpper(std::string str) { + std::transform(str.begin(), str.end(), str.begin(), toupper); + return str; +} + +// _____________________________________________________________________________ +inline std::string toLower(std::string str) { + std::transform(str.begin(), str.end(), str.begin(), tolower); + return str; +} + +// _____________________________________________________________________________ +template +inline std::string implode(Iter begin, const Iter& end, const char* del) { + std::stringstream ss; + size_t i = 0; + while (begin 
!= end) { + if (i != 0) ss << del; + ss << *begin; + begin++; + i++; + } + + return ss.str(); +} + +// _____________________________________________________________________________ +inline std::string normalizeWhiteSpace(const std::string& input) { + std::string ret; + bool ws = false; + for (size_t i = 0; i < input.size(); i++) { + if (std::isspace(input[i])) { + if (!ws) { + ret += " "; + ws = true; + } + continue; + } else { + ws = false; + ret += input[i]; + } + } + return ret; +} + +// _____________________________________________________________________________ +template +inline std::string implode(const std::vector& vec, const char* del) { + return implode(vec.begin(), vec.end(), del); +} +} + +#endif // UTIL_STRING_H_ diff --git a/src/util/geo/BezierCurve.h b/src/util/geo/BezierCurve.h new file mode 100644 index 0000000..23f3263 --- /dev/null +++ b/src/util/geo/BezierCurve.h @@ -0,0 +1,55 @@ +// Copyright 2016, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifndef UTIL_GEO_BEZIERCURVE_H_ +#define UTIL_GEO_BEZIERCURVE_H_ + +#include +#include "util/geo/Geo.h" +#include "util/geo/PolyLine.h" + +namespace util { +namespace geo { + +struct CubicPolynom { + CubicPolynom(double a, double b, double c, double d, double x) + : a(a), b(b), c(c), d(d), x(x) {} + CubicPolynom() : a(0), b(0), c(0), d(0), x(0) {} + double a, b, c, d, x; + + double valueAt(double x) const; +}; + +/** + * Bezier curve + */ +template +class BezierCurve { + public: + BezierCurve(const Point& a, const Point& b, const Point& c, + const Point& d); + + const PolyLine& render(double d); + + private: + double _d; + + // the x and y polynoms for this spline + CubicPolynom _xp, _yp; + + // store the rendered polyline for quicker access + PolyLine _rendered; + bool _didRender; + + void recalcPolynoms(const Point& x, const Point& b, const Point& c, + const Point& d); + + Point valueAt(double t) const; +}; + +#include "util/geo/BezierCurve.tpp" +} +} + +#endif // UTIL_GEO_BEZIERCURVE_H_ diff --git a/src/util/geo/BezierCurve.tpp b/src/util/geo/BezierCurve.tpp new file mode 100644 index 0000000..fb7e6ca --- /dev/null +++ b/src/util/geo/BezierCurve.tpp @@ -0,0 +1,70 @@ +// Copyright 2016, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +// _____________________________________________________________________________ +template +BezierCurve::BezierCurve(const Point& a, const Point& b, + const Point& c, const Point& d) + : _d(dist(a, d)) { + assert(_d > 0); + recalcPolynoms(a, b, c, d); +} + +// _____________________________________________________________________________ +template +void BezierCurve::recalcPolynoms(const Point& a, const Point& b, + const Point& c, const Point& d) { + _xp.a = a.getX(); + _xp.b = 3.0 * (b.getX() - a.getX()); + _xp.c = 3.0 * (c.getX() - b.getX()) - _xp.b; + _xp.d = d.getX() - a.getX() - _xp.c - _xp.b; + + _yp.a = a.getY(); + _yp.b = 3.0 * (b.getY() - a.getY()); + _yp.c = 3.0 * (c.getY() - b.getY()) - _yp.b; + _yp.d = d.getY() - a.getY() - _yp.c - _yp.b; + + _didRender = false; +} + +// _____________________________________________________________________________ +template +Point BezierCurve::valueAt(double t) const { + return Point(_xp.valueAt(t), _yp.valueAt(t)); +} + +// _____________________________________________________________________________ +template +const PolyLine& BezierCurve::render(double d) { + assert(d > 0); + if (_didRender) return _rendered; + + if (_d == 0) { + _rendered << Point(_xp.a, _yp.a) << Point(_xp.a, _yp.a); + return _rendered; + } + + _rendered.empty(); + double n = _d / d, dt = 1 / n, t = 0; + + bool cancel = false; + while (true) { + _rendered << valueAt(t); + t += dt; + if (cancel) break; + if (t > 1) { + t = 1; + cancel = true; + } + } + + _didRender = true; + return _rendered; +} + +// _____________________________________________________________________________ +double CubicPolynom::valueAt(double atx) const { + double dx = atx - x; + return a + b * dx + c * dx * dx + d * dx * dx * dx; +} diff --git a/src/util/geo/Box.h b/src/util/geo/Box.h new file mode 100644 index 0000000..b65f1d3 --- /dev/null +++ b/src/util/geo/Box.h @@ -0,0 +1,91 @@ +// Copyright 2016, University of Freiburg, +// Chair of Algorithms 
and Data Structures. +// Author: Patrick Brosi + +#ifndef UTIL_GEO_BOX_H_ +#define UTIL_GEO_BOX_H_ + +#include "./Point.h" + +namespace util { +namespace geo { + +template +class Box { + public: + // maximum inverse box as default value of box + Box() + : _ll(std::numeric_limits::max(), std::numeric_limits::max()), + _ur(std::numeric_limits::lowest(), std::numeric_limits::lowest()) {} + Box(const Point& ll, const Point& ur) : _ll(ll), _ur(ur) {} + const Point& getLowerLeft() const { return _ll; } + const Point& getUpperRight() const { return _ur; } + + Point& getLowerLeft() { return _ll; } + Point& getUpperRight() { return _ur; } + + void setLowerLeft(const Point& ll) { _ll = ll; } + void setUpperRight(const Point& ur) { _ur = ur; } + + bool operator==(const Box& b) const { + return getLowerLeft() == b.getLowerLeft() && + getUpperRight() == b.getUpperRight(); + } + + bool operator!=(const Box& p) const { return !(*this == p); } + + private: + Point _ll, _ur; +}; + +template +class RotatedBox { + public: + RotatedBox() : _box(), _deg(0), _center() {} + RotatedBox(const Box& box) + : _box(box), + _deg(0), + _center(Point( + (box.getUpperRight().getX() - box.getLowerLeft().getX()) / T(2), + (box.getUpperRight().getY() - box.getLowerLeft().getY()) / T(2))) {} + RotatedBox(const Point& ll, const Point& ur) + : _box(ll, ur), + _deg(0), + _center(Point((ur.getX() - ll.getX()) / T(2), + (ur.getY() - ll.getY()) / T(2))) {} + RotatedBox(const Box& box, double deg) + : _box(box), + _deg(deg), + _center(Point( + (box.getUpperRight().getX() - box.getLowerLeft().getX()) / T(2), + (box.getUpperRight().getY() - box.getLowerLeft().getY()) / T(2))) {} + RotatedBox(const Point& ll, const Point& ur, double deg) + : _box(ll, ur), + _deg(deg), + _center(Point((ur.getX() - ll.getX()) / T(2), + (ur.getY() - ll.getY()) / T(2))) {} + RotatedBox(const Box& box, double deg, const Point& center) + : _box(box), _deg(deg), _center(center) {} + RotatedBox(const Point& ll, const Point& ur, double 
deg, + const Point& center) + : _box(ll, ur), _deg(deg), _center(center) {} + + const Box& getBox() const { return _box; } + Box& getBox() { return _box; } + + double getDegree() const { return _deg; } + const Point& getCenter() const { return _center; } + Point& getCenter() { return _center; } + + void setDegree(double deg) { _deg = deg; } + + private: + Box _box; + double _deg; + Point _center; +}; + +} // namespace geo +} // namespace util + +#endif // UTIL_GEO_BOX_H_ diff --git a/src/util/geo/Geo.h b/src/util/geo/Geo.h new file mode 100644 index 0000000..ffbfada --- /dev/null +++ b/src/util/geo/Geo.h @@ -0,0 +1,1630 @@ +// Copyright 2016, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#ifndef UTIL_GEO_GEO_H_ +#define UTIL_GEO_GEO_H_ + +#define _USE_MATH_DEFINES + +#include +#include +#include +#include +#include +#include +#include "util/Misc.h" +#include "util/String.h" +#include "util/geo/Box.h" +#include "util/geo/Line.h" +#include "util/geo/Point.h" +#include "util/geo/Polygon.h" + +// ------------------- +// Geometry stuff +// ------------------ + +namespace util { +namespace geo { + +// convenience aliases + +typedef Point DPoint; +typedef Point FPoint; +typedef Point IPoint; + +typedef LineSegment DLineSegment; +typedef LineSegment FLineSegment; +typedef LineSegment ILineSegment; + +typedef Line DLine; +typedef Line FLine; +typedef Line ILine; + +typedef Box DBox; +typedef Box FBox; +typedef Box IBox; + +typedef Polygon DPolygon; +typedef Polygon FPolygon; +typedef Polygon IPolygon; + +const static double EPSILON = 0.00001; +const static double RAD = 0.017453292519943295; // PI/180 + +// _____________________________________________________________________________ +template +inline Box pad(const Box& box, double padding) { + return Box(Point(box.getLowerLeft().getX() - padding, + box.getLowerLeft().getY() - padding), + Point(box.getUpperRight().getX() + padding, + box.getUpperRight().getY() + padding)); 
+} + +// _____________________________________________________________________________ +template +inline Point centroid(const Point p) { + return p; +} + +// _____________________________________________________________________________ +template +inline Point centroid(const LineSegment ls) { + return Point((ls.first.getX() + ls.second.getX()) / T(2), + (ls.first.getY() + ls.second.getY()) / T(2)); +} + +// _____________________________________________________________________________ +template +inline Point centroid(const Line ls) { + double x = 0, y = 0; + for (const auto& p : ls) { + x += p.getX(); + y += p.getY(); + } + return Point(x / T(ls.size()), y / T(ls.size())); +} + +// _____________________________________________________________________________ +template +inline Point centroid(const Polygon ls) { + return centroid(ls.getOuter()); +} + +// _____________________________________________________________________________ +template +inline Point centroid(const Box box) { + return centroid(LineSegment(box.getLowerLeft(), box.getUpperRight())); +} + +// _____________________________________________________________________________ +template class Geometry> +inline Point centroid(std::vector> multigeo) { + Line a; + for (const auto& g : multigeo) a.push_back(centroid(g)); + return centroid(a); +} + +// _____________________________________________________________________________ +template +inline Point rotate(const Point& p, double deg) { + UNUSED(deg); + return p; +} + +// _____________________________________________________________________________ +template +inline Point rotate(Point p, double deg, const Point& c) { + deg *= -RAD; + double si = sin(deg); + double co = cos(deg); + p = p - c; + + return Point(p.getX() * co - p.getY() * si, + p.getX() * si + p.getY() * co) + + c; +} + +// _____________________________________________________________________________ +template +inline LineSegment rotate(LineSegment geo, double deg, + const Point& c) { + geo.first = 
rotate(geo.first, deg, c);
+  geo.second = rotate(geo.second, deg, c);
+  return geo;
+}
+
+// _____________________________________________________________________________
+// Rotates a line segment by deg degrees around its own centroid.
+template <typename T>
+inline LineSegment<T> rotate(LineSegment<T> geo, double deg) {
+  // FIX: the original body was "return (geo, deg, centroid(geo));" -- a
+  // comma expression that never calls rotate() at all. Delegate to the
+  // three-argument overload instead.
+  return rotate(geo, deg, centroid(geo));
+}
+
+// _____________________________________________________________________________
+// Rotates every point of a line by deg degrees around center c.
+template <typename T>
+inline Line<T> rotate(Line<T> geo, double deg, const Point<T>& c) {
+  for (size_t i = 0; i < geo.size(); i++) geo[i] = rotate(geo[i], deg, c);
+  return geo;
+}
+
+// _____________________________________________________________________________
+// Rotates a polygon's outer hull by deg degrees around center c.
+template <typename T>
+inline Polygon<T> rotate(Polygon<T> geo, double deg, const Point<T>& c) {
+  for (size_t i = 0; i < geo.getOuter().size(); i++)
+    geo.getOuter()[i] = rotate(geo.getOuter()[i], deg, c);
+  return geo;
+}
+
+// _____________________________________________________________________________
+template