diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..0d364d6 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,4 @@ +/build + +/Dockerfile +gtfs-out diff --git a/.forgejo/workflows/test.yml b/.forgejo/workflows/test.yml new file mode 100644 index 0000000..3344ec5 --- /dev/null +++ b/.forgejo/workflows/test.yml @@ -0,0 +1,33 @@ +name: GTFS Shapes Generation + +on: [push] + +jobs: + modify_routes: + runs-on: docker + steps: + - name: Download sample feed + run: curl -o sample-feed.zip https://download.data.public.lu/resources/horaires-et-arrets-des-transport-publics-gtfs/20240530-080402/gtfs-20240529-20240621.zip + + - name: Download OSM data + run: curl -o luxembourg-latest.osm.pbf https://download.geofabrik.de/europe/luxembourg-latest.osm.pbf + + - name: Convert OSM data to .osm + run: | + apt update && \ + apt install -y osmctools && \ + osmconvert luxembourg-latest.osm.pbf -o=luxembourg-latest.osm + + - name: Run Shape Generation + uses: gtfs-actions/generate-shapes@main + with: + gtfs_file: sample-feed.zip + osm_file: luxembourg-latest.osm + mot: bus + output_file: modified-feed.zip + + - name: Upload modified feed + uses: actions/upload-artifact@v3 + with: + name: modified-feed + path: modified-feed.zip \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 6bd5f05..5790aa1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,12 @@ [submodule "src/cppgtfs"] path = src/cppgtfs - url = https://ad-git.informatik.uni-freiburg.de/ad/cppgtfs.git + url = https://github.com/ad-freiburg/cppgtfs.git [submodule "src/xml"] path = src/xml - url = https://git.patrickbrosi.de/patrick/xmlparser + url = https://github.com/patrickbr/pfxml.git [submodule "src/configparser"] path = src/configparser url = https://git.patrickbrosi.de/patrick/configparser +[submodule "src/util"] + path = src/util + url = https://github.com/ad-freiburg/util diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index ac13003..0000000 --- a/.travis.yml 
+++ /dev/null @@ -1,24 +0,0 @@ -language: generic -sudo: false -dist: trusty - -addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - cmake - -before_script: - - mkdir build - - cd build - - cmake .. - -script: - - make -j4 - - make test - -notifications: - email: - on_success: never - on_failure: always diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b271bb..b045164 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,27 +1,24 @@ -cmake_minimum_required (VERSION 2.8) +cmake_minimum_required (VERSION 3.5) +set(CMAKE_CXX_STANDARD 11) project (pfaedle) -set(CPPLINT "${CMAKE_SOURCE_DIR}/cpplint.py") -include(cmake/cpplint.cmake) - -set(CPPLINT_PROJECT_ROOT "src") +if (CMAKE_BUILD_TYPE) + string(SUBSTRING ${CMAKE_BUILD_TYPE} 0 1 FIRST_CHAR) + string(TOUPPER ${FIRST_CHAR} FIRST_CHAR) + string(REGEX REPLACE "^.(.*)" "${FIRST_CHAR}\\1" CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}") +endif() enable_testing() set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/") -set(EXECUTABLE_OUTPUT_PATH "${CMAKE_SOURCE_DIR}/build") - - -find_package(OpenMP) -if(OPENMP_FOUND) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -endif() +set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}") # set compiler flags, see http://stackoverflow.com/questions/7724569/debug-vs-release-in-cmake -set(CMAKE_CXX_FLAGS "-fopenmp -Ofast -fno-signed-zeros -fno-trapping-math -frename-registers -Wall -Wno-format-extra-args -Wextra -Wformat-nonliteral -Wformat-security -Wformat=2") -set(CMAKE_CXX_FLAGS_DEBUG "-Og -g -DLOGLEVEL=3") +set(CMAKE_CXX_FLAGS "-Ofast -fno-signed-zeros -fno-trapping-math -Wall -Wno-format-extra-args -Wextra -Wformat-nonliteral -Wformat-security -Wformat=2 -Wextra -Wno-implicit-fallthrough -pedantic -Wno-keyword-macro") +set(CMAKE_CXX_FLAGS_SANITIZE "-Og -g -fsanitize=address -fsanitize=leak -fsanitize=undefined -DLOGLEVEL=3 -DPFAEDLE_DBG=1") +set(CMAKE_CXX_FLAGS_PROFILE "-g -pg 
-DLOGLEVEL=3 -DPFAEDLE_DBG=1") +set(CMAKE_CXX_FLAGS_DEBUG "-Og -g -DLOGLEVEL=3 -DPFAEDLE_DBG=1") set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -DLOGLEVEL=2") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -DLOGLEVEL=2") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -g -DLOGLEVEL=3") @@ -29,21 +26,27 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -g -DLOGLEVEL=3") # export compile commands to tools like clang set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -# Compiler-specific C++11 activation. -if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU") - execute_process( - COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) - if ((GCC_VERSION VERSION_GREATER 4.8 OR GCC_VERSION VERSION_EQUAL 4.8)) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") - else () - message(FATAL_ERROR "${PROJECT_NAME} requires g++ 4.8 or greater!") - endif () -elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++ -std=c++11") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPFAEDLE_PRECISION=${PFAEDLE_PRECISION}") + +find_package(LibZip) +find_package(ZLIB) +find_package(BZip2) + +if (LIBZIP_FOUND) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLIBZIP_FOUND=1") +endif() + +if (ZLIB_FOUND) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DZLIB_FOUND=1") else () - message(FATAL_ERROR "Your C++ compiler does not support C++11.") -endif () + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPFXML_NO_ZLIB=1") +endif() + +if (BZIP2_FOUND) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBZLIB_FOUND=1") +else () + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPFXML_NO_BZLIB=1") +endif() # http://brianmilco.blogspot.de/2012/11/cmake-automatically-use-git-tags-as.html include(GetGitRevisionDescription) @@ -55,27 +58,32 @@ else() set(VERSION_GIT_FULL "${VERSION_GIT}-${VERSION_GIT_IS_DIRTY}") endif() +# Download submodules if needed + +if(NOT EXISTS ${CMAKE_SOURCE_DIR}/src/configparser/.git OR 
NOT EXISTS ${CMAKE_SOURCE_DIR}/src/cppgtfs/.git OR NOT EXISTS ${CMAKE_SOURCE_DIR}/src/xml/.git) + execute_process( + COMMAND git submodule update --init --recursive + ) +endif() + add_subdirectory(src) # tests add_test("utilTest" utilTest) - -# custom eval target - -add_custom_target( - eval - COMMAND make - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}//eval -) +add_test("pfaedleTest" pfaedleTest) # handles install target - install( - FILES README.md pfaedle.cfg DESTINATION share/${PROJECT_NAME} PERMISSIONS WORLD_READ + FILES pfaedle.cfg DESTINATION etc/${PROJECT_NAME} COMPONENT config PERMISSIONS OWNER_READ GROUP_READ WORLD_READ ) install( - FILES build/pfaedle DESTINATION bin - PERMISSIONS WORLD_EXECUTE + FILES ${CMAKE_BINARY_DIR}/pfaedle DESTINATION bin + PERMISSIONS OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE COMPONENT binaries +) + +install( + FILES ${CMAKE_BINARY_DIR}/shapevl DESTINATION bin + PERMISSIONS OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE COMPONENT binaries ) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d51404a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,25 @@ +FROM debian:bookworm-slim AS builder + +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y g++ cmake git libzip-dev zlib1g-dev libbz2-dev + +ADD . /app +RUN mkdir build && \ + cd build && \ + cmake .. 
&& \ + make -j && \ + pwd && \ + make install + +FROM debian:bookworm-slim + +RUN apt-get update && \ + apt-get install -y libzip4 zlib1g libbz2-1.0 && \ + rm -rf /var/lib/apt/lists/* + +COPY --from=builder /usr/local/etc/pfaedle /usr/local/etc/pfaedle +COPY --from=builder /usr/local/bin/pfaedle /usr/local/bin/pfaedle + +ENTRYPOINT ["/usr/local/bin/pfaedle"] diff --git a/README.md b/README.md index 21f89a2..45384eb 100644 --- a/README.md +++ b/README.md @@ -1,42 +1,51 @@ -![Map-Matched path of a single train through Switzerland](geo/schweiz_mmatched.png?raw=true) +[![Left: station-to-station path of a single train through Switzerland obtained from schedule timetable data. Right: path of the same train map-matched by pfaedle.](geo/schweiz_ex_res.png?raw=true)](geo/schweiz_ex.png?raw=true) +*Left: station-to-station path of a single train through Switzerland obtained from official schedule data. Right: path of the same train map-matched by pfaedle.* -[![Build -Status](https://travis-ci.org/ad-freiburg/pfaedle.svg?branch=master)](https://travis-ci.org/ad-freiburg/pfaedle) +[![Left: station-to-station path of a single bus through Stuttgart obtained from official schedule data. Right: path of the same bus map-matched by pfaedle.](geo/stuttgart_ex_res.png?raw=true)](geo/stuttgart_ex.png?raw=true) +*Left: station-to-station path of a single bus through Stuttgart obtained from official schedule data. Right: path of the same bus map-matched by pfaedle.* + +[![Build](https://github.com/ad-freiburg/pfaedle/actions/workflows/build.yml/badge.svg)](https://github.com/ad-freiburg/pfaedle/actions/workflows/build.yml) # pfaedle -Precise map-matching for public transit schedules (GTFS data). +Precise OpenStreetMap (OSM) map-matching for public transit schedules ([GTFS](https://developers.google.com/transit/gtfs/reference/) data). +First described in [this 2018 SIGSPATIAL paper](http://ad-publications.informatik.uni-freiburg.de/SIGSPATIAL_Sparse%20map%20matching%202018.pdf). 
+ +For a quick visual inspection of the shape quality, see for example the schedule data for Germany or Switzerland in our tool [TRAVIC](https://travic.app/?z=7&x=1261608.6&y=6430601.6). ## Requirements * `cmake` - * `gcc >= 4.8` + * `gcc >= 5.0` (or `clang >= 3.9`) + * `libzip` (*optional*, for ZIP support) + * `zlib` (*optional*, for gzip support) + * `libbz2` (*optional*, for bzip2 support) ## Building and Installation Fetch this repository and init submodules: -``` -git clone --recurse-submodules https://github.com/ad-freiburg/pfaedle +```shell +$ git clone --recurse-submodules https://github.com/ad-freiburg/pfaedle ``` -``` -mkdir build && cd build -cmake .. -make -j +```shell +$ mkdir build && cd build +$ cmake .. +$ make -j ``` To install, type -``` -make install +```shell +$ make install ``` # General Usage ## Generating shapes for a GTFS feed -``` -pfaedle -c -x +```shell +$ pfaedle -x ``` A shape'd version of the input GTFS feed will be written to `./gtfs-out`. @@ -44,26 +53,19 @@ A shape'd version of the input GTFS feed will be written to `./gtfs-out`. By default, shapes are only calculated for trips that don't have a shape in the input feed. To drop all existing shapes, use the `-D` flag. -For example, you may generate (and replace existing, see -D parameter) shapes for the GTFS dataset for Freiburg like this: +For example, you may generate (and replace existing, see `-D` flag) shapes for the GTFS dataset for Freiburg like this: -``` -$ mkdir freiburg_gtfs && cd freiburg_gtfs +```shell $ wget https://fritz.freiburg.de/csv_Downloads/VAGFR.zip -$ unzip VAGFR.zip $ wget http://download.geofabrik.de/europe/germany/baden-wuerttemberg/freiburg-regbez-latest.osm.bz2 -$ bunzip2 freiburg-regbez-latest.osm.bz2 -$ mkdir gtfs-out -$ pfaedle -D -c pfaedle.cfg -x freiburg-regbez-latest.osm . +$ pfaedle -D -x freiburg-regbez-latest.osm.bz2 VAGFR.zip ``` -A default configuration file `pfaedle.cfg` can be found in this repo. 
- - ## Generating shapes for a specific MOT -To generate shapes only for a specific mot, use the `-m` option. Possible -values are either `tram`, `bus`, `rail`, `subway`, `ferry`, `funicular`, -`gondola`, `all` (default). +To generate shapes for a specific mot only, use the `-m` option. Possible +values are either `tram`, `bus`, `coach`, `rail`, `subway`, `ferry`, `funicular`, +`gondola`, `all` (default) or GTFS route type codes (0, 1, 2, 3, 4, 5, 6, 7, or [extended route types](https://developers.google.com/transit/gtfs/reference/extended-route-types)). Integer codes will only match the specific route type, while string codes will match classes of route types. For example, `-m 101` will only match routes with `route_type` `101` (high speed rail), while `-m rail` will match any rail service encoded via a standard `route_type` `2` or an extended `route_type` describing a rail service (e.g. `100`, `101`, `102`, ...). Multiple values can be specified (comma separated). @@ -71,12 +73,30 @@ Multiple values can be specified (comma separated). `pfaedle` comes with the ability to filter OpenStreetMap data. If you specify the `-X` flag, `pfaedle` will filter the input OSM file and output a new OSM -file which contains *exactly* the data needed to calculate the shapes for the +file which contains exactly the data needed to calculate the shapes for the input GTFS feed and the input configuration. -This can be used to avoid parsing (for example) the entire world.osm on each +This can be used to avoid parsing (for example) the entire `planet.osm` on each run. 
+## via Docker + +You can use the [Docker image](https://github.com/orgs/ad-freiburg/packages/container/package/pfaedle) by mounting the OSM & GTFS data into the container: + +```shell +$ docker pull ghcr.io/ad-freiburg/pfaedle:latest +$ docker run -i --rm \ + # mount OSM data + --volume /path/to/osm/data:/osm \ + # mount GTFS data + --volume /path/to/gtfs/data:/gtfs \ + # mount default output folder gtfs-out + --volume /path/to/output-dir:/gtfs-out \ + ghcr.io/ad-freiburg/pfaedle:latest \ + # tell pfaedle where to find the data + -x /osm/osm-data.xml.bz2 -i /gtfs/myfeed.zip +``` + ## Debugging The following flags may be useful for debugging: @@ -84,39 +104,12 @@ The following flags may be useful for debugging: * `-T ` only calculate shape for a single trip (specified via its GTFS trip id) and output it as GeoJSON to `/path.json` * `--write-graph` write the graph used for routing as GeoJSON to - `/graph.json` - * `--write-cgraph` if `-T` is set, write the combination graph used for - routing as GeoJSON to `/combgraph.json` + * `--write-trgraph` write the complete network graph to `/trgraph.json` # Configuration -The main config file distributed with this repository is `pfaedle.cfg`. The -config file has some comments which hopefully explain the meaning behind the -parameters +A default configuration file `pfaedle.cfg` can be found in this repo and will be installed with `make install`. Custom configuration files can be specified with the `-c` flag. If no `-c` flag is set, `pfaedle` will parse and merge the following cfg files in the given order (if present): `/etc/pfaedle/pfaedle.cfg`, `$HOME/.config/pfaedle/pfaedle.cfg`, `/pfaedle.cfg`. Values given in later files will overwrite earlier defined values. -# Evaluation +# Attribution -You may run an entire evaluation of our testing datasets Vitoria-Gasteiz, Paris, Switzerland and -Stuttgart with - -``` -mkdir build && cd build -cmake .. 
-make -j -make eval -``` - -*Note:* this will download, and filter, the entire OSM files for Spain and the -Stuttgart region. Make sure you have enough space left on your hard drive. - -## Evaluation requirements - - * zlib - -On Debianesque systems, type - -``` -sudo apt-get install zlib1g-dev -``` - -to install the dependencies. +Note that the `shapes.txt` produced by `pfaedle` is based on OpenStreetMap data, which is licensed under ODbL 1.0 (see [here](https://osm.org/copyright)). If you copy, distribute, transmit or adapt the shapefied GTFS feed, please credit the contributors of OpenStreetMap. diff --git a/action.yml b/action.yml new file mode 100644 index 0000000..6fe43de --- /dev/null +++ b/action.yml @@ -0,0 +1,30 @@ +name: 'Generate shapes from OSM data' +description: 'Use pfaedle to generate shapes from OSM data for a GTFS feed.' + +inputs: + gtfs_file: + description: 'Path to GTFS .zip file.' + required: true + osm_file: + description: 'Path to OSM .pbf file.' + required: true + mot: + description: 'Mode of transport to generate shapes for.' + required: false + default: 'all' + output_file: + description: 'Path to output GTFS .zip file.' + required: true + +runs: + using: 'docker' + image: 'Dockerfile' + args: + - '-i' + - ${{ inputs.gtfs_file }} + - '-x' + - ${{ inputs.osm_file }} + - '-m' + - ${{ inputs.mot }} + - '-o' + - ${{ inputs.output_file }} \ No newline at end of file diff --git a/cmake/FindLibZip.cmake b/cmake/FindLibZip.cmake new file mode 100644 index 0000000..b6f4237 --- /dev/null +++ b/cmake/FindLibZip.cmake @@ -0,0 +1,52 @@ +# CMake module to search for libzip +# +# Once done this will define +# +# LIBZIP_FOUND - system has the zip library +# LIBZIP_INCLUDE_DIRS - the zip include directories +# LIBZIP_LIBRARY - Link this to use the zip library +# +# Copyright (c) 2017, Paul Blottiere, +# Copyright (c) 2017, Larry Shaffer, +# Add support for finding zipconf.h in separate location, e.g. 
on macOS +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. + +FIND_PATH(LIBZIP_INCLUDE_DIR + zip.h + "$ENV{LIB_DIR}/include" + "$ENV{INCLUDE}" + /usr/local/include + /usr/include +) + +FIND_PATH(LIBZIP_CONF_INCLUDE_DIR + zipconf.h + "$ENV{LIB_DIR}/include" + "$ENV{LIB_DIR}/lib/libzip/include" + "$ENV{LIB}/lib/libzip/include" + /usr/local/lib/libzip/include + /usr/lib/libzip/include + /usr/local/include + /usr/include + "$ENV{INCLUDE}" +) + +FIND_LIBRARY(LIBZIP_LIBRARY NAMES zip PATHS "$ENV{LIB_DIR}/lib" "$ENV{LIB}" /usr/local/lib /usr/lib ) + +INCLUDE(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibZip DEFAULT_MSG + LIBZIP_LIBRARY LIBZIP_INCLUDE_DIR LIBZIP_CONF_INCLUDE_DIR) + +SET(LIBZIP_INCLUDE_DIRS ${LIBZIP_INCLUDE_DIR} ${LIBZIP_CONF_INCLUDE_DIR}) +MARK_AS_ADVANCED(LIBZIP_LIBRARY LIBZIP_INCLUDE_DIR LIBZIP_CONF_INCLUDE_DIR LIBZIP_INCLUDE_DIRS) + +IF (LIBZIP_FOUND) + MESSAGE(STATUS "Found libzip: ${LIBZIP_LIBRARY}") +ELSE (LIBZIP_FOUND) + SET(LIBZIP_LIBRARY "") + SET(LIBZIP_INCLUDE_DIR "") + SET(LIBZIP_CONF_INCLUDE_DIR "") + MESSAGE(STATUS "Could not find libzip") +ENDIF (LIBZIP_FOUND) diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake deleted file mode 100644 index 6751f0f..0000000 --- a/cmake/cpplint.cmake +++ /dev/null @@ -1,133 +0,0 @@ -# -# CMake module to C++ static analysis against -# Google C++ Style Guide (https://google.github.io/styleguide/cppguide.html) -# -# For more detials please follow links: -# -# - https://github.com/google/styleguide -# - https://pypi.python.org/pypi/cpplint -# - https://github.com/theandrewdavis/cpplint -# -# Copyright (c) 2016 Piotr L. Figlarek -# -# Usage -# ----- -# Include this module via CMake include(...) command and then add each source directory -# via introduced by this module cpplint_add_subdirectory(...) function. 
Added directory -# will be recursivelly scanned and all available files will be checked. -# -# Example -# ------- -# # include CMake module -# include(cmake/cpplint.cmake) -# -# # add all source code directories -# cpplint_add_subdirectory(core) -# cpplint_add_subdirectory(modules/c-bind) -# -# License (MIT) -# ------------- -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- - -# select files extensions to check -option(CPPLINT_TEST_C_FILES "Check *.c files" ON) -option(CPPLINT_TEST_H_FILES "Check *.h files" ON) -option(CPPLINT_TEST_CPP_FILES "Check *.cpp files" ON) -option(CPPLINT_TEST_HPP_FILES "Check *.hpp files" ON) -option(CPPLINT_TEST_TPP_FILES "Check *.tpp files" ON) - -# target to run cpplint.py for all configured sources -set(CPPLINT_TARGET lint CACHE STRING "Name of C++ style checker target") - -# project root directory -set(CPPLINT_PROJECT_ROOT "${PROJECT_SOURCE_DIR}" CACHE STRING "Project ROOT directory") - - -# find cpplint.py script -if(CPPLINT) - message(STATUS "cpplint parser: ${CPPLINT}") -else() - message(FATAL_ERROR "cpplint script: NOT FOUND! " - "Please set the CPPLINT variable.") -endif() - - -# common target to concatenate all cpplint.py targets -add_custom_target(${CPPLINT_TARGET} ALL) - - -# use cpplint.py to check source code files inside DIR directory -function(cpplint_add_subdirectory DIR) - # create relative path to the directory - set(ABSOLUTE_DIR ${CMAKE_CURRENT_LIST_DIR}/${DIR}) - - # add *.c files - if(CPPLINT_TEST_C_FILES) - set(EXTENSIONS ${EXTENSIONS}c,) - set(FILES_TO_CHECK ${FILES_TO_CHECK} ${ABSOLUTE_DIR}/*.c) - endif() - - # add *.h files - if(CPPLINT_TEST_H_FILES) - set(EXTENSIONS ${EXTENSIONS}h,) - set(FILES_TO_CHECK ${FILES_TO_CHECK} ${ABSOLUTE_DIR}/*.h) - endif() - - # add *.cpp files - if(CPPLINT_TEST_CPP_FILES) - set(EXTENSIONS ${EXTENSIONS}cpp,) - set(FILES_TO_CHECK ${FILES_TO_CHECK} ${ABSOLUTE_DIR}/*.cpp) - endif() - - # add *.hpp files - if(CPPLINT_TEST_HPP_FILES) - set(EXTENSIONS ${EXTENSIONS}hpp,) - set(FILES_TO_CHECK ${FILES_TO_CHECK} ${ABSOLUTE_DIR}/*.hpp) - endif() - - # add *.tpp files - if(CPPLINT_TEST_TPP_FILES) - set(EXTENSIONS ${EXTENSIONS}tpp,) - set(FILES_TO_CHECK ${FILES_TO_CHECK} ${ABSOLUTE_DIR}/*.tpp) - endif() - - # find all source files inside project - file(GLOB_RECURSE LIST_OF_FILES ${FILES_TO_CHECK}) - - # create valid target name for this check - string(REGEX 
REPLACE "/" "." TEST_NAME ${DIR}) - set(TARGET_NAME ${CPPLINT_TARGET}.${TEST_NAME}) - - # perform cpplint check - add_custom_target(${TARGET_NAME} - COMMAND ${CPPLINT} "--extensions=${EXTENSIONS}" - "--root=${CPPLINT_PROJECT_ROOT}" - "--quiet" - ${LIST_OF_FILES} - DEPENDS ${LIST_OF_FILES} - COMMENT "cpplint: Checking source code style" - ) - - # run this target when root cpplint.py test is triggered - add_dependencies(${CPPLINT_TARGET} ${TARGET_NAME}) - - # add this test to CTest - add_test(${TARGET_NAME} ${CMAKE_MAKE_PROGRAM} ${TARGET_NAME}) -endfunction() diff --git a/cpplint.py b/cpplint.py deleted file mode 100755 index db1c123..0000000 --- a/cpplint.py +++ /dev/null @@ -1,6233 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (c) 2009 Google Inc. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -"""Does google-lint on c++ files. - -The goal of this script is to identify places in the code that *may* -be in non-compliance with google style. It does not attempt to fix -up these problems -- the point is to educate. It does also not -attempt to find all problems, or to ensure that everything it does -find is legitimately a problem. - -In particular, we can get very confused by /* and // inside strings! -We do a small hack, which is to ignore //'s with "'s after them on the -same line, but it is far from perfect (in either direction). -""" - -import codecs -import copy -import getopt -import math # for log -import os -import re -import sre_compile -import string -import sys -import unicodedata - -try: - xrange # Python 2 -except NameError: - xrange = range # Python 3 - - -_USAGE = """ -Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...] - [--counting=total|toplevel|detailed] [--root=subdir] - [--linelength=digits] [--headers=x,y,...] - [--quiet] - [file] ... - - The style guidelines this tries to follow are those in - https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml - - Every problem is given a confidence score from 1-5, with 5 meaning we are - certain of the problem, and 1 meaning it could be a legitimate construct. - This will miss some errors, and is not a substitute for a code review. - - To suppress false-positive errors of a certain category, add a - 'NOLINT(category)' comment to the line. 
NOLINT or NOLINT(*) - suppresses errors of all categories on that line. - - The files passed in will be linted; at least one file must be provided. - Default linted extensions are .cc, .cpp, .cu, .cuh and .h. Change the - extensions with the --extensions flag. - - Flags: - - output=vs7 - By default, the output is formatted to ease emacs parsing. Visual Studio - compatible output (vs7) may also be used. Other formats are unsupported. - - verbose=# - Specify a number 0-5 to restrict errors to certain verbosity levels. - - quiet - Don't print anything if no errors are found. - - filter=-x,+y,... - Specify a comma-separated list of category-filters to apply: only - error messages whose category names pass the filters will be printed. - (Category names are printed with the message and look like - "[whitespace/indent]".) Filters are evaluated left to right. - "-FOO" and "FOO" means "do not print categories that start with FOO". - "+FOO" means "do print categories that start with FOO". - - Examples: --filter=-whitespace,+whitespace/braces - --filter=whitespace,runtime/printf,+runtime/printf_format - --filter=-,+build/include_what_you_use - - To see a list of all the categories used in cpplint, pass no arg: - --filter= - - counting=total|toplevel|detailed - The total number of errors found is always printed. If - 'toplevel' is provided, then the count of errors in each of - the top-level categories like 'build' and 'whitespace' will - also be printed. If 'detailed' is provided, then a count - is provided for each category like 'build/class'. - - root=subdir - The root directory used for deriving header guard CPP variable. - By default, the header guard CPP variable is calculated as the relative - path to the directory that contains .git, .hg, or .svn. When this flag - is specified, the relative path is calculated from the specified - directory. If the specified directory does not exist, this flag is - ignored. 
- - Examples: - Assuming that top/src/.git exists (and cwd=top/src), the header guard - CPP variables for top/src/chrome/browser/ui/browser.h are: - - No flag => CHROME_BROWSER_UI_BROWSER_H_ - --root=chrome => BROWSER_UI_BROWSER_H_ - --root=chrome/browser => UI_BROWSER_H_ - --root=.. => SRC_CHROME_BROWSER_UI_BROWSER_H_ - - linelength=digits - This is the allowed line length for the project. The default value is - 80 characters. - - Examples: - --linelength=120 - - extensions=extension,extension,... - The allowed file extensions that cpplint will check - - Examples: - --extensions=hpp,cpp - - headers=x,y,... - The header extensions that cpplint will treat as .h in checks. Values are - automatically added to --extensions list. - - Examples: - --headers=hpp,hxx - --headers=hpp - - cpplint.py supports per-directory configurations specified in CPPLINT.cfg - files. CPPLINT.cfg file can contain a number of key=value pairs. - Currently the following options are supported: - - set noparent - filter=+filter1,-filter2,... - exclude_files=regex - linelength=80 - root=subdir - headers=x,y,... - - "set noparent" option prevents cpplint from traversing directory tree - upwards looking for more .cfg files in parent directories. This option - is usually placed in the top-level project directory. - - The "filter" option is similar in function to --filter flag. It specifies - message filters in addition to the |_DEFAULT_FILTERS| and those specified - through --filter command-line flag. - - "exclude_files" allows to specify a regular expression to be matched against - a file name. If the expression matches, the file is skipped and not run - through liner. - - "linelength" allows to specify the allowed line length for the project. - - The "root" option is similar in function to the --root flag (see example - above). Paths are relative to the directory of the CPPLINT.cfg. - - The "headers" option is similar in function to the --headers flag - (see example above). 
- - CPPLINT.cfg has an effect on files in the same directory and all - sub-directories, unless overridden by a nested configuration file. - - Example file: - filter=-build/include_order,+build/include_alpha - exclude_files=.*\.cc - - The above example disables build/include_order warning and enables - build/include_alpha as well as excludes all .cc from being - processed by linter, in the current directory (where the .cfg - file is located) and all sub-directories. -""" - -# We categorize each error message we print. Here are the categories. -# We want an explicit list so we can list them all in cpplint --filter=. -# If you add a new error message with a new category, add it to the list -# here! cpplint_unittest.py should tell you if you forget to do this. -_ERROR_CATEGORIES = [ - 'build/class', - 'build/c++11', - 'build/c++14', - 'build/c++tr1', - 'build/deprecated', - 'build/endif_comment', - 'build/explicit_make_pair', - 'build/forward_decl', - 'build/header_guard', - 'build/include', - 'build/include_alpha', - 'build/include_order', - 'build/include_what_you_use', - 'build/namespaces', - 'build/printf_format', - 'build/storage_class', - 'legal/copyright', - 'readability/alt_tokens', - 'readability/braces', - 'readability/casting', - 'readability/check', - 'readability/constructors', - 'readability/fn_size', - 'readability/inheritance', - 'readability/multiline_comment', - 'readability/multiline_string', - 'readability/namespace', - 'readability/nolint', - 'readability/nul', - 'readability/strings', - 'readability/todo', - 'readability/utf8', - 'runtime/arrays', - 'runtime/casting', - 'runtime/explicit', - 'runtime/int', - 'runtime/init', - 'runtime/invalid_increment', - 'runtime/member_string_references', - 'runtime/memset', - 'runtime/indentation_namespace', - 'runtime/operator', - 'runtime/printf', - 'runtime/printf_format', - 'runtime/references', - 'runtime/string', - 'runtime/threadsafe_fn', - 'runtime/vlog', - 'whitespace/blank_line', - 
'whitespace/braces', - 'whitespace/comma', - 'whitespace/comments', - 'whitespace/empty_conditional_body', - 'whitespace/empty_if_body', - 'whitespace/empty_loop_body', - 'whitespace/end_of_line', - 'whitespace/ending_newline', - 'whitespace/forcolon', - 'whitespace/indent', - 'whitespace/line_length', - 'whitespace/newline', - 'whitespace/operators', - 'whitespace/parens', - 'whitespace/semicolon', - 'whitespace/tab', - 'whitespace/todo', - ] - -# These error categories are no longer enforced by cpplint, but for backwards- -# compatibility they may still appear in NOLINT comments. -_LEGACY_ERROR_CATEGORIES = [ - 'readability/streams', - 'readability/function', - ] - -# The default state of the category filter. This is overridden by the --filter= -# flag. By default all errors are on, so only add here categories that should be -# off by default (i.e., categories that must be enabled by the --filter= flags). -# All entries here should start with a '-' or '+', as in the --filter= flag. -_DEFAULT_FILTERS = ['-build/include_alpha'] - -# The default list of categories suppressed for C (not C++) files. -_DEFAULT_C_SUPPRESSED_CATEGORIES = [ - 'readability/casting', - ] - -# The default list of categories suppressed for Linux Kernel files. -_DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = [ - 'whitespace/tab', - ] - -# We used to check for high-bit characters, but after much discussion we -# decided those were OK, as long as they were in UTF-8 and didn't represent -# hard-coded international strings, which belong in a separate i18n file. 
- -# C++ headers -_CPP_HEADERS = frozenset([ - # Legacy - 'algobase.h', - 'algo.h', - 'alloc.h', - 'builtinbuf.h', - 'bvector.h', - 'complex.h', - 'defalloc.h', - 'deque.h', - 'editbuf.h', - 'fstream.h', - 'function.h', - 'hash_map', - 'hash_map.h', - 'hash_set', - 'hash_set.h', - 'hashtable.h', - 'heap.h', - 'indstream.h', - 'iomanip.h', - 'iostream.h', - 'istream.h', - 'iterator.h', - 'list.h', - 'map.h', - 'multimap.h', - 'multiset.h', - 'ostream.h', - 'pair.h', - 'parsestream.h', - 'pfstream.h', - 'procbuf.h', - 'pthread_alloc', - 'pthread_alloc.h', - 'rope', - 'rope.h', - 'ropeimpl.h', - 'set.h', - 'slist', - 'slist.h', - 'stack.h', - 'stdiostream.h', - 'stl_alloc.h', - 'stl_relops.h', - 'streambuf.h', - 'stream.h', - 'strfile.h', - 'strstream.h', - 'tempbuf.h', - 'tree.h', - 'type_traits.h', - 'vector.h', - # 17.6.1.2 C++ library headers - 'algorithm', - 'array', - 'atomic', - 'bitset', - 'chrono', - 'codecvt', - 'complex', - 'condition_variable', - 'deque', - 'exception', - 'forward_list', - 'fstream', - 'functional', - 'future', - 'initializer_list', - 'iomanip', - 'ios', - 'iosfwd', - 'iostream', - 'istream', - 'iterator', - 'limits', - 'list', - 'locale', - 'map', - 'memory', - 'mutex', - 'new', - 'numeric', - 'ostream', - 'queue', - 'random', - 'ratio', - 'regex', - 'scoped_allocator', - 'set', - 'sstream', - 'stack', - 'stdexcept', - 'streambuf', - 'string', - 'strstream', - 'system_error', - 'thread', - 'tuple', - 'typeindex', - 'typeinfo', - 'type_traits', - 'unordered_map', - 'unordered_set', - 'utility', - 'valarray', - 'vector', - # 17.6.1.2 C++ headers for C library facilities - 'cassert', - 'ccomplex', - 'cctype', - 'cerrno', - 'cfenv', - 'cfloat', - 'cinttypes', - 'ciso646', - 'climits', - 'clocale', - 'cmath', - 'csetjmp', - 'csignal', - 'cstdalign', - 'cstdarg', - 'cstdbool', - 'cstddef', - 'cstdint', - 'cstdio', - 'cstdlib', - 'cstring', - 'ctgmath', - 'ctime', - 'cuchar', - 'cwchar', - 'cwctype', - ]) - -# Type names -_TYPES = re.compile( - 
r'^(?:' - # [dcl.type.simple] - r'(char(16_t|32_t)?)|wchar_t|' - r'bool|short|int|long|signed|unsigned|float|double|' - # [support.types] - r'(ptrdiff_t|size_t|max_align_t|nullptr_t)|' - # [cstdint.syn] - r'(u?int(_fast|_least)?(8|16|32|64)_t)|' - r'(u?int(max|ptr)_t)|' - r')$') - - -# These headers are excluded from [build/include] and [build/include_order] -# checks: -# - Anything not following google file name conventions (containing an -# uppercase character, such as Python.h or nsStringAPI.h, for example). -# - Lua headers. -_THIRD_PARTY_HEADERS_PATTERN = re.compile( - r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$') - -# Pattern for matching FileInfo.BaseName() against test file name -_TEST_FILE_SUFFIX = r'(_test|_unittest|_regtest)$' - -# Pattern that matches only complete whitespace, possibly across multiple lines. -_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL) - -# Assertion macros. These are defined in base/logging.h and -# testing/base/public/gunit.h. -_CHECK_MACROS = [ - 'DCHECK', 'CHECK', - 'EXPECT_TRUE', 'ASSERT_TRUE', - 'EXPECT_FALSE', 'ASSERT_FALSE', - ] - -# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE -_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS]) - -for op, replacement in [('==', 'EQ'), ('!=', 'NE'), - ('>=', 'GE'), ('>', 'GT'), - ('<=', 'LE'), ('<', 'LT')]: - _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement - _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement - _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement - _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement - -for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), - ('>=', 'LT'), ('>', 'LE'), - ('<=', 'GT'), ('<', 'GE')]: - _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement - _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement - -# Alternative tokens and their replacements. 
For full list, see section 2.5 -# Alternative tokens [lex.digraph] in the C++ standard. -# -# Digraphs (such as '%:') are not included here since it's a mess to -# match those on a word boundary. -_ALT_TOKEN_REPLACEMENT = { - 'and': '&&', - 'bitor': '|', - 'or': '||', - 'xor': '^', - 'compl': '~', - 'bitand': '&', - 'and_eq': '&=', - 'or_eq': '|=', - 'xor_eq': '^=', - 'not': '!', - 'not_eq': '!=' - } - -# Compile regular expression that matches all the above keywords. The "[ =()]" -# bit is meant to avoid matching these keywords outside of boolean expressions. -# -# False positives include C-style multi-line comments and multi-line strings -# but those have always been troublesome for cpplint. -_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile( - r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)') - - -# These constants define types of headers for use with -# _IncludeState.CheckNextIncludeOrder(). -_C_SYS_HEADER = 1 -_CPP_SYS_HEADER = 2 -_LIKELY_MY_HEADER = 3 -_POSSIBLE_MY_HEADER = 4 -_OTHER_HEADER = 5 - -# These constants define the current inline assembly state -_NO_ASM = 0 # Outside of inline assembly block -_INSIDE_ASM = 1 # Inside inline assembly block -_END_ASM = 2 # Last line of inline assembly block -_BLOCK_ASM = 3 # The whole block is an inline assembly block - -# Match start of assembly blocks -_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' - r'(?:\s+(volatile|__volatile__))?' - r'\s*[{(]') - -# Match strings that indicate we're working on a C (not C++) file. -_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|' - r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))') - -# Match string that indicates we're working on a Linux Kernel file. -_SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)') - -_regexp_compile_cache = {} - -# {str, set(int)}: a map from error categories to sets of linenumbers -# on which those errors are expected and should be suppressed. 
-_error_suppressions = {} - -# The root directory used for deriving header guard CPP variable. -# This is set by --root flag. -_root = None -_root_debug = False - -# The allowed line length of files. -# This is set by --linelength flag. -_line_length = 80 - -# The allowed extensions for file names -# This is set by --extensions flag. -_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh']) - -# Treat all headers starting with 'h' equally: .h, .hpp, .hxx etc. -# This is set by --headers flag. -_hpp_headers = set(['h']) - -# {str, bool}: a map from error categories to booleans which indicate if the -# category should be suppressed for every line. -_global_error_suppressions = {} - -def ProcessHppHeadersOption(val): - global _hpp_headers - try: - _hpp_headers = set(val.split(',')) - # Automatically append to extensions list so it does not have to be set 2 times - _valid_extensions.update(_hpp_headers) - except ValueError: - PrintUsage('Header extensions must be comma seperated list.') - -def IsHeaderExtension(file_extension): - return file_extension in _hpp_headers - -def ParseNolintSuppressions(filename, raw_line, linenum, error): - """Updates the global list of line error-suppressions. - - Parses any NOLINT comments on the current line, updating the global - error_suppressions store. Reports an error if the NOLINT comment - was malformed. - - Args: - filename: str, the name of the input file. - raw_line: str, the line of input text, with comments. - linenum: int, the number of the current line. - error: function, an error handler. 
- """ - matched = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line) - if matched: - if matched.group(1): - suppressed_line = linenum + 1 - else: - suppressed_line = linenum - category = matched.group(2) - if category in (None, '(*)'): # => "suppress all" - _error_suppressions.setdefault(None, set()).add(suppressed_line) - else: - if category.startswith('(') and category.endswith(')'): - category = category[1:-1] - if category in _ERROR_CATEGORIES: - _error_suppressions.setdefault(category, set()).add(suppressed_line) - elif category not in _LEGACY_ERROR_CATEGORIES: - error(filename, linenum, 'readability/nolint', 5, - 'Unknown NOLINT error category: %s' % category) - - -def ProcessGlobalSuppresions(lines): - """Updates the list of global error suppressions. - - Parses any lint directives in the file that have global effect. - - Args: - lines: An array of strings, each representing a line of the file, with the - last element being empty if the file is terminated with a newline. - """ - for line in lines: - if _SEARCH_C_FILE.search(line): - for category in _DEFAULT_C_SUPPRESSED_CATEGORIES: - _global_error_suppressions[category] = True - if _SEARCH_KERNEL_FILE.search(line): - for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES: - _global_error_suppressions[category] = True - - -def ResetNolintSuppressions(): - """Resets the set of NOLINT suppressions to empty.""" - _error_suppressions.clear() - _global_error_suppressions.clear() - - -def IsErrorSuppressedByNolint(category, linenum): - """Returns true if the specified error category is suppressed on this line. - - Consults the global error_suppressions map populated by - ParseNolintSuppressions/ProcessGlobalSuppresions/ResetNolintSuppressions. - - Args: - category: str, the category of the error. - linenum: int, the current line number. - Returns: - bool, True iff the error should be suppressed due to a NOLINT comment or - global suppression. 
- """ - return (_global_error_suppressions.get(category, False) or - linenum in _error_suppressions.get(category, set()) or - linenum in _error_suppressions.get(None, set())) - - -def Match(pattern, s): - """Matches the string with the pattern, caching the compiled regexp.""" - # The regexp compilation caching is inlined in both Match and Search for - # performance reasons; factoring it out into a separate function turns out - # to be noticeably expensive. - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) - return _regexp_compile_cache[pattern].match(s) - - -def ReplaceAll(pattern, rep, s): - """Replaces instances of pattern in a string with a replacement. - - The compiled regex is kept in a cache shared by Match and Search. - - Args: - pattern: regex pattern - rep: replacement text - s: search string - - Returns: - string with replacements made (or original string if no replacements) - """ - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) - return _regexp_compile_cache[pattern].sub(rep, s) - - -def Search(pattern, s): - """Searches the string for the pattern, caching the compiled regexp.""" - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) - return _regexp_compile_cache[pattern].search(s) - - -def _IsSourceExtension(s): - """File extension (excluding dot) matches a source file extension.""" - return s in ('c', 'cc', 'cpp', 'cxx') - - -class _IncludeState(object): - """Tracks line numbers for includes, and the order in which includes appear. - - include_list contains list of lists of (header, line number) pairs. - It's a lists of lists rather than just one flat list to make it - easier to update across preprocessor boundaries. - - Call CheckNextIncludeOrder() once for each header in the file, passing - in the type constants defined above. 
Calls in an illegal order will - raise an _IncludeError with an appropriate error message. - - """ - # self._section will move monotonically through this set. If it ever - # needs to move backwards, CheckNextIncludeOrder will raise an error. - _INITIAL_SECTION = 0 - _MY_H_SECTION = 1 - _C_SECTION = 2 - _CPP_SECTION = 3 - _OTHER_H_SECTION = 4 - - _TYPE_NAMES = { - _C_SYS_HEADER: 'C system header', - _CPP_SYS_HEADER: 'C++ system header', - _LIKELY_MY_HEADER: 'header this file implements', - _POSSIBLE_MY_HEADER: 'header this file may implement', - _OTHER_HEADER: 'other header', - } - _SECTION_NAMES = { - _INITIAL_SECTION: "... nothing. (This can't be an error.)", - _MY_H_SECTION: 'a header this file implements', - _C_SECTION: 'C system header', - _CPP_SECTION: 'C++ system header', - _OTHER_H_SECTION: 'other header', - } - - def __init__(self): - self.include_list = [[]] - self.ResetSection('') - - def FindHeader(self, header): - """Check if a header has already been included. - - Args: - header: header to check. - Returns: - Line number of previous occurrence, or -1 if the header has not - been seen before. - """ - for section_list in self.include_list: - for f in section_list: - if f[0] == header: - return f[1] - return -1 - - def ResetSection(self, directive): - """Reset section checking for preprocessor directive. - - Args: - directive: preprocessor directive (e.g. "if", "else"). - """ - # The name of the current section. - self._section = self._INITIAL_SECTION - # The path of last found header. - self._last_header = '' - - # Update list of includes. Note that we never pop from the - # include list. - if directive in ('if', 'ifdef', 'ifndef'): - self.include_list.append([]) - elif directive in ('else', 'elif'): - self.include_list[-1] = [] - - def SetLastHeader(self, header_path): - self._last_header = header_path - - def CanonicalizeAlphabeticalOrder(self, header_path): - """Returns a path canonicalized for alphabetical comparison. 
- - - replaces "-" with "_" so they both cmp the same. - - removes '-inl' since we don't require them to be after the main header. - - lowercase everything, just in case. - - Args: - header_path: Path to be canonicalized. - - Returns: - Canonicalized path. - """ - return header_path.replace('-inl.h', '.h').replace('-', '_').lower() - - def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): - """Check if a header is in alphabetical order with the previous header. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - header_path: Canonicalized header to be checked. - - Returns: - Returns true if the header is in alphabetical order. - """ - # If previous section is different from current section, _last_header will - # be reset to empty string, so it's always less than current header. - # - # If previous line was a blank line, assume that the headers are - # intentionally sorted the way they are. - if (self._last_header > header_path and - Match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])): - return False - return True - - def CheckNextIncludeOrder(self, header_type): - """Returns a non-empty error message if the next header is out of order. - - This function also updates the internal state to be ready to check - the next include. - - Args: - header_type: One of the _XXX_HEADER constants defined above. - - Returns: - The empty string if the header is in the right order, or an - error message describing what's wrong. 
- - """ - error_message = ('Found %s after %s' % - (self._TYPE_NAMES[header_type], - self._SECTION_NAMES[self._section])) - - last_section = self._section - - if header_type == _C_SYS_HEADER: - if self._section <= self._C_SECTION: - self._section = self._C_SECTION - else: - self._last_header = '' - return error_message - elif header_type == _CPP_SYS_HEADER: - if self._section <= self._CPP_SECTION: - self._section = self._CPP_SECTION - else: - self._last_header = '' - return error_message - elif header_type == _LIKELY_MY_HEADER: - if self._section <= self._MY_H_SECTION: - self._section = self._MY_H_SECTION - else: - self._section = self._OTHER_H_SECTION - elif header_type == _POSSIBLE_MY_HEADER: - if self._section <= self._MY_H_SECTION: - self._section = self._MY_H_SECTION - else: - # This will always be the fallback because we're not sure - # enough that the header is associated with this file. - self._section = self._OTHER_H_SECTION - else: - assert header_type == _OTHER_HEADER - self._section = self._OTHER_H_SECTION - - if last_section != self._section: - self._last_header = '' - - return '' - - -class _CppLintState(object): - """Maintains module-wide state..""" - - def __init__(self): - self.verbose_level = 1 # global setting. - self.error_count = 0 # global count of reported errors - # filters to apply when emitting error messages - self.filters = _DEFAULT_FILTERS[:] - # backup of filter list. Used to restore the state after each file. - self._filters_backup = self.filters[:] - self.counting = 'total' # In what way are we counting errors? - self.errors_by_category = {} # string to int dict storing error counts - self.quiet = False # Suppress non-error messagess? 
- - # output format: - # "emacs" - format that emacs can parse (default) - # "vs7" - format that Microsoft Visual Studio 7 can parse - self.output_format = 'emacs' - - def SetOutputFormat(self, output_format): - """Sets the output format for errors.""" - self.output_format = output_format - - def SetQuiet(self, quiet): - """Sets the module's quiet settings, and returns the previous setting.""" - last_quiet = self.quiet - self.quiet = quiet - return last_quiet - - def SetVerboseLevel(self, level): - """Sets the module's verbosity, and returns the previous setting.""" - last_verbose_level = self.verbose_level - self.verbose_level = level - return last_verbose_level - - def SetCountingStyle(self, counting_style): - """Sets the module's counting options.""" - self.counting = counting_style - - def SetFilters(self, filters): - """Sets the error-message filters. - - These filters are applied when deciding whether to emit a given - error message. - - Args: - filters: A string of comma-separated filters (eg "+whitespace/indent"). - Each filter should start with + or -; else we die. - - Raises: - ValueError: The comma-separated filters did not all start with '+' or '-'. - E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" - """ - # Default filters always have less priority than the flag ones. - self.filters = _DEFAULT_FILTERS[:] - self.AddFilters(filters) - - def AddFilters(self, filters): - """ Adds more filters to the existing list of error-message filters. 
""" - for filt in filters.split(','): - clean_filt = filt.strip() - if clean_filt: - self.filters.append(clean_filt) - for filt in self.filters: - if not (filt.startswith('+') or filt.startswith('-')): - raise ValueError('Every filter in --filters must start with + or -' - ' (%s does not)' % filt) - - def BackupFilters(self): - """ Saves the current filter list to backup storage.""" - self._filters_backup = self.filters[:] - - def RestoreFilters(self): - """ Restores filters previously backed up.""" - self.filters = self._filters_backup[:] - - def ResetErrorCounts(self): - """Sets the module's error statistic back to zero.""" - self.error_count = 0 - self.errors_by_category = {} - - def IncrementErrorCount(self, category): - """Bumps the module's error statistic.""" - self.error_count += 1 - if self.counting in ('toplevel', 'detailed'): - if self.counting != 'detailed': - category = category.split('/')[0] - if category not in self.errors_by_category: - self.errors_by_category[category] = 0 - self.errors_by_category[category] += 1 - - def PrintErrorCounts(self): - """Print a summary of errors by category, and the total.""" - for category, count in self.errors_by_category.iteritems(): - sys.stderr.write('Category \'%s\' errors found: %d\n' % - (category, count)) - sys.stdout.write('Total errors found: %d\n' % self.error_count) - -_cpplint_state = _CppLintState() - - -def _OutputFormat(): - """Gets the module's output format.""" - return _cpplint_state.output_format - - -def _SetOutputFormat(output_format): - """Sets the module's output format.""" - _cpplint_state.SetOutputFormat(output_format) - -def _Quiet(): - """Return's the module's quiet setting.""" - return _cpplint_state.quiet - -def _SetQuiet(quiet): - """Set the module's quiet status, and return previous setting.""" - return _cpplint_state.SetQuiet(quiet) - - -def _VerboseLevel(): - """Returns the module's verbosity setting.""" - return _cpplint_state.verbose_level - - -def _SetVerboseLevel(level): - """Sets 
the module's verbosity, and returns the previous setting.""" - return _cpplint_state.SetVerboseLevel(level) - - -def _SetCountingStyle(level): - """Sets the module's counting options.""" - _cpplint_state.SetCountingStyle(level) - - -def _Filters(): - """Returns the module's list of output filters, as a list.""" - return _cpplint_state.filters - - -def _SetFilters(filters): - """Sets the module's error-message filters. - - These filters are applied when deciding whether to emit a given - error message. - - Args: - filters: A string of comma-separated filters (eg "whitespace/indent"). - Each filter should start with + or -; else we die. - """ - _cpplint_state.SetFilters(filters) - -def _AddFilters(filters): - """Adds more filter overrides. - - Unlike _SetFilters, this function does not reset the current list of filters - available. - - Args: - filters: A string of comma-separated filters (eg "whitespace/indent"). - Each filter should start with + or -; else we die. - """ - _cpplint_state.AddFilters(filters) - -def _BackupFilters(): - """ Saves the current filter list to backup storage.""" - _cpplint_state.BackupFilters() - -def _RestoreFilters(): - """ Restores filters previously backed up.""" - _cpplint_state.RestoreFilters() - -class _FunctionState(object): - """Tracks current function name and the number of lines in its body.""" - - _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. - _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. - - def __init__(self): - self.in_a_function = False - self.lines_in_function = 0 - self.current_function = '' - - def Begin(self, function_name): - """Start analyzing function body. - - Args: - function_name: The name of the function being tracked. 
- """ - self.in_a_function = True - self.lines_in_function = 0 - self.current_function = function_name - - def Count(self): - """Count line in current function body.""" - if self.in_a_function: - self.lines_in_function += 1 - - def Check(self, error, filename, linenum): - """Report if too many lines in function body. - - Args: - error: The function to call with any errors found. - filename: The name of the current file. - linenum: The number of the line to check. - """ - if not self.in_a_function: - return - - if Match(r'T(EST|est)', self.current_function): - base_trigger = self._TEST_TRIGGER - else: - base_trigger = self._NORMAL_TRIGGER - trigger = base_trigger * 2**_VerboseLevel() - - if self.lines_in_function > trigger: - error_level = int(math.log(self.lines_in_function / base_trigger, 2)) - # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... - if error_level > 5: - error_level = 5 - error(filename, linenum, 'readability/fn_size', error_level, - 'Small and focused functions are preferred:' - ' %s has %d non-comment lines' - ' (error triggered by exceeding %d lines).' % ( - self.current_function, self.lines_in_function, trigger)) - - def End(self): - """Stop analyzing function body.""" - self.in_a_function = False - - -class _IncludeError(Exception): - """Indicates a problem with the include order in a file.""" - pass - - -class FileInfo(object): - """Provides utility functions for filenames. - - FileInfo provides easy access to the components of a file's path - relative to the project root. - """ - - def __init__(self, filename): - self._filename = filename - - def FullName(self): - """Make Windows paths like Unix.""" - return os.path.abspath(self._filename).replace('\\', '/') - - def RepositoryName(self): - """FullName after removing the local path to the repository. 
- - If we have a real absolute path name here we can try to do something smart: - detecting the root of the checkout and truncating /path/to/checkout from - the name so that we get header guards that don't include things like - "C:\Documents and Settings\..." or "/home/username/..." in them and thus - people on different computers who have checked the source out to different - locations won't see bogus errors. - """ - fullname = self.FullName() - - if os.path.exists(fullname): - project_dir = os.path.dirname(fullname) - - if os.path.exists(os.path.join(project_dir, ".svn")): - # If there's a .svn file in the current directory, we recursively look - # up the directory tree for the top of the SVN checkout - root_dir = project_dir - one_up_dir = os.path.dirname(root_dir) - while os.path.exists(os.path.join(one_up_dir, ".svn")): - root_dir = os.path.dirname(root_dir) - one_up_dir = os.path.dirname(one_up_dir) - - prefix = os.path.commonprefix([root_dir, project_dir]) - return fullname[len(prefix) + 1:] - - # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by - # searching up from the current path. - root_dir = current_dir = os.path.dirname(fullname) - while current_dir != os.path.dirname(current_dir): - if (os.path.exists(os.path.join(current_dir, ".git")) or - os.path.exists(os.path.join(current_dir, ".hg")) or - os.path.exists(os.path.join(current_dir, ".svn"))): - root_dir = current_dir - current_dir = os.path.dirname(current_dir) - - if (os.path.exists(os.path.join(root_dir, ".git")) or - os.path.exists(os.path.join(root_dir, ".hg")) or - os.path.exists(os.path.join(root_dir, ".svn"))): - prefix = os.path.commonprefix([root_dir, project_dir]) - return fullname[len(prefix) + 1:] - - # Don't know what to do; header guard warnings may be wrong... - return fullname - - def Split(self): - """Splits the file into the directory, basename, and extension. 
- - For 'chrome/browser/browser.cc', Split() would - return ('chrome/browser', 'browser', '.cc') - - Returns: - A tuple of (directory, basename, extension). - """ - - googlename = self.RepositoryName() - project, rest = os.path.split(googlename) - return (project,) + os.path.splitext(rest) - - def BaseName(self): - """File base name - text after the final slash, before the final period.""" - return self.Split()[1] - - def Extension(self): - """File extension - text following the final period.""" - return self.Split()[2] - - def NoExtension(self): - """File has no source file extension.""" - return '/'.join(self.Split()[0:2]) - - def IsSource(self): - """File has a source file extension.""" - return _IsSourceExtension(self.Extension()[1:]) - - -def _ShouldPrintError(category, confidence, linenum): - """If confidence >= verbose, category passes filter and is not suppressed.""" - - # There are three ways we might decide not to print an error message: - # a "NOLINT(category)" comment appears in the source, - # the verbosity level isn't high enough, or the filters filter it out. - if IsErrorSuppressedByNolint(category, linenum): - return False - - if confidence < _cpplint_state.verbose_level: - return False - - is_filtered = False - for one_filter in _Filters(): - if one_filter.startswith('-'): - if category.startswith(one_filter[1:]): - is_filtered = True - elif one_filter.startswith('+'): - if category.startswith(one_filter[1:]): - is_filtered = False - else: - assert False # should have been checked for in SetFilter. - if is_filtered: - return False - - return True - - -def Error(filename, linenum, category, confidence, message): - """Logs the fact we've found a lint error. - - We log where the error was found, and also our confidence in the error, - that is, how certain we are this is a legitimate style regression, and - not a misidentification or a use that's sometimes justified. 
- - False positives can be suppressed by the use of - "cpplint(category)" comments on the offending line. These are - parsed into _error_suppressions. - - Args: - filename: The name of the file containing the error. - linenum: The number of the line containing the error. - category: A string used to describe the "category" this bug - falls under: "whitespace", say, or "runtime". Categories - may have a hierarchy separated by slashes: "whitespace/indent". - confidence: A number from 1-5 representing a confidence score for - the error, with 5 meaning that we are certain of the problem, - and 1 meaning that it could be a legitimate construct. - message: The error message. - """ - if _ShouldPrintError(category, confidence, linenum): - _cpplint_state.IncrementErrorCount(category) - if _cpplint_state.output_format == 'vs7': - sys.stderr.write('%s(%s): error cpplint: [%s] %s [%d]\n' % ( - filename, linenum, category, message, confidence)) - elif _cpplint_state.output_format == 'eclipse': - sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % ( - filename, linenum, message, category, confidence)) - else: - sys.stderr.write('%s:%s: %s [%s] [%d]\n' % ( - filename, linenum, message, category, confidence)) - - -# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard. -_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( - r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') -# Match a single C style comment on the same line. -_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/' -# Matches multi-line C style comments. -# This RE is a little bit more complicated than one might expect, because we -# have to take care of space removals tools so we can handle comments inside -# statements better. -# The current rule is: We only clear spaces from both sides when we're at the -# end of the line. Otherwise, we try to remove spaces from the right side, -# if this doesn't work we try on left side but only if there's a non-character -# on the right. 
-_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( - r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' + - _RE_PATTERN_C_COMMENTS + r'\s+|' + - r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' + - _RE_PATTERN_C_COMMENTS + r')') - - -def IsCppString(line): - """Does line terminate so, that the next symbol is in string constant. - - This function does not consider single-line nor multi-line comments. - - Args: - line: is a partial line of code starting from the 0..n. - - Returns: - True, if next character appended to 'line' is inside a - string constant. - """ - - line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" - return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 - - -def CleanseRawStrings(raw_lines): - """Removes C++11 raw strings from lines. - - Before: - static const char kData[] = R"( - multi-line string - )"; - - After: - static const char kData[] = "" - (replaced by blank line) - ""; - - Args: - raw_lines: list of raw lines. - - Returns: - list of lines with C++11 raw strings replaced by empty strings. - """ - - delimiter = None - lines_without_raw_strings = [] - for line in raw_lines: - if delimiter: - # Inside a raw string, look for the end - end = line.find(delimiter) - if end >= 0: - # Found the end of the string, match leading space for this - # line and resume copying the original lines, and also insert - # a "" on the last line. - leading_space = Match(r'^(\s*)\S', line) - line = leading_space.group(1) + '""' + line[end + len(delimiter):] - delimiter = None - else: - # Haven't found the end yet, append a blank line. - line = '""' - - # Look for beginning of a raw string, and replace them with - # empty strings. This is done in a loop to handle multiple raw - # strings on the same line. - while delimiter is None: - # Look for beginning of a raw string. - # See 2.14.15 [lex.string] for syntax. 
- # - # Once we have matched a raw string, we check the prefix of the - # line to make sure that the line is not part of a single line - # comment. It's done this way because we remove raw strings - # before removing comments as opposed to removing comments - # before removing raw strings. This is because there are some - # cpplint checks that requires the comments to be preserved, but - # we don't want to check comments that are inside raw strings. - matched = Match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line) - if (matched and - not Match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//', - matched.group(1))): - delimiter = ')' + matched.group(2) + '"' - - end = matched.group(3).find(delimiter) - if end >= 0: - # Raw string ended on same line - line = (matched.group(1) + '""' + - matched.group(3)[end + len(delimiter):]) - delimiter = None - else: - # Start of a multi-line raw string - line = matched.group(1) + '""' - else: - break - - lines_without_raw_strings.append(line) - - # TODO(unknown): if delimiter is not None here, we might want to - # emit a warning for unterminated string. - return lines_without_raw_strings - - -def FindNextMultiLineCommentStart(lines, lineix): - """Find the beginning marker for a multiline comment.""" - while lineix < len(lines): - if lines[lineix].strip().startswith('/*'): - # Only return this marker if the comment goes beyond this line - if lines[lineix].strip().find('*/', 2) < 0: - return lineix - lineix += 1 - return len(lines) - - -def FindNextMultiLineCommentEnd(lines, lineix): - """We are inside a comment, find the end marker.""" - while lineix < len(lines): - if lines[lineix].strip().endswith('*/'): - return lineix - lineix += 1 - return len(lines) - - -def RemoveMultiLineCommentsFromRange(lines, begin, end): - """Clears a range of lines for multi-line comments.""" - # Having // dummy comments makes the lines non-empty, so we will not get - # unnecessary blank line warnings later in the code. 
- for i in range(begin, end): - lines[i] = '/**/' - - -def RemoveMultiLineComments(filename, lines, error): - """Removes multiline (c-style) comments from lines.""" - lineix = 0 - while lineix < len(lines): - lineix_begin = FindNextMultiLineCommentStart(lines, lineix) - if lineix_begin >= len(lines): - return - lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin) - if lineix_end >= len(lines): - error(filename, lineix_begin + 1, 'readability/multiline_comment', 5, - 'Could not find end of multi-line comment') - return - RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1) - lineix = lineix_end + 1 - - -def CleanseComments(line): - """Removes //-comments and single-line C-style /* */ comments. - - Args: - line: A line of C++ source. - - Returns: - The line with single-line comments removed. - """ - commentpos = line.find('//') - if commentpos != -1 and not IsCppString(line[:commentpos]): - line = line[:commentpos].rstrip() - # get rid of /* ... */ - return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) - - -class CleansedLines(object): - """Holds 4 copies of all lines with different preprocessing applied to them. - - 1) elided member contains lines without strings and comments. - 2) lines member contains lines without comments. - 3) raw_lines member contains all the lines without processing. - 4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw - strings removed. - All these members are of , and of the same length. 
- """ - - def __init__(self, lines): - self.elided = [] - self.lines = [] - self.raw_lines = lines - self.num_lines = len(lines) - self.lines_without_raw_strings = CleanseRawStrings(lines) - for linenum in range(len(self.lines_without_raw_strings)): - self.lines.append(CleanseComments( - self.lines_without_raw_strings[linenum])) - elided = self._CollapseStrings(self.lines_without_raw_strings[linenum]) - self.elided.append(CleanseComments(elided)) - - def NumLines(self): - """Returns the number of lines represented.""" - return self.num_lines - - @staticmethod - def _CollapseStrings(elided): - """Collapses strings and chars on a line to simple "" or '' blocks. - - We nix strings first so we're not fooled by text like '"http://"' - - Args: - elided: The line being processed. - - Returns: - The line with collapsed strings. - """ - if _RE_PATTERN_INCLUDE.match(elided): - return elided - - # Remove escaped characters first to make quote/single quote collapsing - # basic. Things that look like escaped characters shouldn't occur - # outside of strings and chars. - elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) - - # Replace quoted strings and digit separators. Both single quotes - # and double quotes are processed in the same loop, otherwise - # nested quotes wouldn't work. - collapsed = '' - while True: - # Find the first quote character - match = Match(r'^([^\'"]*)([\'"])(.*)$', elided) - if not match: - collapsed += elided - break - head, quote, tail = match.groups() - - if quote == '"': - # Collapse double quoted strings - second_quote = tail.find('"') - if second_quote >= 0: - collapsed += head + '""' - elided = tail[second_quote + 1:] - else: - # Unmatched double quote, don't bother processing the rest - # of the line since this is probably a multiline string. - collapsed += elided - break - else: - # Found single quote, check nearby text to eliminate digit separators. 
- # - # There is no special handling for floating point here, because - # the integer/fractional/exponent parts would all be parsed - # correctly as long as there are digits on both sides of the - # separator. So we are fine as long as we don't see something - # like "0.'3" (gcc 4.9.0 will not allow this literal). - if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head): - match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail) - collapsed += head + match_literal.group(1).replace("'", '') - elided = match_literal.group(2) - else: - second_quote = tail.find('\'') - if second_quote >= 0: - collapsed += head + "''" - elided = tail[second_quote + 1:] - else: - # Unmatched single quote - collapsed += elided - break - - return collapsed - - -def FindEndOfExpressionInLine(line, startpos, stack): - """Find the position just after the end of current parenthesized expression. - - Args: - line: a CleansedLines line. - startpos: start searching at this position. - stack: nesting stack at startpos. - - Returns: - On finding matching end: (index just after matching end, None) - On finding an unclosed expression: (-1, None) - Otherwise: (-1, new stack at end of this line) - """ - for i in xrange(startpos, len(line)): - char = line[i] - if char in '([{': - # Found start of parenthesized expression, push to expression stack - stack.append(char) - elif char == '<': - # Found potential start of template argument list - if i > 0 and line[i - 1] == '<': - # Left shift operator - if stack and stack[-1] == '<': - stack.pop() - if not stack: - return (-1, None) - elif i > 0 and Search(r'\boperator\s*$', line[0:i]): - # operator<, don't add to stack - continue - else: - # Tentative start of template argument list - stack.append('<') - elif char in ')]}': - # Found end of parenthesized expression. - # - # If we are currently expecting a matching '>', the pending '<' - # must have been an operator. Remove them from expression stack. 
- while stack and stack[-1] == '<': - stack.pop() - if not stack: - return (-1, None) - if ((stack[-1] == '(' and char == ')') or - (stack[-1] == '[' and char == ']') or - (stack[-1] == '{' and char == '}')): - stack.pop() - if not stack: - return (i + 1, None) - else: - # Mismatched parentheses - return (-1, None) - elif char == '>': - # Found potential end of template argument list. - - # Ignore "->" and operator functions - if (i > 0 and - (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))): - continue - - # Pop the stack if there is a matching '<'. Otherwise, ignore - # this '>' since it must be an operator. - if stack: - if stack[-1] == '<': - stack.pop() - if not stack: - return (i + 1, None) - elif char == ';': - # Found something that look like end of statements. If we are currently - # expecting a '>', the matching '<' must have been an operator, since - # template argument list should not contain statements. - while stack and stack[-1] == '<': - stack.pop() - if not stack: - return (-1, None) - - # Did not find end of expression or unbalanced parentheses on this line - return (-1, stack) - - -def CloseExpression(clean_lines, linenum, pos): - """If input points to ( or { or [ or <, finds the position that closes it. - - If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the - linenum/pos that correspond to the closing of the expression. - - TODO(unknown): cpplint spends a fair bit of time matching parentheses. - Ideally we would want to index all opening and closing parentheses once - and have CloseExpression be just a simple lookup, but due to preprocessor - tricks, this is not so easy. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - pos: A position on the line. - - Returns: - A tuple (line, linenum, pos) pointer *past* the closing brace, or - (line, len(lines), -1) if we never find a close. 
Note we ignore - strings and comments when matching; and the line we return is the - 'cleansed' line at linenum. - """ - - line = clean_lines.elided[linenum] - if (line[pos] not in '({[<') or Match(r'<[<=]', line[pos:]): - return (line, clean_lines.NumLines(), -1) - - # Check first line - (end_pos, stack) = FindEndOfExpressionInLine(line, pos, []) - if end_pos > -1: - return (line, linenum, end_pos) - - # Continue scanning forward - while stack and linenum < clean_lines.NumLines() - 1: - linenum += 1 - line = clean_lines.elided[linenum] - (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack) - if end_pos > -1: - return (line, linenum, end_pos) - - # Did not find end of expression before end of file, give up - return (line, clean_lines.NumLines(), -1) - - -def FindStartOfExpressionInLine(line, endpos, stack): - """Find position at the matching start of current expression. - - This is almost the reverse of FindEndOfExpressionInLine, but note - that the input position and returned position differs by 1. - - Args: - line: a CleansedLines line. - endpos: start searching at this position. - stack: nesting stack at endpos. - - Returns: - On finding matching start: (index at matching start, None) - On finding an unclosed expression: (-1, None) - Otherwise: (-1, new stack at beginning of this line) - """ - i = endpos - while i >= 0: - char = line[i] - if char in ')]}': - # Found end of expression, push to expression stack - stack.append(char) - elif char == '>': - # Found potential end of template argument list. - # - # Ignore it if it's a "->" or ">=" or "operator>" - if (i > 0 and - (line[i - 1] == '-' or - Match(r'\s>=\s', line[i - 1:]) or - Search(r'\boperator\s*$', line[0:i]))): - i -= 1 - else: - stack.append('>') - elif char == '<': - # Found potential start of template argument list - if i > 0 and line[i - 1] == '<': - # Left shift operator - i -= 1 - else: - # If there is a matching '>', we can pop the expression stack. 
- # Otherwise, ignore this '<' since it must be an operator. - if stack and stack[-1] == '>': - stack.pop() - if not stack: - return (i, None) - elif char in '([{': - # Found start of expression. - # - # If there are any unmatched '>' on the stack, they must be - # operators. Remove those. - while stack and stack[-1] == '>': - stack.pop() - if not stack: - return (-1, None) - if ((char == '(' and stack[-1] == ')') or - (char == '[' and stack[-1] == ']') or - (char == '{' and stack[-1] == '}')): - stack.pop() - if not stack: - return (i, None) - else: - # Mismatched parentheses - return (-1, None) - elif char == ';': - # Found something that look like end of statements. If we are currently - # expecting a '<', the matching '>' must have been an operator, since - # template argument list should not contain statements. - while stack and stack[-1] == '>': - stack.pop() - if not stack: - return (-1, None) - - i -= 1 - - return (-1, stack) - - -def ReverseCloseExpression(clean_lines, linenum, pos): - """If input points to ) or } or ] or >, finds the position that opens it. - - If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the - linenum/pos that correspond to the opening of the expression. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - pos: A position on the line. - - Returns: - A tuple (line, linenum, pos) pointer *at* the opening brace, or - (line, 0, -1) if we never find the matching opening brace. Note - we ignore strings and comments when matching; and the line we - return is the 'cleansed' line at linenum. 
- """ - line = clean_lines.elided[linenum] - if line[pos] not in ')}]>': - return (line, 0, -1) - - # Check last line - (start_pos, stack) = FindStartOfExpressionInLine(line, pos, []) - if start_pos > -1: - return (line, linenum, start_pos) - - # Continue scanning backward - while stack and linenum > 0: - linenum -= 1 - line = clean_lines.elided[linenum] - (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack) - if start_pos > -1: - return (line, linenum, start_pos) - - # Did not find start of expression before beginning of file, give up - return (line, 0, -1) - - -def CheckForCopyright(filename, lines, error): - """Logs an error if no Copyright message appears at the top of the file.""" - - # We'll say it should occur by line 10. Don't forget there's a - # dummy line at the front. - for line in xrange(1, min(len(lines), 11)): - if re.search(r'Copyright', lines[line], re.I): break - else: # means no copyright line was found - error(filename, 0, 'legal/copyright', 5, - 'No copyright message found. ' - 'You should have a line: "Copyright [year] "') - - -def GetIndentLevel(line): - """Return the number of leading spaces in line. - - Args: - line: A string to check. - - Returns: - An integer count of leading spaces, possibly zero. - """ - indent = Match(r'^( *)\S', line) - if indent: - return len(indent.group(1)) - else: - return 0 - -def PathSplitToList(path): - """Returns the path split into a list by the separator. - - Args: - path: An absolute or relative path (e.g. '/a/b/c/' or '../a') - - Returns: - A list of path components (e.g. ['a', 'b', 'c]). - """ - lst = [] - while True: - (head, tail) = os.path.split(path) - if head == path: # absolute paths end - lst.append(head) - break - if tail == path: # relative paths end - lst.append(tail) - break - - path = head - lst.append(tail) - - lst.reverse() - return lst - -def GetHeaderGuardCPPVariable(filename): - """Returns the CPP variable that should be used as a header guard. 
- - Args: - filename: The name of a C++ header file. - - Returns: - The CPP variable that should be used as a header guard in the - named file. - - """ - - # Restores original filename in case that cpplint is invoked from Emacs's - # flymake. - filename = re.sub(r'_flymake\.h$', '.h', filename) - filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename) - # Replace 'c++' with 'cpp'. - filename = filename.replace('C++', 'cpp').replace('c++', 'cpp') - - fileinfo = FileInfo(filename) - file_path_from_root = fileinfo.RepositoryName() - - def FixupPathFromRoot(): - if _root_debug: - sys.stderr.write("\n_root fixup, _root = '%s', repository name = '%s'\n" - %(_root, fileinfo.RepositoryName())) - - # Process the file path with the --root flag if it was set. - if not _root: - if _root_debug: - sys.stderr.write("_root unspecified\n") - return file_path_from_root - - def StripListPrefix(lst, prefix): - # f(['x', 'y'], ['w, z']) -> None (not a valid prefix) - if lst[:len(prefix)] != prefix: - return None - # f(['a, 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd'] - return lst[(len(prefix)):] - - # root behavior: - # --root=subdir , lstrips subdir from the header guard - maybe_path = StripListPrefix(PathSplitToList(file_path_from_root), - PathSplitToList(_root)) - - if _root_debug: - sys.stderr.write("_root lstrip (maybe_path=%s, file_path_from_root=%s," + - " _root=%s)\n" %(maybe_path, file_path_from_root, _root)) - - if maybe_path: - return os.path.join(*maybe_path) - - # --root=.. 
, will prepend the outer directory to the header guard - full_path = fileinfo.FullName() - root_abspath = os.path.abspath(_root) - - maybe_path = StripListPrefix(PathSplitToList(full_path), - PathSplitToList(root_abspath)) - - if _root_debug: - sys.stderr.write("_root prepend (maybe_path=%s, full_path=%s, " + - "root_abspath=%s)\n" %(maybe_path, full_path, root_abspath)) - - if maybe_path: - return os.path.join(*maybe_path) - - if _root_debug: - sys.stderr.write("_root ignore, returning %s\n" %(file_path_from_root)) - - # --root=FAKE_DIR is ignored - return file_path_from_root - - file_path_from_root = FixupPathFromRoot() - return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_' - - -def CheckForHeaderGuard(filename, clean_lines, error): - """Checks that the file contains a header guard. - - Logs an error if no #ifndef header guard is present. For other - headers, checks that the full pathname is used. - - Args: - filename: The name of the C++ header file. - clean_lines: A CleansedLines instance containing the file. - error: The function to call with any errors found. - """ - - # Don't check for header guards if there are error suppression - # comments somewhere in this file. - # - # Because this is silencing a warning for a nonexistent line, we - # only support the very specific NOLINT(build/header_guard) syntax, - # and not the general NOLINT or NOLINT(*) syntax. - raw_lines = clean_lines.lines_without_raw_strings - for i in raw_lines: - if Search(r'//\s*NOLINT\(build/header_guard\)', i): - return - - cppvar = GetHeaderGuardCPPVariable(filename) - - ifndef = '' - ifndef_linenum = 0 - define = '' - endif = '' - endif_linenum = 0 - for linenum, line in enumerate(raw_lines): - linesplit = line.split() - if len(linesplit) >= 2: - # find the first occurrence of #ifndef and #define, save arg - if not ifndef and linesplit[0] == '#ifndef': - # set ifndef to the header guard presented on the #ifndef line. 
- ifndef = linesplit[1] - ifndef_linenum = linenum - if not define and linesplit[0] == '#define': - define = linesplit[1] - # find the last occurrence of #endif, save entire line - if line.startswith('#endif'): - endif = line - endif_linenum = linenum - - if not ifndef or not define or ifndef != define: - error(filename, 0, 'build/header_guard', 5, - 'No #ifndef header guard found, suggested CPP variable is: %s' % - cppvar) - return - - # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ - # for backward compatibility. - if ifndef != cppvar: - error_level = 0 - if ifndef != cppvar + '_': - error_level = 5 - - ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum, - error) - error(filename, ifndef_linenum, 'build/header_guard', error_level, - '#ifndef header guard has wrong style, please use: %s' % cppvar) - - # Check for "//" comments on endif line. - ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum, - error) - match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif) - if match: - if match.group(1) == '_': - # Issue low severity warning for deprecated double trailing underscore - error(filename, endif_linenum, 'build/header_guard', 0, - '#endif line should be "#endif // %s"' % cppvar) - return - - # Didn't find the corresponding "//" comment. If this file does not - # contain any "//" comments at all, it could be that the compiler - # only wants "/**/" comments, look for those instead. 
- no_single_line_comments = True - for i in xrange(1, len(raw_lines) - 1): - line = raw_lines[i] - if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line): - no_single_line_comments = False - break - - if no_single_line_comments: - match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif) - if match: - if match.group(1) == '_': - # Low severity warning for double trailing underscore - error(filename, endif_linenum, 'build/header_guard', 0, - '#endif line should be "#endif /* %s */"' % cppvar) - return - - # Didn't find anything - error(filename, endif_linenum, 'build/header_guard', 5, - '#endif line should be "#endif // %s"' % cppvar) - - -def CheckHeaderFileIncluded(filename, include_state, error): - """Logs an error if a .cc file does not include its header.""" - - # Do not check test files - fileinfo = FileInfo(filename) - if Search(_TEST_FILE_SUFFIX, fileinfo.BaseName()): - return - - headerfile = filename[0:len(filename) - len(fileinfo.Extension())] + '.h' - if not os.path.exists(headerfile): - return - headername = FileInfo(headerfile).RepositoryName() - first_include = 0 - for section_list in include_state.include_list: - for f in section_list: - if headername in f[0] or f[0] in headername: - return - if not first_include: - first_include = f[1] - - error(filename, first_include, 'build/include', 5, - '%s should include its header file %s' % (fileinfo.RepositoryName(), - headername)) - - -def CheckForBadCharacters(filename, lines, error): - """Logs an error for each line containing bad characters. - - Two kinds of bad characters: - - 1. Unicode replacement characters: These indicate that either the file - contained invalid UTF-8 (likely) or Unicode replacement characters (which - it shouldn't). Note that it's possible for this to throw off line - numbering if the invalid UTF-8 occurred adjacent to a newline. - - 2. NUL bytes. These are problematic for some tools. - - Args: - filename: The name of the current file. 
- lines: An array of strings, each representing a line of the file. - error: The function to call with any errors found. - """ - for linenum, line in enumerate(lines): - if u'\ufffd' in line: - error(filename, linenum, 'readability/utf8', 5, - 'Line contains invalid UTF-8 (or Unicode replacement character).') - if '\0' in line: - error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.') - - -def CheckForNewlineAtEOF(filename, lines, error): - """Logs an error if there is no newline char at the end of the file. - - Args: - filename: The name of the current file. - lines: An array of strings, each representing a line of the file. - error: The function to call with any errors found. - """ - - # The array lines() was created by adding two newlines to the - # original file (go figure), then splitting on \n. - # To verify that the file ends in \n, we just have to make sure the - # last-but-two element of lines() exists and is empty. - if len(lines) < 3 or lines[-2]: - error(filename, len(lines) - 2, 'whitespace/ending_newline', 5, - 'Could not find a newline character at the end of the file.') - - -def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): - """Logs an error if we see /* ... */ or "..." that extend past one line. - - /* ... */ comments are legit inside macros, for one line. - Otherwise, we prefer // comments, so it's ok to warn about the - other. Likewise, it's ok for strings to extend across multiple - lines, as long as a line continuation character (backslash) - terminates each line. Although not currently prohibited by the C++ - style guide, it's ugly and unnecessary. We don't do well with either - in this lint program, so we warn about both. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. 
- """ - line = clean_lines.elided[linenum] - - # Remove all \\ (escaped backslashes) from the line. They are OK, and the - # second (escaped) slash may trigger later \" detection erroneously. - line = line.replace('\\\\', '') - - if line.count('/*') > line.count('*/'): - error(filename, linenum, 'readability/multiline_comment', 5, - 'Complex multi-line /*...*/-style comment found. ' - 'Lint may give bogus warnings. ' - 'Consider replacing these with //-style comments, ' - 'with #if 0...#endif, ' - 'or with more clearly structured multi-line comments.') - - if (line.count('"') - line.count('\\"')) % 2: - error(filename, linenum, 'readability/multiline_string', 5, - 'Multi-line string ("...") found. This lint script doesn\'t ' - 'do well with such strings, and may give bogus warnings. ' - 'Use C++11 raw strings or concatenation instead.') - - -# (non-threadsafe name, thread-safe alternative, validation pattern) -# -# The validation pattern is used to eliminate false positives such as: -# _rand(); // false positive due to substring match. -# ->rand(); // some member function rand(). -# ACMRandom rand(seed); // some variable named rand. -# ISAACRandom rand(); // another variable named rand. -# -# Basically we require the return value of these functions to be used -# in some expression context on the same line by matching on some -# operator before the function name. This eliminates constructors and -# member function calls. 
-_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)' -_THREADING_LIST = ( - ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'), - ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'), - ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'), - ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'), - ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'), - ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'), - ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'), - ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'), - ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'), - ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'), - ('strtok(', 'strtok_r(', - _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'), - ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'), - ) - - -def CheckPosixThreading(filename, clean_lines, linenum, error): - """Checks for calls to thread-unsafe functions. - - Much code has been originally written without consideration of - multi-threading. Also, engineers are relying on their old experience; - they have learned posix before threading extensions were added. These - tests guide the engineers to use thread-safe functions (when using - posix directly). - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST: - # Additional pattern matching check to confirm that this is the - # function we are looking for - if Search(pattern, line): - error(filename, linenum, 'runtime/threadsafe_fn', 2, - 'Consider using ' + multithread_safe_func + - '...) instead of ' + single_thread_func + - '...) 
for improved thread safety.') - - -def CheckVlogArguments(filename, clean_lines, linenum, error): - """Checks that VLOG() is only used for defining a logging level. - - For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and - VLOG(FATAL) are not. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line): - error(filename, linenum, 'runtime/vlog', 5, - 'VLOG() should be used with numeric verbosity level. ' - 'Use LOG() if you want symbolic severity levels.') - -# Matches invalid increment: *count++, which moves pointer instead of -# incrementing a value. -_RE_PATTERN_INVALID_INCREMENT = re.compile( - r'^\s*\*\w+(\+\+|--);') - - -def CheckInvalidIncrement(filename, clean_lines, linenum, error): - """Checks for invalid increment *count++. - - For example following function: - void increment_counter(int* count) { - *count++; - } - is invalid, because it effectively does count++, moving pointer, and should - be replaced with ++*count, (*count)++ or *count += 1. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. 
- """ - line = clean_lines.elided[linenum] - if _RE_PATTERN_INVALID_INCREMENT.match(line): - error(filename, linenum, 'runtime/invalid_increment', 5, - 'Changing pointer instead of value (or unused value of operator*).') - - -def IsMacroDefinition(clean_lines, linenum): - if Search(r'^#define', clean_lines[linenum]): - return True - - if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]): - return True - - return False - - -def IsForwardClassDeclaration(clean_lines, linenum): - return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum]) - - -class _BlockInfo(object): - """Stores information about a generic block of code.""" - - def __init__(self, linenum, seen_open_brace): - self.starting_linenum = linenum - self.seen_open_brace = seen_open_brace - self.open_parentheses = 0 - self.inline_asm = _NO_ASM - self.check_namespace_indentation = False - - def CheckBegin(self, filename, clean_lines, linenum, error): - """Run checks that applies to text up to the opening brace. - - This is mostly for checking the text after the class identifier - and the "{", usually where the base class is specified. For other - blocks, there isn't much to check, so we always pass. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - pass - - def CheckEnd(self, filename, clean_lines, linenum, error): - """Run checks that applies to text after the closing brace. - - This is mostly used for checking end of namespace comments. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - pass - - def IsBlockInfo(self): - """Returns true if this block is a _BlockInfo. 
- - This is convenient for verifying that an object is an instance of - a _BlockInfo, but not an instance of any of the derived classes. - - Returns: - True for this class, False for derived classes. - """ - return self.__class__ == _BlockInfo - - -class _ExternCInfo(_BlockInfo): - """Stores information about an 'extern "C"' block.""" - - def __init__(self, linenum): - _BlockInfo.__init__(self, linenum, True) - - -class _ClassInfo(_BlockInfo): - """Stores information about a class.""" - - def __init__(self, name, class_or_struct, clean_lines, linenum): - _BlockInfo.__init__(self, linenum, False) - self.name = name - self.is_derived = False - self.check_namespace_indentation = True - if class_or_struct == 'struct': - self.access = 'public' - self.is_struct = True - else: - self.access = 'private' - self.is_struct = False - - # Remember initial indentation level for this class. Using raw_lines here - # instead of elided to account for leading comments. - self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum]) - - # Try to find the end of the class. This will be confused by things like: - # class A { - # } *x = { ... - # - # But it's still good enough for CheckSectionSpacing. - self.last_line = 0 - depth = 0 - for i in range(linenum, clean_lines.NumLines()): - line = clean_lines.elided[i] - depth += line.count('{') - line.count('}') - if not depth: - self.last_line = i - break - - def CheckBegin(self, filename, clean_lines, linenum, error): - # Look for a bare ':' - if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]): - self.is_derived = True - - def CheckEnd(self, filename, clean_lines, linenum, error): - # If there is a DISALLOW macro, it should appear near the end of - # the class. 
- seen_last_thing_in_class = False - for i in xrange(linenum - 1, self.starting_linenum, -1): - match = Search( - r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' + - self.name + r'\)', - clean_lines.elided[i]) - if match: - if seen_last_thing_in_class: - error(filename, i, 'readability/constructors', 3, - match.group(1) + ' should be the last thing in the class') - break - - if not Match(r'^\s*$', clean_lines.elided[i]): - seen_last_thing_in_class = True - - # Check that closing brace is aligned with beginning of the class. - # Only do this if the closing brace is indented by only whitespaces. - # This means we will not check single-line class definitions. - indent = Match(r'^( *)\}', clean_lines.elided[linenum]) - if indent and len(indent.group(1)) != self.class_indent: - if self.is_struct: - parent = 'struct ' + self.name - else: - parent = 'class ' + self.name - error(filename, linenum, 'whitespace/indent', 3, - 'Closing brace should be aligned with beginning of %s' % parent) - - -class _NamespaceInfo(_BlockInfo): - """Stores information about a namespace.""" - - def __init__(self, name, linenum): - _BlockInfo.__init__(self, linenum, False) - self.name = name or '' - self.check_namespace_indentation = True - - def CheckEnd(self, filename, clean_lines, linenum, error): - """Check end of namespace comments.""" - line = clean_lines.raw_lines[linenum] - - # Check how many lines is enclosed in this namespace. Don't issue - # warning for missing namespace comments if there aren't enough - # lines. However, do apply checks if there is already an end of - # namespace comment and it's incorrect. - # - # TODO(unknown): We always want to check end of namespace comments - # if a namespace is large, but sometimes we also want to apply the - # check if a short namespace contained nontrivial things (something - # other than forward declarations). 
There is currently no logic on - # deciding what these nontrivial things are, so this check is - # triggered by namespace size only, which works most of the time. - if (linenum - self.starting_linenum < 10 - and not Match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)): - return - - # Look for matching comment at end of namespace. - # - # Note that we accept C style "/* */" comments for terminating - # namespaces, so that code that terminate namespaces inside - # preprocessor macros can be cpplint clean. - # - # We also accept stuff like "// end of namespace ." with the - # period at the end. - # - # Besides these, we don't accept anything else, otherwise we might - # get false negatives when existing comment is a substring of the - # expected namespace. - if self.name: - # Named namespace - if not Match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' + - re.escape(self.name) + r'[\*/\.\\\s]*$'), - line): - error(filename, linenum, 'readability/namespace', 5, - 'Namespace should be terminated with "// namespace %s"' % - self.name) - else: - # Anonymous namespace - if not Match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): - # If "// namespace anonymous" or "// anonymous namespace (more text)", - # mention "// anonymous namespace" as an acceptable form - if Match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line): - error(filename, linenum, 'readability/namespace', 5, - 'Anonymous namespace should be terminated with "// namespace"' - ' or "// anonymous namespace"') - else: - error(filename, linenum, 'readability/namespace', 5, - 'Anonymous namespace should be terminated with "// namespace"') - - -class _PreprocessorInfo(object): - """Stores checkpoints of nesting stacks when #if/#else is seen.""" - - def __init__(self, stack_before_if): - # The entire nesting stack before #if - self.stack_before_if = stack_before_if - - # The entire nesting stack up to #else - self.stack_before_else = [] - - # Whether we have already seen #else or #elif - self.seen_else = 
False - - -class NestingState(object): - """Holds states related to parsing braces.""" - - def __init__(self): - # Stack for tracking all braces. An object is pushed whenever we - # see a "{", and popped when we see a "}". Only 3 types of - # objects are possible: - # - _ClassInfo: a class or struct. - # - _NamespaceInfo: a namespace. - # - _BlockInfo: some other type of block. - self.stack = [] - - # Top of the previous stack before each Update(). - # - # Because the nesting_stack is updated at the end of each line, we - # had to do some convoluted checks to find out what is the current - # scope at the beginning of the line. This check is simplified by - # saving the previous top of nesting stack. - # - # We could save the full stack, but we only need the top. Copying - # the full nesting stack would slow down cpplint by ~10%. - self.previous_stack_top = [] - - # Stack of _PreprocessorInfo objects. - self.pp_stack = [] - - def SeenOpenBrace(self): - """Check if we have seen the opening brace for the innermost block. - - Returns: - True if we have seen the opening brace, False if the innermost - block is still expecting an opening brace. - """ - return (not self.stack) or self.stack[-1].seen_open_brace - - def InNamespaceBody(self): - """Check if we are currently one level inside a namespace body. - - Returns: - True if top of the stack is a namespace block, False otherwise. - """ - return self.stack and isinstance(self.stack[-1], _NamespaceInfo) - - def InExternC(self): - """Check if we are currently one level inside an 'extern "C"' block. - - Returns: - True if top of the stack is an extern block, False otherwise. - """ - return self.stack and isinstance(self.stack[-1], _ExternCInfo) - - def InClassDeclaration(self): - """Check if we are currently one level inside a class or struct declaration. - - Returns: - True if top of the stack is a class/struct, False otherwise. 
- """ - return self.stack and isinstance(self.stack[-1], _ClassInfo) - - def InAsmBlock(self): - """Check if we are currently one level inside an inline ASM block. - - Returns: - True if the top of the stack is a block containing inline ASM. - """ - return self.stack and self.stack[-1].inline_asm != _NO_ASM - - def InTemplateArgumentList(self, clean_lines, linenum, pos): - """Check if current position is inside template argument list. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - pos: position just after the suspected template argument. - Returns: - True if (linenum, pos) is inside template arguments. - """ - while linenum < clean_lines.NumLines(): - # Find the earliest character that might indicate a template argument - line = clean_lines.elided[linenum] - match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:]) - if not match: - linenum += 1 - pos = 0 - continue - token = match.group(1) - pos += len(match.group(0)) - - # These things do not look like template argument list: - # class Suspect { - # class Suspect x; } - if token in ('{', '}', ';'): return False - - # These things look like template argument list: - # template - # template - # template - # template - if token in ('>', '=', '[', ']', '.'): return True - - # Check if token is an unmatched '<'. - # If not, move on to the next character. - if token != '<': - pos += 1 - if pos >= len(line): - linenum += 1 - pos = 0 - continue - - # We can't be sure if we just find a single '<', and need to - # find the matching '>'. - (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1) - if end_pos < 0: - # Not sure if template argument list or syntax error in file - return False - linenum = end_line - pos = end_pos - return False - - def UpdatePreprocessor(self, line): - """Update preprocessor stack. 
- - We need to handle preprocessors due to classes like this: - #ifdef SWIG - struct ResultDetailsPageElementExtensionPoint { - #else - struct ResultDetailsPageElementExtensionPoint : public Extension { - #endif - - We make the following assumptions (good enough for most files): - - Preprocessor condition evaluates to true from #if up to first - #else/#elif/#endif. - - - Preprocessor condition evaluates to false from #else/#elif up - to #endif. We still perform lint checks on these lines, but - these do not affect nesting stack. - - Args: - line: current line to check. - """ - if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line): - # Beginning of #if block, save the nesting stack here. The saved - # stack will allow us to restore the parsing state in the #else case. - self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) - elif Match(r'^\s*#\s*(else|elif)\b', line): - # Beginning of #else block - if self.pp_stack: - if not self.pp_stack[-1].seen_else: - # This is the first #else or #elif block. Remember the - # whole nesting stack up to this point. This is what we - # keep after the #endif. - self.pp_stack[-1].seen_else = True - self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack) - - # Restore the stack to how it was before the #if - self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if) - else: - # TODO(unknown): unexpected #else, issue warning? - pass - elif Match(r'^\s*#\s*endif\b', line): - # End of #if or #else blocks. - if self.pp_stack: - # If we saw an #else, we will need to restore the nesting - # stack to its former state before the #else, otherwise we - # will just continue from where we left off. - if self.pp_stack[-1].seen_else: - # Here we can just use a shallow copy since we are the last - # reference to it. - self.stack = self.pp_stack[-1].stack_before_else - # Drop the corresponding #if - self.pp_stack.pop() - else: - # TODO(unknown): unexpected #endif, issue warning? 
- pass - - # TODO(unknown): Update() is too long, but we will refactor later. - def Update(self, filename, clean_lines, linenum, error): - """Update nesting state with current line. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Remember top of the previous nesting stack. - # - # The stack is always pushed/popped and not modified in place, so - # we can just do a shallow copy instead of copy.deepcopy. Using - # deepcopy would slow down cpplint by ~28%. - if self.stack: - self.previous_stack_top = self.stack[-1] - else: - self.previous_stack_top = None - - # Update pp_stack - self.UpdatePreprocessor(line) - - # Count parentheses. This is to avoid adding struct arguments to - # the nesting stack. - if self.stack: - inner_block = self.stack[-1] - depth_change = line.count('(') - line.count(')') - inner_block.open_parentheses += depth_change - - # Also check if we are starting or ending an inline assembly block. - if inner_block.inline_asm in (_NO_ASM, _END_ASM): - if (depth_change != 0 and - inner_block.open_parentheses == 1 and - _MATCH_ASM.match(line)): - # Enter assembly block - inner_block.inline_asm = _INSIDE_ASM - else: - # Not entering assembly block. If previous line was _END_ASM, - # we will now shift to _NO_ASM state. - inner_block.inline_asm = _NO_ASM - elif (inner_block.inline_asm == _INSIDE_ASM and - inner_block.open_parentheses == 0): - # Exit assembly block - inner_block.inline_asm = _END_ASM - - # Consume namespace declaration at the beginning of the line. Do - # this in a loop so that we catch same line declarations like this: - # namespace proto2 { namespace bridge { class MessageSet; } } - while True: - # Match start of namespace. 
The "\b\s*" below catches namespace - # declarations even if it weren't followed by a whitespace, this - # is so that we don't confuse our namespace checker. The - # missing spaces will be flagged by CheckSpacing. - namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line) - if not namespace_decl_match: - break - - new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum) - self.stack.append(new_namespace) - - line = namespace_decl_match.group(2) - if line.find('{') != -1: - new_namespace.seen_open_brace = True - line = line[line.find('{') + 1:] - - # Look for a class declaration in whatever is left of the line - # after parsing namespaces. The regexp accounts for decorated classes - # such as in: - # class LOCKABLE API Object { - # }; - class_decl_match = Match( - r'^(\s*(?:template\s*<[\w\s<>,:]*>\s*)?' - r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))' - r'(.*)$', line) - if (class_decl_match and - (not self.stack or self.stack[-1].open_parentheses == 0)): - # We do not want to accept classes that are actually template arguments: - # template , - # template class Ignore3> - # void Function() {}; - # - # To avoid template argument cases, we scan forward and look for - # an unmatched '>'. If we see one, assume we are inside a - # template argument list. - end_declaration = len(class_decl_match.group(1)) - if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration): - self.stack.append(_ClassInfo( - class_decl_match.group(3), class_decl_match.group(2), - clean_lines, linenum)) - line = class_decl_match.group(4) - - # If we have not yet seen the opening brace for the innermost block, - # run checks here. 
- if not self.SeenOpenBrace(): - self.stack[-1].CheckBegin(filename, clean_lines, linenum, error) - - # Update access control if we are inside a class/struct - if self.stack and isinstance(self.stack[-1], _ClassInfo): - classinfo = self.stack[-1] - access_match = Match( - r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?' - r':(?:[^:]|$)', - line) - if access_match: - classinfo.access = access_match.group(2) - - # Check that access keywords are indented +1 space. Skip this - # check if the keywords are not preceded by whitespaces. - indent = access_match.group(1) - if (len(indent) != classinfo.class_indent + 1 and - Match(r'^\s*$', indent)): - if classinfo.is_struct: - parent = 'struct ' + classinfo.name - else: - parent = 'class ' + classinfo.name - slots = '' - if access_match.group(3): - slots = access_match.group(3) - error(filename, linenum, 'whitespace/indent', 3, - '%s%s: should be indented +1 space inside %s' % ( - access_match.group(2), slots, parent)) - - # Consume braces or semicolons from what's left of the line - while True: - # Match first brace, semicolon, or closed parenthesis. - matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line) - if not matched: - break - - token = matched.group(1) - if token == '{': - # If namespace or class hasn't seen a opening brace yet, mark - # namespace/class head as complete. Push a new block onto the - # stack otherwise. - if not self.SeenOpenBrace(): - self.stack[-1].seen_open_brace = True - elif Match(r'^extern\s*"[^"]*"\s*\{', line): - self.stack.append(_ExternCInfo(linenum)) - else: - self.stack.append(_BlockInfo(linenum, True)) - if _MATCH_ASM.match(line): - self.stack[-1].inline_asm = _BLOCK_ASM - - elif token == ';' or token == ')': - # If we haven't seen an opening brace yet, but we already saw - # a semicolon, this is probably a forward declaration. Pop - # the stack for these. 
- # - # Similarly, if we haven't seen an opening brace yet, but we - # already saw a closing parenthesis, then these are probably - # function arguments with extra "class" or "struct" keywords. - # Also pop these stack for these. - if not self.SeenOpenBrace(): - self.stack.pop() - else: # token == '}' - # Perform end of block checks and pop the stack. - if self.stack: - self.stack[-1].CheckEnd(filename, clean_lines, linenum, error) - self.stack.pop() - line = matched.group(2) - - def InnermostClass(self): - """Get class info on the top of the stack. - - Returns: - A _ClassInfo object if we are inside a class, or None otherwise. - """ - for i in range(len(self.stack), 0, -1): - classinfo = self.stack[i - 1] - if isinstance(classinfo, _ClassInfo): - return classinfo - return None - - def CheckCompletedBlocks(self, filename, error): - """Checks that all classes and namespaces have been completely parsed. - - Call this when all lines in a file have been processed. - Args: - filename: The name of the current file. - error: The function to call with any errors found. - """ - # Note: This test can result in false positives if #ifdef constructs - # get in the way of brace matching. See the testBuildClass test in - # cpplint_unittest.py for an example of this. - for obj in self.stack: - if isinstance(obj, _ClassInfo): - error(filename, obj.starting_linenum, 'build/class', 5, - 'Failed to find complete declaration of class %s' % - obj.name) - elif isinstance(obj, _NamespaceInfo): - error(filename, obj.starting_linenum, 'build/namespaces', 5, - 'Failed to find complete declaration of namespace %s' % - obj.name) - - -def CheckForNonStandardConstructs(filename, clean_lines, linenum, - nesting_state, error): - r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2. - - Complain about several constructs which gcc-2 accepts, but which are - not standard C++. Warning about these in lint is one way to ease the - transition to new compilers. 
- - put storage class first (e.g. "static const" instead of "const static"). - - "%lld" instead of %qd" in printf-type functions. - - "%1$d" is non-standard in printf-type functions. - - "\%" is an undefined character escape sequence. - - text after #endif is not allowed. - - invalid inner-style forward declaration. - - >? and ?= and )\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', - line): - error(filename, linenum, 'build/deprecated', 3, - '>? and ))?' - # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;' - error(filename, linenum, 'runtime/member_string_references', 2, - 'const string& members are dangerous. It is much better to use ' - 'alternatives, such as pointers or simple constants.') - - # Everything else in this function operates on class declarations. - # Return early if the top of the nesting stack is not a class, or if - # the class head is not completed yet. - classinfo = nesting_state.InnermostClass() - if not classinfo or not classinfo.seen_open_brace: - return - - # The class may have been declared with namespace or classname qualifiers. - # The constructor and destructor will not have those qualifiers. - base_classname = classinfo.name.split('::')[-1] - - # Look for single-argument constructors that aren't marked explicit. - # Technically a valid construct, but against style. - explicit_constructor_match = Match( - r'\s+(?:(?:inline|constexpr)\s+)*(explicit\s+)?' 
- r'(?:(?:inline|constexpr)\s+)*%s\s*' - r'\(((?:[^()]|\([^()]*\))*)\)' - % re.escape(base_classname), - line) - - if explicit_constructor_match: - is_marked_explicit = explicit_constructor_match.group(1) - - if not explicit_constructor_match.group(2): - constructor_args = [] - else: - constructor_args = explicit_constructor_match.group(2).split(',') - - # collapse arguments so that commas in template parameter lists and function - # argument parameter lists don't split arguments in two - i = 0 - while i < len(constructor_args): - constructor_arg = constructor_args[i] - while (constructor_arg.count('<') > constructor_arg.count('>') or - constructor_arg.count('(') > constructor_arg.count(')')): - constructor_arg += ',' + constructor_args[i + 1] - del constructor_args[i + 1] - constructor_args[i] = constructor_arg - i += 1 - - defaulted_args = [arg for arg in constructor_args if '=' in arg] - noarg_constructor = (not constructor_args or # empty arg list - # 'void' arg specifier - (len(constructor_args) == 1 and - constructor_args[0].strip() == 'void')) - onearg_constructor = ((len(constructor_args) == 1 and # exactly one arg - not noarg_constructor) or - # all but at most one arg defaulted - (len(constructor_args) >= 1 and - not noarg_constructor and - len(defaulted_args) >= len(constructor_args) - 1)) - initializer_list_constructor = bool( - onearg_constructor and - Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0])) - copy_constructor = bool( - onearg_constructor and - Match(r'(const\s+)?%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&' - % re.escape(base_classname), constructor_args[0].strip())) - - if (not is_marked_explicit and - onearg_constructor and - not initializer_list_constructor and - not copy_constructor): - if defaulted_args: - error(filename, linenum, 'runtime/explicit', 5, - 'Constructors callable with one argument ' - 'should be marked explicit.') - else: - error(filename, linenum, 'runtime/explicit', 5, - 'Single-parameter constructors 
should be marked explicit.') - elif is_marked_explicit and not onearg_constructor: - if noarg_constructor: - error(filename, linenum, 'runtime/explicit', 5, - 'Zero-parameter constructors should not be marked explicit.') - - -def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error): - """Checks for the correctness of various spacing around function calls. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Since function calls often occur inside if/for/while/switch - # expressions - which have their own, more liberal conventions - we - # first see if we should be looking inside such an expression for a - # function call, to which we can apply more strict standards. - fncall = line # if there's no control flow construct, look at whole line - for pattern in (r'\bif\s*\((.*)\)\s*{', - r'\bfor\s*\((.*)\)\s*{', - r'\bwhile\s*\((.*)\)\s*[{;]', - r'\bswitch\s*\((.*)\)\s*{'): - match = Search(pattern, line) - if match: - fncall = match.group(1) # look inside the parens for function calls - break - - # Except in if/for/while/switch, there should never be space - # immediately inside parens (eg "f( 3, 4 )"). We make an exception - # for nested parens ( (a+b) + c ). Likewise, there should never be - # a space before a ( when it's a function argument. I assume it's a - # function argument when the char before the whitespace is legal in - # a function name (alnum + _) and we're not starting a macro. Also ignore - # pointers and references to arrays and functions coz they're too tricky: - # we use a very simple way to recognize these: - # " (something)(maybe-something)" or - # " (something)(maybe-something," or - # " (something)[something]" - # Note that we assume the contents of [] to be short enough that - # they'll never need to wrap. 
- if ( # Ignore control structures. - not Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b', - fncall) and - # Ignore pointers/references to functions. - not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and - # Ignore pointers/references to arrays. - not Search(r' \([^)]+\)\[[^\]]+\]', fncall)): - if Search(r'\w\s*\(\s(?!\s*\\$)', fncall): # a ( used for a fn call - error(filename, linenum, 'whitespace/parens', 4, - 'Extra space after ( in function call') - elif Search(r'\(\s+(?!(\s*\\)|\()', fncall): - error(filename, linenum, 'whitespace/parens', 2, - 'Extra space after (') - if (Search(r'\w\s+\(', fncall) and - not Search(r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(', fncall) and - not Search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and - not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and - not Search(r'\bcase\s+\(', fncall)): - # TODO(unknown): Space after an operator function seem to be a common - # error, silence those for now by restricting them to highest verbosity. - if Search(r'\boperator_*\b', line): - error(filename, linenum, 'whitespace/parens', 0, - 'Extra space before ( in function call') - else: - error(filename, linenum, 'whitespace/parens', 4, - 'Extra space before ( in function call') - # If the ) is followed only by a newline or a { + newline, assume it's - # part of a control statement (if/while/etc), and don't complain - if Search(r'[^)]\s+\)\s*[^{\s]', fncall): - # If the closing parenthesis is preceded by only whitespaces, - # try to give a more descriptive error message. - if Search(r'^\s+\)', fncall): - error(filename, linenum, 'whitespace/parens', 2, - 'Closing ) should be moved to the previous line') - else: - error(filename, linenum, 'whitespace/parens', 2, - 'Extra space before )') - - -def IsBlankLine(line): - """Returns true if the given line is blank. - - We consider a line to be blank if the line is empty or consists of - only white spaces. - - Args: - line: A line of a string. 
- - Returns: - True, if the given line is blank. - """ - return not line or line.isspace() - - -def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, - error): - is_namespace_indent_item = ( - len(nesting_state.stack) > 1 and - nesting_state.stack[-1].check_namespace_indentation and - isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and - nesting_state.previous_stack_top == nesting_state.stack[-2]) - - if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, - clean_lines.elided, line): - CheckItemIndentationInNamespace(filename, clean_lines.elided, - line, error) - - -def CheckForFunctionLengths(filename, clean_lines, linenum, - function_state, error): - """Reports for long function bodies. - - For an overview why this is done, see: - https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions - - Uses a simplistic algorithm assuming other style guidelines - (especially spacing) are followed. - Only checks unindented functions, so class members are unchecked. - Trivial bodies are unchecked, so constructors with huge initializer lists - may be missed. - Blank/comment lines are not counted so as to avoid encouraging the removal - of vertical space and comments just to get through a lint check. - NOLINT *on the last line of a function* disables this check. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - function_state: Current function name and lines in body so far. - error: The function to call with any errors found. - """ - lines = clean_lines.lines - line = lines[linenum] - joined_line = '' - - starting_func = False - regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... - match_result = Match(regexp, line) - if match_result: - # If the name is all caps and underscores, figure it's a macro and - # ignore it, unless it's TEST or TEST_F. 
- function_name = match_result.group(1).split()[-1] - if function_name == 'TEST' or function_name == 'TEST_F' or ( - not Match(r'[A-Z_]+$', function_name)): - starting_func = True - - if starting_func: - body_found = False - for start_linenum in xrange(linenum, clean_lines.NumLines()): - start_line = lines[start_linenum] - joined_line += ' ' + start_line.lstrip() - if Search(r'(;|})', start_line): # Declarations and trivial functions - body_found = True - break # ... ignore - elif Search(r'{', start_line): - body_found = True - function = Search(r'((\w|:)*)\(', line).group(1) - if Match(r'TEST', function): # Handle TEST... macros - parameter_regexp = Search(r'(\(.*\))', joined_line) - if parameter_regexp: # Ignore bad syntax - function += parameter_regexp.group(1) - else: - function += '()' - function_state.Begin(function) - break - if not body_found: - # No body for the function (or evidence of a non-function) was found. - error(filename, linenum, 'readability/fn_size', 5, - 'Lint failed to find start of function body.') - elif Match(r'^\}\s*$', line): # function end - function_state.Check(error, filename, linenum) - function_state.End() - elif not Match(r'^\s*$', line): - function_state.Count() # Count non-blank/non-comment lines. - - -_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?') - - -def CheckComment(line, filename, linenum, next_line_start, error): - """Checks for common mistakes in comments. - - Args: - line: The line in question. - filename: The name of the current file. - linenum: The number of the line to check. - next_line_start: The first non-whitespace column of the next line. - error: The function to call with any errors found. - """ - commentpos = line.find('//') - if commentpos != -1: - # Check if the // may be in quotes. 
If so, ignore it - if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0: - # Allow one space for new scopes, two spaces otherwise: - if (not (Match(r'^.*{ *//', line) and next_line_start == commentpos) and - ((commentpos >= 1 and - line[commentpos-1] not in string.whitespace) or - (commentpos >= 2 and - line[commentpos-2] not in string.whitespace))): - error(filename, linenum, 'whitespace/comments', 2, - 'At least two spaces is best between code and comments') - - # Checks for common mistakes in TODO comments. - comment = line[commentpos:] - match = _RE_PATTERN_TODO.match(comment) - if match: - # One whitespace is correct; zero whitespace is handled elsewhere. - leading_whitespace = match.group(1) - if len(leading_whitespace) > 1: - error(filename, linenum, 'whitespace/todo', 2, - 'Too many spaces before TODO') - - username = match.group(2) - if not username: - error(filename, linenum, 'readability/todo', 2, - 'Missing username in TODO; it should look like ' - '"// TODO(my_username): Stuff."') - - middle_whitespace = match.group(3) - # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison - if middle_whitespace != ' ' and middle_whitespace != '': - error(filename, linenum, 'whitespace/todo', 2, - 'TODO(my_username) should be followed by a space') - - # If the comment contains an alphanumeric character, there - # should be a space somewhere between it and the // unless - # it's a /// or //! Doxygen comment. - if (Match(r'//[^ ]*\w', comment) and - not Match(r'(///|//\!)(\s+|$)', comment)): - error(filename, linenum, 'whitespace/comments', 4, - 'Should have a space between // and comment') - - -def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): - """Checks for the correctness of various spacing issues in the code. 
- - Things we check for: spaces around operators, spaces after - if/for/while/switch, no spaces around parens in function calls, two - spaces between code and comment, don't start a block with a blank - line, don't end a function with a blank line, don't add a blank line - after public/protected/private, don't have too many blank lines in a row. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - - # Don't use "elided" lines here, otherwise we can't check commented lines. - # Don't want to use "raw" either, because we don't want to check inside C++11 - # raw strings, - raw = clean_lines.lines_without_raw_strings - line = raw[linenum] - - # Before nixing comments, check if the line is blank for no good - # reason. This includes the first line after a block is opened, and - # blank lines at the end of a function (ie, right before a line like '}' - # - # Skip all the blank line checks if we are immediately inside a - # namespace body. In other words, don't issue blank line warnings - # for this block: - # namespace { - # - # } - # - # A warning about missing end of namespace comments will be issued instead. - # - # Also skip blank line checks for 'extern "C"' blocks, which are formatted - # like namespaces. - if (IsBlankLine(line) and - not nesting_state.InNamespaceBody() and - not nesting_state.InExternC()): - elided = clean_lines.elided - prev_line = elided[linenum - 1] - prevbrace = prev_line.rfind('{') - # TODO(unknown): Don't complain if line before blank line, and line after, - # both start with alnums and are indented the same amount. - # This ignores whitespace at the start of a namespace block - # because those are not usually indented. 
- if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1: - # OK, we have a blank line at the start of a code block. Before we - # complain, we check if it is an exception to the rule: The previous - # non-empty line has the parameters of a function header that are indented - # 4 spaces (because they did not fit in a 80 column line when placed on - # the same line as the function name). We also check for the case where - # the previous line is indented 6 spaces, which may happen when the - # initializers of a constructor do not fit into a 80 column line. - exception = False - if Match(r' {6}\w', prev_line): # Initializer list? - # We are looking for the opening column of initializer list, which - # should be indented 4 spaces to cause 6 space indentation afterwards. - search_position = linenum-2 - while (search_position >= 0 - and Match(r' {6}\w', elided[search_position])): - search_position -= 1 - exception = (search_position >= 0 - and elided[search_position][:5] == ' :') - else: - # Search for the function arguments or an initializer list. We use a - # simple heuristic here: If the line is indented 4 spaces; and we have a - # closing paren, without the opening paren, followed by an opening brace - # or colon (for initializer lists) we assume that it is the last line of - # a function header. If we have a colon indented 4 spaces, it is an - # initializer list. 
- exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', - prev_line) - or Match(r' {4}:', prev_line)) - - if not exception: - error(filename, linenum, 'whitespace/blank_line', 2, - 'Redundant blank line at the start of a code block ' - 'should be deleted.') - # Ignore blank lines at the end of a block in a long if-else - # chain, like this: - # if (condition1) { - # // Something followed by a blank line - # - # } else if (condition2) { - # // Something else - # } - if linenum + 1 < clean_lines.NumLines(): - next_line = raw[linenum + 1] - if (next_line - and Match(r'\s*}', next_line) - and next_line.find('} else ') == -1): - error(filename, linenum, 'whitespace/blank_line', 3, - 'Redundant blank line at the end of a code block ' - 'should be deleted.') - - matched = Match(r'\s*(public|protected|private):', prev_line) - if matched: - error(filename, linenum, 'whitespace/blank_line', 3, - 'Do not leave a blank line after "%s:"' % matched.group(1)) - - # Next, check comments - next_line_start = 0 - if linenum + 1 < clean_lines.NumLines(): - next_line = raw[linenum + 1] - next_line_start = len(next_line) - len(next_line.lstrip()) - CheckComment(line, filename, linenum, next_line_start, error) - - # get rid of comments and strings - line = clean_lines.elided[linenum] - - # You shouldn't have spaces before your brackets, except maybe after - # 'delete []' or 'return []() {};' - if Search(r'\w\s+\[', line) and not Search(r'(?:delete|return)\s+\[', line): - error(filename, linenum, 'whitespace/braces', 5, - 'Extra space before [') - - # In range-based for, we wanted spaces before and after the colon, but - # not around "::" tokens that might appear. - if (Search(r'for *\(.*[^:]:[^: ]', line) or - Search(r'for *\(.*[^: ]:[^:]', line)): - error(filename, linenum, 'whitespace/forcolon', 2, - 'Missing space around colon in range-based for loop') - - -def CheckOperatorSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing around operators. 
- - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Don't try to do spacing checks for operator methods. Do this by - # replacing the troublesome characters with something else, - # preserving column position for all other characters. - # - # The replacement is done repeatedly to avoid false positives from - # operators that call operators. - while True: - match = Match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line) - if match: - line = match.group(1) + ('_' * len(match.group(2))) + match.group(3) - else: - break - - # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". - # Otherwise not. Note we only check for non-spaces on *both* sides; - # sometimes people put non-spaces on one side when aligning ='s among - # many lines (not that this is behavior that I approve of...) - if ((Search(r'[\w.]=', line) or - Search(r'=[\w.]', line)) - and not Search(r'\b(if|while|for) ', line) - # Operators taken from [lex.operators] in C++11 standard. - and not Search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line) - and not Search(r'operator=', line)): - error(filename, linenum, 'whitespace/operators', 4, - 'Missing spaces around =') - - # It's ok not to have spaces around binary operators like + - * /, but if - # there's too little whitespace, we get concerned. It's hard to tell, - # though, so we punt on this one for now. TODO. - - # You should always have whitespace around binary operators. - # - # Check <= and >= first to avoid false positives with < and >, then - # check non-include lines for spacing around < and >. - # - # If the operator is followed by a comma, assume it's be used in a - # macro context and don't do any checks. This avoids false - # positives. - # - # Note that && is not included here. 
This is because there are too - # many false positives due to RValue references. - match = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line) - if match: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around %s' % match.group(1)) - elif not Match(r'#.*include', line): - # Look for < that is not surrounded by spaces. This is only - # triggered if both sides are missing spaces, even though - # technically should should flag if at least one side is missing a - # space. This is done to avoid some false positives with shifts. - match = Match(r'^(.*[^\s<])<[^\s=<,]', line) - if match: - (_, _, end_pos) = CloseExpression( - clean_lines, linenum, len(match.group(1))) - if end_pos <= -1: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around <') - - # Look for > that is not surrounded by spaces. Similar to the - # above, we only trigger if both sides are missing spaces to avoid - # false positives with shifts. - match = Match(r'^(.*[^-\s>])>[^\s=>,]', line) - if match: - (_, _, start_pos) = ReverseCloseExpression( - clean_lines, linenum, len(match.group(1))) - if start_pos <= -1: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around >') - - # We allow no-spaces around << when used like this: 10<<20, but - # not otherwise (particularly, not when used as streams) - # - # We also allow operators following an opening parenthesis, since - # those tend to be macros that deal with operators. - match = Search(r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])', line) - if (match and not (match.group(1).isdigit() and match.group(2).isdigit()) and - not (match.group(1) == 'operator' and match.group(2) == ';')): - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around <<') - - # We allow no-spaces around >> for almost anything. This is because - # C++11 allows ">>" to close nested templates, which accounts for - # most cases when ">>" is not followed by a space. 
- # - # We still warn on ">>" followed by alpha character, because that is - # likely due to ">>" being used for right shifts, e.g.: - # value >> alpha - # - # When ">>" is used to close templates, the alphanumeric letter that - # follows would be part of an identifier, and there should still be - # a space separating the template type and the identifier. - # type> alpha - match = Search(r'>>[a-zA-Z_]', line) - if match: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around >>') - - # There shouldn't be space around unary operators - match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line) - if match: - error(filename, linenum, 'whitespace/operators', 4, - 'Extra space for operator %s' % match.group(1)) - - -def CheckParenthesisSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing around parentheses. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # No spaces after an if, while, switch, or for - match = Search(r' (if\(|for\(|while\(|switch\()', line) - if match: - error(filename, linenum, 'whitespace/parens', 5, - 'Missing space before ( in %s' % match.group(1)) - - # For if/for/while/switch, the left and right parens should be - # consistent about how many spaces are inside the parens, and - # there should either be zero or one spaces inside the parens. - # We don't want: "if ( foo)" or "if ( foo )". - # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. 
- match = Search(r'\b(if|for|while|switch)\s*' - r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', - line) - if match: - if len(match.group(2)) != len(match.group(4)): - if not (match.group(3) == ';' and - len(match.group(2)) == 1 + len(match.group(4)) or - not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)): - error(filename, linenum, 'whitespace/parens', 5, - 'Mismatching spaces inside () in %s' % match.group(1)) - if len(match.group(2)) not in [0, 1]: - error(filename, linenum, 'whitespace/parens', 5, - 'Should have zero or one spaces inside ( and ) in %s' % - match.group(1)) - - -def CheckCommaSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing near commas and semicolons. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - raw = clean_lines.lines_without_raw_strings - line = clean_lines.elided[linenum] - - # You should always have a space after a comma (either as fn arg or operator) - # - # This does not apply when the non-space character following the - # comma is another comma, since the only time when that happens is - # for empty macro arguments. - # - # We run this check in two passes: first pass on elided lines to - # verify that lines contain missing whitespaces, second pass on raw - # lines to confirm that those missing whitespaces are not due to - # elided comments. 
- if (Search(r',[^,\s]', ReplaceAll(r'\boperator\s*,\s*\(', 'F(', line)) and - Search(r',[^,\s]', raw[linenum])): - error(filename, linenum, 'whitespace/comma', 3, - 'Missing space after ,') - - # You should always have a space after a semicolon - # except for few corner cases - # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more - # space after ; - if Search(r';[^\s};\\)/]', line): - error(filename, linenum, 'whitespace/semicolon', 3, - 'Missing space after ;') - - -def _IsType(clean_lines, nesting_state, expr): - """Check if expression looks like a type name, returns true if so. - - Args: - clean_lines: A CleansedLines instance containing the file. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - expr: The expression to check. - Returns: - True, if token looks like a type. - """ - # Keep only the last token in the expression - last_word = Match(r'^.*(\b\S+)$', expr) - if last_word: - token = last_word.group(1) - else: - token = expr - - # Match native types and stdint types - if _TYPES.match(token): - return True - - # Try a bit harder to match templated types. Walk up the nesting - # stack until we find something that resembles a typename - # declaration for what we are looking for. - typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) + - r'\b') - block_index = len(nesting_state.stack) - 1 - while block_index >= 0: - if isinstance(nesting_state.stack[block_index], _NamespaceInfo): - return False - - # Found where the opening brace is. We want to scan from this - # line up to the beginning of the function, minus a few lines. - # template - # class C - # : public ... 
{ // start scanning here - last_line = nesting_state.stack[block_index].starting_linenum - - next_block_start = 0 - if block_index > 0: - next_block_start = nesting_state.stack[block_index - 1].starting_linenum - first_line = last_line - while first_line >= next_block_start: - if clean_lines.elided[first_line].find('template') >= 0: - break - first_line -= 1 - if first_line < next_block_start: - # Didn't find any "template" keyword before reaching the next block, - # there are probably no template things to check for this block - block_index -= 1 - continue - - # Look for typename in the specified range - for i in xrange(first_line, last_line + 1, 1): - if Search(typename_pattern, clean_lines.elided[i]): - return True - block_index -= 1 - - return False - - -def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error): - """Checks for horizontal spacing near commas. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Except after an opening paren, or after another opening brace (in case of - # an initializer list, for instance), you should have spaces before your - # braces when they are delimiting blocks, classes, namespaces etc. - # And since you should never have braces at the beginning of a line, - # this is an easy test. Except that braces used for initialization don't - # follow the same rule; we often don't want spaces before those. - match = Match(r'^(.*[^ ({>]){', line) - - if match: - # Try a bit harder to check for brace initialization. This - # happens in one of the following forms: - # Constructor() : initializer_list_{} { ... 
} - # Constructor{}.MemberFunction() - # Type variable{}; - # FunctionCall(type{}, ...); - # LastArgument(..., type{}); - # LOG(INFO) << type{} << " ..."; - # map_of_type[{...}] = ...; - # ternary = expr ? new type{} : nullptr; - # OuterTemplate{}> - # - # We check for the character following the closing brace, and - # silence the warning if it's one of those listed above, i.e. - # "{.;,)<>]:". - # - # To account for nested initializer list, we allow any number of - # closing braces up to "{;,)<". We can't simply silence the - # warning on first sight of closing brace, because that would - # cause false negatives for things that are not initializer lists. - # Silence this: But not this: - # Outer{ if (...) { - # Inner{...} if (...){ // Missing space before { - # }; } - # - # There is a false negative with this approach if people inserted - # spurious semicolons, e.g. "if (cond){};", but we will catch the - # spurious semicolon with a separate check. - leading_text = match.group(1) - (endline, endlinenum, endpos) = CloseExpression( - clean_lines, linenum, len(match.group(1))) - trailing_text = '' - if endpos > -1: - trailing_text = endline[endpos:] - for offset in xrange(endlinenum + 1, - min(endlinenum + 3, clean_lines.NumLines() - 1)): - trailing_text += clean_lines.elided[offset] - # We also suppress warnings for `uint64_t{expression}` etc., as the style - # guide recommends brace initialization for integral types to avoid - # overflow/truncation. - if (not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text) - and not _IsType(clean_lines, nesting_state, leading_text)): - error(filename, linenum, 'whitespace/braces', 5, - 'Missing space before {') - - # Make sure '} else {' has spaces. - if Search(r'}else', line): - error(filename, linenum, 'whitespace/braces', 5, - 'Missing space before else') - - # You shouldn't have a space before a semicolon at the end of the line. - # There's a special case for "for" since the style guide allows space before - # the semicolon there. 
- if Search(r':\s*;\s*$', line): - error(filename, linenum, 'whitespace/semicolon', 5, - 'Semicolon defining empty statement. Use {} instead.') - elif Search(r'^\s*;\s*$', line): - error(filename, linenum, 'whitespace/semicolon', 5, - 'Line contains only semicolon. If this should be an empty statement, ' - 'use {} instead.') - elif (Search(r'\s+;\s*$', line) and - not Search(r'\bfor\b', line)): - error(filename, linenum, 'whitespace/semicolon', 5, - 'Extra space before last semicolon. If this should be an empty ' - 'statement, use {} instead.') - - -def IsDecltype(clean_lines, linenum, column): - """Check if the token ending on (linenum, column) is decltype(). - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: the number of the line to check. - column: end column of the token to check. - Returns: - True if this token is decltype() expression, False otherwise. - """ - (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column) - if start_col < 0: - return False - if Search(r'\bdecltype\s*$', text[0:start_col]): - return True - return False - - -def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): - """Checks for additional blank line issues related to sections. - - Currently the only thing checked here is blank line before protected/private. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - class_info: A _ClassInfo objects. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Skip checks if the class is small, where small means 25 lines or less. - # 25 lines seems like a good cutoff since that's the usual height of - # terminals, and any class that can't fit in one screen can't really - # be considered "small". - # - # Also skip checks if we are on the first line. This accounts for - # classes that look like - # class Foo { public: ... 
}; - # - # If we didn't find the end of the class, last_line would be zero, - # and the check will be skipped by the first condition. - if (class_info.last_line - class_info.starting_linenum <= 24 or - linenum <= class_info.starting_linenum): - return - - matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum]) - if matched: - # Issue warning if the line before public/protected/private was - # not a blank line, but don't do this if the previous line contains - # "class" or "struct". This can happen two ways: - # - We are at the beginning of the class. - # - We are forward-declaring an inner class that is semantically - # private, but needed to be public for implementation reasons. - # Also ignores cases where the previous line ends with a backslash as can be - # common when defining classes in C macros. - prev_line = clean_lines.lines[linenum - 1] - if (not IsBlankLine(prev_line) and - not Search(r'\b(class|struct)\b', prev_line) and - not Search(r'\\$', prev_line)): - # Try a bit harder to find the beginning of the class. This is to - # account for multi-line base-specifier lists, e.g.: - # class Derived - # : public Base { - end_class_head = class_info.starting_linenum - for i in range(class_info.starting_linenum, linenum): - if Search(r'\{\s*$', clean_lines.lines[i]): - end_class_head = i - break - if end_class_head < linenum - 1: - error(filename, linenum, 'whitespace/blank_line', 3, - '"%s:" should be preceded by a blank line' % matched.group(1)) - - -def GetPreviousNonBlankLine(clean_lines, linenum): - """Return the most recent non-blank line and its line number. - - Args: - clean_lines: A CleansedLines instance containing the file contents. - linenum: The number of the line to check. - - Returns: - A tuple with two elements. The first element is the contents of the last - non-blank line before the current line, or the empty string if this is the - first non-blank line. 
The second is the line number of that line, or -1 - if this is the first non-blank line. - """ - - prevlinenum = linenum - 1 - while prevlinenum >= 0: - prevline = clean_lines.elided[prevlinenum] - if not IsBlankLine(prevline): # if not a blank line... - return (prevline, prevlinenum) - prevlinenum -= 1 - return ('', -1) - - -def CheckBraces(filename, clean_lines, linenum, error): - """Looks for misplaced braces (e.g. at the end of line). - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - line = clean_lines.elided[linenum] # get rid of comments and strings - - if Match(r'\s*{\s*$', line): - # We allow an open brace to start a line in the case where someone is using - # braces in a block to explicitly create a new scope, which is commonly used - # to control the lifetime of stack-allocated variables. Braces are also - # used for brace initializers inside function calls. We don't detect this - # perfectly: we just don't complain if the last non-whitespace character on - # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the - # previous line starts a preprocessor block. We also allow a brace on the - # following line if it is part of an array initialization and would not fit - # within the 80 character limit of the preceding line. - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if (not Search(r'[,;:}{(]\s*$', prevline) and - not Match(r'\s*#', prevline) and - not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)): - error(filename, linenum, 'whitespace/braces', 4, - '{ should almost always be at the end of the previous line') - - # An else clause should be on the same line as the preceding closing brace. 
- if Match(r'\s*else\b\s*(?:if\b|\{|$)', line): - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if Match(r'\s*}\s*$', prevline): - error(filename, linenum, 'whitespace/newline', 4, - 'An else should appear on the same line as the preceding }') - - # If braces come on one side of an else, they should be on both. - # However, we have to worry about "else if" that spans multiple lines! - if Search(r'else if\s*\(', line): # could be multi-line if - brace_on_left = bool(Search(r'}\s*else if\s*\(', line)) - # find the ( after the if - pos = line.find('else if') - pos = line.find('(', pos) - if pos > 0: - (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos) - brace_on_right = endline[endpos:].find('{') != -1 - if brace_on_left != brace_on_right: # must be brace after if - error(filename, linenum, 'readability/braces', 5, - 'If an else has a brace on one side, it should have it on both') - elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line): - error(filename, linenum, 'readability/braces', 5, - 'If an else has a brace on one side, it should have it on both') - - # Likewise, an else should never have the else clause on the same line - if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line): - error(filename, linenum, 'whitespace/newline', 4, - 'Else clause should never be on same line as else (use 2 lines)') - - # In the same way, a do/while should never be on one line - if Match(r'\s*do [^\s{]', line): - error(filename, linenum, 'whitespace/newline', 4, - 'do/while clauses should not be on a single line') - - # Check single-line if/else bodies. The style guide says 'curly braces are not - # required for single-line statements'. We additionally allow multi-line, - # single statements, but we reject anything with more than one semicolon in - # it. 
This means that the first semicolon after the if should be at the end of - # its line, and the line after that should have an indent level equal to or - # lower than the if. We also check for ambiguous if/else nesting without - # braces. - if_else_match = Search(r'\b(if\s*\(|else\b)', line) - if if_else_match and not Match(r'\s*#', line): - if_indent = GetIndentLevel(line) - endline, endlinenum, endpos = line, linenum, if_else_match.end() - if_match = Search(r'\bif\s*\(', line) - if if_match: - # This could be a multiline if condition, so find the end first. - pos = if_match.end() - 1 - (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos) - # Check for an opening brace, either directly after the if or on the next - # line. If found, this isn't a single-statement conditional. - if (not Match(r'\s*{', endline[endpos:]) - and not (Match(r'\s*$', endline[endpos:]) - and endlinenum < (len(clean_lines.elided) - 1) - and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))): - while (endlinenum < len(clean_lines.elided) - and ';' not in clean_lines.elided[endlinenum][endpos:]): - endlinenum += 1 - endpos = 0 - if endlinenum < len(clean_lines.elided): - endline = clean_lines.elided[endlinenum] - # We allow a mix of whitespace and closing braces (e.g. for one-liner - # methods) and a single \ after the semicolon (for macros) - endpos = endline.find(';') - if not Match(r';[\s}]*(\\?)$', endline[endpos:]): - # Semicolon isn't the last character, there's something trailing. - # Output a warning if the semicolon is not contained inside - # a lambda expression. 
- if not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$', - endline): - error(filename, linenum, 'readability/braces', 4, - 'If/else bodies with multiple statements require braces') - elif endlinenum < len(clean_lines.elided) - 1: - # Make sure the next line is dedented - next_line = clean_lines.elided[endlinenum + 1] - next_indent = GetIndentLevel(next_line) - # With ambiguous nested if statements, this will error out on the - # if that *doesn't* match the else, regardless of whether it's the - # inner one or outer one. - if (if_match and Match(r'\s*else\b', next_line) - and next_indent != if_indent): - error(filename, linenum, 'readability/braces', 4, - 'Else clause should be indented at the same level as if. ' - 'Ambiguous nested if/else chains require braces.') - elif next_indent > if_indent: - error(filename, linenum, 'readability/braces', 4, - 'If/else bodies with multiple statements require braces') - - -def CheckTrailingSemicolon(filename, clean_lines, linenum, error): - """Looks for redundant trailing semicolon. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - line = clean_lines.elided[linenum] - - # Block bodies should not be followed by a semicolon. Due to C++11 - # brace initialization, there are more places where semicolons are - # required than not, so we use a whitelist approach to check these - # rather than a blacklist. These are the places where "};" should - # be replaced by just "}": - # 1. Some flavor of block following closing parenthesis: - # for (;;) {}; - # while (...) {}; - # switch (...) {}; - # Function(...) {}; - # if (...) {}; - # if (...) else if (...) {}; - # - # 2. else block: - # if (...) else {}; - # - # 3. const member function: - # Function(...) const {}; - # - # 4. Block following some statement: - # x = 42; - # {}; - # - # 5. 
Block at the beginning of a function: - # Function(...) { - # {}; - # } - # - # Note that naively checking for the preceding "{" will also match - # braces inside multi-dimensional arrays, but this is fine since - # that expression will not contain semicolons. - # - # 6. Block following another block: - # while (true) {} - # {}; - # - # 7. End of namespaces: - # namespace {}; - # - # These semicolons seems far more common than other kinds of - # redundant semicolons, possibly due to people converting classes - # to namespaces. For now we do not warn for this case. - # - # Try matching case 1 first. - match = Match(r'^(.*\)\s*)\{', line) - if match: - # Matched closing parenthesis (case 1). Check the token before the - # matching opening parenthesis, and don't warn if it looks like a - # macro. This avoids these false positives: - # - macro that defines a base class - # - multi-line macro that defines a base class - # - macro that defines the whole class-head - # - # But we still issue warnings for macros that we know are safe to - # warn, specifically: - # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P - # - TYPED_TEST - # - INTERFACE_DEF - # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: - # - # We implement a whitelist of safe macros instead of a blacklist of - # unsafe macros, even though the latter appears less frequently in - # google code and would have been easier to implement. This is because - # the downside for getting the whitelist wrong means some extra - # semicolons, while the downside for getting the blacklist wrong - # would result in compile errors. 
- # - # In addition to macros, we also don't want to warn on - # - Compound literals - # - Lambdas - # - alignas specifier with anonymous structs - # - decltype - closing_brace_pos = match.group(1).rfind(')') - opening_parenthesis = ReverseCloseExpression( - clean_lines, linenum, closing_brace_pos) - if opening_parenthesis[2] > -1: - line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]] - macro = Search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix) - func = Match(r'^(.*\])\s*$', line_prefix) - if ((macro and - macro.group(1) not in ( - 'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST', - 'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED', - 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or - (func and not Search(r'\boperator\s*\[\s*\]', func.group(1))) or - Search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or - Search(r'\bdecltype$', line_prefix) or - Search(r'\s+=\s*$', line_prefix)): - match = None - if (match and - opening_parenthesis[1] > 1 and - Search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])): - # Multi-line lambda-expression - match = None - - else: - # Try matching cases 2-3. - match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line) - if not match: - # Try matching cases 4-6. These are always matched on separate lines. - # - # Note that we can't simply concatenate the previous line to the - # current line and do a single match, otherwise we may output - # duplicate warnings for the blank line case: - # if (cond) { - # // blank line - # } - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if prevline and Search(r'[;{}]\s*$', prevline): - match = Match(r'^(\s*)\{', line) - - # Check matching closing brace - if match: - (endline, endlinenum, endpos) = CloseExpression( - clean_lines, linenum, len(match.group(1))) - if endpos > -1 and Match(r'^\s*;', endline[endpos:]): - # Current {} pair is eligible for semicolon check, and we have found - # the redundant semicolon, output warning here. 
- # - # Note: because we are scanning forward for opening braces, and - # outputting warnings for the matching closing brace, if there are - # nested blocks with trailing semicolons, we will get the error - # messages in reversed order. - - # We need to check the line forward for NOLINT - raw_lines = clean_lines.raw_lines - ParseNolintSuppressions(filename, raw_lines[endlinenum-1], endlinenum-1, - error) - ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum, - error) - - error(filename, endlinenum, 'readability/braces', 4, - "You don't need a ; after a }") - - -def CheckEmptyBlockBody(filename, clean_lines, linenum, error): - """Look for empty loop/conditional body with only a single semicolon. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - # Search for loop keywords at the beginning of the line. Because only - # whitespaces are allowed before the keywords, this will also ignore most - # do-while-loops, since those lines should start with closing brace. - # - # We also check "if" blocks here, since an empty conditional block - # is likely an error. - line = clean_lines.elided[linenum] - matched = Match(r'\s*(for|while|if)\s*\(', line) - if matched: - # Find the end of the conditional expression. - (end_line, end_linenum, end_pos) = CloseExpression( - clean_lines, linenum, line.find('(')) - - # Output warning if what follows the condition expression is a semicolon. - # No warning for all other cases, including whitespace or newline, since we - # have a separate check for semicolons preceded by whitespace. 
- if end_pos >= 0 and Match(r';', end_line[end_pos:]): - if matched.group(1) == 'if': - error(filename, end_linenum, 'whitespace/empty_conditional_body', 5, - 'Empty conditional bodies should use {}') - else: - error(filename, end_linenum, 'whitespace/empty_loop_body', 5, - 'Empty loop bodies should use {} or continue') - - # Check for if statements that have completely empty bodies (no comments) - # and no else clauses. - if end_pos >= 0 and matched.group(1) == 'if': - # Find the position of the opening { for the if statement. - # Return without logging an error if it has no brackets. - opening_linenum = end_linenum - opening_line_fragment = end_line[end_pos:] - # Loop until EOF or find anything that's not whitespace or opening {. - while not Search(r'^\s*\{', opening_line_fragment): - if Search(r'^(?!\s*$)', opening_line_fragment): - # Conditional has no brackets. - return - opening_linenum += 1 - if opening_linenum == len(clean_lines.elided): - # Couldn't find conditional's opening { or any code before EOF. - return - opening_line_fragment = clean_lines.elided[opening_linenum] - # Set opening_line (opening_line_fragment may not be entire opening line). - opening_line = clean_lines.elided[opening_linenum] - - # Find the position of the closing }. - opening_pos = opening_line_fragment.find('{') - if opening_linenum == end_linenum: - # We need to make opening_pos relative to the start of the entire line. - opening_pos += end_pos - (closing_line, closing_linenum, closing_pos) = CloseExpression( - clean_lines, opening_linenum, opening_pos) - if closing_pos < 0: - return - - # Now construct the body of the conditional. This consists of the portion - # of the opening line after the {, all lines until the closing line, - # and the portion of the closing line before the }. - if (clean_lines.raw_lines[opening_linenum] != - CleanseComments(clean_lines.raw_lines[opening_linenum])): - # Opening line ends with a comment, so conditional isn't empty. 
- return - if closing_linenum > opening_linenum: - # Opening line after the {. Ignore comments here since we checked above. - body = list(opening_line[opening_pos+1:]) - # All lines until closing line, excluding closing line, with comments. - body.extend(clean_lines.raw_lines[opening_linenum+1:closing_linenum]) - # Closing line before the }. Won't (and can't) have comments. - body.append(clean_lines.elided[closing_linenum][:closing_pos-1]) - body = '\n'.join(body) - else: - # If statement has brackets and fits on a single line. - body = opening_line[opening_pos+1:closing_pos-1] - - # Check if the body is empty - if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body): - return - # The body is empty. Now make sure there's not an else clause. - current_linenum = closing_linenum - current_line_fragment = closing_line[closing_pos:] - # Loop until EOF or find anything that's not whitespace or else clause. - while Search(r'^\s*$|^(?=\s*else)', current_line_fragment): - if Search(r'^(?=\s*else)', current_line_fragment): - # Found an else clause, so don't log an error. - return - current_linenum += 1 - if current_linenum == len(clean_lines.elided): - break - current_line_fragment = clean_lines.elided[current_linenum] - - # The body is empty and there's no else clause until EOF or other code. - error(filename, end_linenum, 'whitespace/empty_if_body', 4, - ('If statement had no body and no else clause')) - - -def FindCheckMacro(line): - """Find a replaceable CHECK-like macro. - - Args: - line: line to search on. - Returns: - (macro name, start position), or (None, -1) if no replaceable - macro is found. - """ - for macro in _CHECK_MACROS: - i = line.find(macro) - if i >= 0: - # Find opening parenthesis. Do a regular expression match here - # to make sure that we are matching the expected CHECK macro, as - # opposed to some other macro that happens to contain the CHECK - # substring. 
- matched = Match(r'^(.*\b' + macro + r'\s*)\(', line) - if not matched: - continue - return (macro, len(matched.group(1))) - return (None, -1) - - -def CheckCheck(filename, clean_lines, linenum, error): - """Checks the use of CHECK and EXPECT macros. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - # Decide the set of replacement macros that should be suggested - lines = clean_lines.elided - (check_macro, start_pos) = FindCheckMacro(lines[linenum]) - if not check_macro: - return - - # Find end of the boolean expression by matching parentheses - (last_line, end_line, end_pos) = CloseExpression( - clean_lines, linenum, start_pos) - if end_pos < 0: - return - - # If the check macro is followed by something other than a - # semicolon, assume users will log their own custom error messages - # and don't suggest any replacements. - if not Match(r'\s*;', last_line[end_pos:]): - return - - if linenum == end_line: - expression = lines[linenum][start_pos + 1:end_pos - 1] - else: - expression = lines[linenum][start_pos + 1:] - for i in xrange(linenum + 1, end_line): - expression += lines[i] - expression += last_line[0:end_pos - 1] - - # Parse expression so that we can take parentheses into account. - # This avoids false positives for inputs like "CHECK((a < 4) == b)", - # which is not replaceable by CHECK_LE. 
- lhs = '' - rhs = '' - operator = None - while expression: - matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||' - r'==|!=|>=|>|<=|<|\()(.*)$', expression) - if matched: - token = matched.group(1) - if token == '(': - # Parenthesized operand - expression = matched.group(2) - (end, _) = FindEndOfExpressionInLine(expression, 0, ['(']) - if end < 0: - return # Unmatched parenthesis - lhs += '(' + expression[0:end] - expression = expression[end:] - elif token in ('&&', '||'): - # Logical and/or operators. This means the expression - # contains more than one term, for example: - # CHECK(42 < a && a < b); - # - # These are not replaceable with CHECK_LE, so bail out early. - return - elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'): - # Non-relational operator - lhs += token - expression = matched.group(2) - else: - # Relational operator - operator = token - rhs = matched.group(2) - break - else: - # Unparenthesized operand. Instead of appending to lhs one character - # at a time, we do another regular expression match to consume several - # characters at once if possible. Trivial benchmark shows that this - # is more efficient when the operands are longer than a single - # character, which is generally the case. - matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression) - if not matched: - matched = Match(r'^(\s*\S)(.*)$', expression) - if not matched: - break - lhs += matched.group(1) - expression = matched.group(2) - - # Only apply checks if we got all parts of the boolean expression - if not (lhs and operator and rhs): - return - - # Check that rhs do not contain logical operators. We already know - # that lhs is fine since the loop above parses out && and ||. - if rhs.find('&&') > -1 or rhs.find('||') > -1: - return - - # At least one of the operands must be a constant literal. 
This is - # to avoid suggesting replacements for unprintable things like - # CHECK(variable != iterator) - # - # The following pattern matches decimal, hex integers, strings, and - # characters (in that order). - lhs = lhs.strip() - rhs = rhs.strip() - match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' - if Match(match_constant, lhs) or Match(match_constant, rhs): - # Note: since we know both lhs and rhs, we can provide a more - # descriptive error message like: - # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) - # Instead of: - # Consider using CHECK_EQ instead of CHECK(a == b) - # - # We are still keeping the less descriptive message because if lhs - # or rhs gets long, the error message might become unreadable. - error(filename, linenum, 'readability/check', 2, - 'Consider using %s instead of %s(a %s b)' % ( - _CHECK_REPLACEMENT[check_macro][operator], - check_macro, operator)) - - -def CheckAltTokens(filename, clean_lines, linenum, error): - """Check alternative keywords being used in boolean expressions. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Avoid preprocessor lines - if Match(r'^\s*#', line): - return - - # Last ditch effort to avoid multi-line comments. This will not help - # if the comment started before the current line or ended after the - # current line, but it catches most of the false positives. At least, - # it provides a way to workaround this warning for people who use - # multi-line comments in preprocessor macros. - # - # TODO(unknown): remove this once cpplint has better support for - # multi-line comments. 
- if line.find('/*') >= 0 or line.find('*/') >= 0: - return - - for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): - error(filename, linenum, 'readability/alt_tokens', 2, - 'Use operator %s instead of %s' % ( - _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1))) - - -def GetLineWidth(line): - """Determines the width of the line in column positions. - - Args: - line: A string, which may be a Unicode string. - - Returns: - The width of the line in column positions, accounting for Unicode - combining characters and wide characters. - """ - if isinstance(line, unicode): - width = 0 - for uc in unicodedata.normalize('NFC', line): - if unicodedata.east_asian_width(uc) in ('W', 'F'): - width += 2 - elif not unicodedata.combining(uc): - width += 1 - return width - else: - return len(line) - - -def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, - error): - """Checks rules from the 'C++ style rules' section of cppguide.html. - - Most of these rules are hard to test (naming, comment style), but we - do what we can. In particular we check for 2-space indents, line lengths, - tab usage, spaces inside code, etc. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - file_extension: The extension (without the dot) of the filename. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - - # Don't use "elided" lines here, otherwise we can't check commented lines. 
- # Don't want to use "raw" either, because we don't want to check inside C++11 - # raw strings, - raw_lines = clean_lines.lines_without_raw_strings - line = raw_lines[linenum] - prev = raw_lines[linenum - 1] if linenum > 0 else '' - - if line.find('\t') != -1: - error(filename, linenum, 'whitespace/tab', 1, - 'Tab found; better to use spaces') - - # One or three blank spaces at the beginning of the line is weird; it's - # hard to reconcile that with 2-space indents. - # NOTE: here are the conditions rob pike used for his tests. Mine aren't - # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces - # if(RLENGTH > 20) complain = 0; - # if(match($0, " +(error|private|public|protected):")) complain = 0; - # if(match(prev, "&& *$")) complain = 0; - # if(match(prev, "\\|\\| *$")) complain = 0; - # if(match(prev, "[\",=><] *$")) complain = 0; - # if(match($0, " <<")) complain = 0; - # if(match(prev, " +for \\(")) complain = 0; - # if(prevodd && match(prevprev, " +for \\(")) complain = 0; - scope_or_label_pattern = r'\s*\w+\s*:\s*\\?$' - classinfo = nesting_state.InnermostClass() - initial_spaces = 0 - cleansed_line = clean_lines.elided[linenum] - while initial_spaces < len(line) and line[initial_spaces] == ' ': - initial_spaces += 1 - # There are certain situations we allow one space, notably for - # section labels, and also lines containing multi-line raw strings. - # We also don't check for lines that look like continuation lines - # (of lines ending in double quotes, commas, equals, or angle brackets) - # because the rules for how to indent those are non-trivial. - if (not Search(r'[",=><] *$', prev) and - (initial_spaces == 1 or initial_spaces == 3) and - not Match(scope_or_label_pattern, cleansed_line) and - not (clean_lines.raw_lines[linenum] != line and - Match(r'^\s*""', line))): - error(filename, linenum, 'whitespace/indent', 3, - 'Weird number of spaces at line-start. 
' - 'Are you using a 2-space indent?') - - if line and line[-1].isspace(): - error(filename, linenum, 'whitespace/end_of_line', 4, - 'Line ends in whitespace. Consider deleting these extra spaces.') - - # Check if the line is a header guard. - is_header_guard = False - if IsHeaderExtension(file_extension): - cppvar = GetHeaderGuardCPPVariable(filename) - if (line.startswith('#ifndef %s' % cppvar) or - line.startswith('#define %s' % cppvar) or - line.startswith('#endif // %s' % cppvar)): - is_header_guard = True - # #include lines and header guards can be long, since there's no clean way to - # split them. - # - # URLs can be long too. It's possible to split these, but it makes them - # harder to cut&paste. - # - # The "$Id:...$" comment may also get very long without it being the - # developers fault. - if (not line.startswith('#include') and not is_header_guard and - not Match(r'^\s*//.*http(s?)://\S*$', line) and - not Match(r'^\s*//\s*[^\s]*$', line) and - not Match(r'^// \$Id:.*#[0-9]+ \$$', line)): - line_width = GetLineWidth(line) - if line_width > _line_length: - error(filename, linenum, 'whitespace/line_length', 2, - 'Lines should be <= %i characters long' % _line_length) - - if (cleansed_line.count(';') > 1 and - # for loops are allowed two ;'s (and may run over two lines). 
- cleansed_line.find('for') == -1 and - (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or - GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and - # It's ok to have many commands in a switch case that fits in 1 line - not ((cleansed_line.find('case ') != -1 or - cleansed_line.find('default:') != -1) and - cleansed_line.find('break;') != -1)): - error(filename, linenum, 'whitespace/newline', 0, - 'More than one command on the same line') - - # Some more style checks - CheckBraces(filename, clean_lines, linenum, error) - CheckTrailingSemicolon(filename, clean_lines, linenum, error) - CheckEmptyBlockBody(filename, clean_lines, linenum, error) - CheckSpacing(filename, clean_lines, linenum, nesting_state, error) - CheckOperatorSpacing(filename, clean_lines, linenum, error) - CheckParenthesisSpacing(filename, clean_lines, linenum, error) - CheckCommaSpacing(filename, clean_lines, linenum, error) - CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error) - CheckSpacingForFunctionCall(filename, clean_lines, linenum, error) - CheckCheck(filename, clean_lines, linenum, error) - CheckAltTokens(filename, clean_lines, linenum, error) - classinfo = nesting_state.InnermostClass() - if classinfo: - CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) - - -_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') -# Matches the first component of a filename delimited by -s and _s. That is: -# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo' -# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo' -# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo' -# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo' -_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+') - - -def _DropCommonSuffixes(filename): - """Drops common suffixes like _test.cc or -inl.h from filename. 
- - For example: - >>> _DropCommonSuffixes('foo/foo-inl.h') - 'foo/foo' - >>> _DropCommonSuffixes('foo/bar/foo.cc') - 'foo/bar/foo' - >>> _DropCommonSuffixes('foo/foo_internal.h') - 'foo/foo' - >>> _DropCommonSuffixes('foo/foo_unusualinternal.h') - 'foo/foo_unusualinternal' - - Args: - filename: The input filename. - - Returns: - The filename with the common suffix removed. - """ - for suffix in ('test.cc', 'regtest.cc', 'unittest.cc', - 'inl.h', 'impl.h', 'internal.h'): - if (filename.endswith(suffix) and len(filename) > len(suffix) and - filename[-len(suffix) - 1] in ('-', '_')): - return filename[:-len(suffix) - 1] - return os.path.splitext(filename)[0] - - -def _ClassifyInclude(fileinfo, include, is_system): - """Figures out what kind of header 'include' is. - - Args: - fileinfo: The current file cpplint is running over. A FileInfo instance. - include: The path to a #included file. - is_system: True if the #include used <> rather than "". - - Returns: - One of the _XXX_HEADER constants. - - For example: - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True) - _C_SYS_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True) - _CPP_SYS_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False) - _LIKELY_MY_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'), - ... 'bar/foo_other_ext.h', False) - _POSSIBLE_MY_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False) - _OTHER_HEADER - """ - # This is a list of all standard c++ header files, except - # those already checked for above. - is_cpp_h = include in _CPP_HEADERS - - if is_system: - if is_cpp_h: - return _CPP_SYS_HEADER - else: - return _C_SYS_HEADER - - # If the target file and the include we're checking share a - # basename when we drop common extensions, and the include - # lives in . , then it's likely to be owned by the target file. 
- target_dir, target_base = ( - os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName()))) - include_dir, include_base = os.path.split(_DropCommonSuffixes(include)) - if target_base == include_base and ( - include_dir == target_dir or - include_dir == os.path.normpath(target_dir + '/../public')): - return _LIKELY_MY_HEADER - - # If the target and include share some initial basename - # component, it's possible the target is implementing the - # include, so it's allowed to be first, but we'll never - # complain if it's not there. - target_first_component = _RE_FIRST_COMPONENT.match(target_base) - include_first_component = _RE_FIRST_COMPONENT.match(include_base) - if (target_first_component and include_first_component and - target_first_component.group(0) == - include_first_component.group(0)): - return _POSSIBLE_MY_HEADER - - return _OTHER_HEADER - - - -def CheckIncludeLine(filename, clean_lines, linenum, include_state, error): - """Check rules that are applicable to #include lines. - - Strings on #include lines are NOT removed from elided line, to make - certain tasks easier. However, to prevent false positives, checks - applicable to #include lines in CheckLanguage must be put here. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - include_state: An _IncludeState instance in which the headers are inserted. - error: The function to call with any errors found. - """ - fileinfo = FileInfo(filename) - line = clean_lines.lines[linenum] - - # "include" should use the new style "foo/bar.h" instead of just "bar.h" - # Only do this check if the included header follows google naming - # conventions. If not, assume that it's a 3rd party API that - # requires special include conventions. - # - # We also make an exception for Lua headers, which follow google - # naming convention but not the include convention. 
- match = Match(r'#include\s*"([^/]+\.h)"', line) - if match and not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1)): - error(filename, linenum, 'build/include', 4, - 'Include the directory when naming .h files') - - # we shouldn't include a file more than once. actually, there are a - # handful of instances where doing so is okay, but in general it's - # not. - match = _RE_PATTERN_INCLUDE.search(line) - if match: - include = match.group(2) - is_system = (match.group(1) == '<') - duplicate_line = include_state.FindHeader(include) - if duplicate_line >= 0: - error(filename, linenum, 'build/include', 4, - '"%s" already included at %s:%s' % - (include, filename, duplicate_line)) - elif (include.endswith('.cc') and - os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)): - error(filename, linenum, 'build/include', 4, - 'Do not include .cc files from other packages') - elif not _THIRD_PARTY_HEADERS_PATTERN.match(include): - include_state.include_list[-1].append((include, linenum)) - - # We want to ensure that headers appear in the right order: - # 1) for foo.cc, foo.h (preferred location) - # 2) c system files - # 3) cpp system files - # 4) for foo.cc, foo.h (deprecated location) - # 5) other google headers - # - # We classify each include statement as one of those 5 types - # using a number of techniques. The include_state object keeps - # track of the highest type seen, and complains if we see a - # lower type after that. - error_message = include_state.CheckNextIncludeOrder( - _ClassifyInclude(fileinfo, include, is_system)) - if error_message: - error(filename, linenum, 'build/include_order', 4, - '%s. Should be: %s.h, c system, c++ system, other.' 
% - (error_message, fileinfo.BaseName())) - canonical_include = include_state.CanonicalizeAlphabeticalOrder(include) - if not include_state.IsInAlphabeticalOrder( - clean_lines, linenum, canonical_include): - error(filename, linenum, 'build/include_alpha', 4, - 'Include "%s" not in alphabetical order' % include) - include_state.SetLastHeader(canonical_include) - - - -def _GetTextInside(text, start_pattern): - r"""Retrieves all the text between matching open and close parentheses. - - Given a string of lines and a regular expression string, retrieve all the text - following the expression and between opening punctuation symbols like - (, [, or {, and the matching close-punctuation symbol. This properly nested - occurrences of the punctuations, so for the text like - printf(a(), b(c())); - a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'. - start_pattern must match string having an open punctuation symbol at the end. - - Args: - text: The lines to extract text. Its comments and strings must be elided. - It can be single line and can span multiple lines. - start_pattern: The regexp string indicating where to start extracting - the text. - Returns: - The extracted text. - None if either the opening string or ending punctuation could not be found. - """ - # TODO(unknown): Audit cpplint.py to see what places could be profitably - # rewritten to use _GetTextInside (and use inferior regexp matching today). - - # Give opening punctuations to get the matching close-punctuations. - matching_punctuation = {'(': ')', '{': '}', '[': ']'} - closing_punctuation = set(matching_punctuation.itervalues()) - - # Find the position to start extracting text. - match = re.search(start_pattern, text, re.M) - if not match: # start_pattern not found in text. 
- return None - start_position = match.end(0) - - assert start_position > 0, ( - 'start_pattern must ends with an opening punctuation.') - assert text[start_position - 1] in matching_punctuation, ( - 'start_pattern must ends with an opening punctuation.') - # Stack of closing punctuations we expect to have in text after position. - punctuation_stack = [matching_punctuation[text[start_position - 1]]] - position = start_position - while punctuation_stack and position < len(text): - if text[position] == punctuation_stack[-1]: - punctuation_stack.pop() - elif text[position] in closing_punctuation: - # A closing punctuation without matching opening punctuations. - return None - elif text[position] in matching_punctuation: - punctuation_stack.append(matching_punctuation[text[position]]) - position += 1 - if punctuation_stack: - # Opening punctuations left without matching close-punctuations. - return None - # punctuations match. - return text[start_position:position - 1] - - -# Patterns for matching call-by-reference parameters. -# -# Supports nested templates up to 2 levels deep using this messy pattern: -# < (?: < (?: < [^<>]* -# > -# | [^<>] )* -# > -# | [^<>] )* -# > -_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*' # =~ [[:alpha:]][[:alnum:]]* -_RE_PATTERN_TYPE = ( - r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?' - r'(?:\w|' - r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|' - r'::)+') -# A call-by-reference parameter ends with '& identifier'. -_RE_PATTERN_REF_PARAM = re.compile( - r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*' - r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]') -# A call-by-const-reference parameter either ends with 'const& identifier' -# or looks like 'const type& identifier' when 'type' is atomic. -_RE_PATTERN_CONST_REF_PARAM = ( - r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT + - r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')') -# Stream types. 
-_RE_PATTERN_REF_STREAM_PARAM = ( - r'(?:.*stream\s*&\s*' + _RE_PATTERN_IDENT + r')') - - -def CheckLanguage(filename, clean_lines, linenum, file_extension, - include_state, nesting_state, error): - """Checks rules from the 'C++ language rules' section of cppguide.html. - - Some of these rules are hard to test (function overloading, using - uint32 inappropriately), but we do the best we can. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - file_extension: The extension (without the dot) of the filename. - include_state: An _IncludeState instance in which the headers are inserted. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - # If the line is empty or consists of entirely a comment, no need to - # check it. - line = clean_lines.elided[linenum] - if not line: - return - - match = _RE_PATTERN_INCLUDE.search(line) - if match: - CheckIncludeLine(filename, clean_lines, linenum, include_state, error) - return - - # Reset include state across preprocessor directives. This is meant - # to silence warnings for conditional includes. - match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line) - if match: - include_state.ResetSection(match.group(1)) - - # Make Windows paths like Unix. - fullname = os.path.abspath(filename).replace('\\', '/') - - # Perform other checks now that we are sure that this is not an include line - CheckCasts(filename, clean_lines, linenum, error) - CheckGlobalStatic(filename, clean_lines, linenum, error) - CheckPrintf(filename, clean_lines, linenum, error) - - if IsHeaderExtension(file_extension): - # TODO(unknown): check that 1-arg constructors are explicit. - # How to tell it's a constructor? 
- # (handled in CheckForNonStandardConstructs for now) - # TODO(unknown): check that classes declare or disable copy/assign - # (level 1 error) - pass - - # Check if people are using the verboten C basic types. The only exception - # we regularly allow is "unsigned short port" for port. - if Search(r'\bshort port\b', line): - if not Search(r'\bunsigned short port\b', line): - error(filename, linenum, 'runtime/int', 4, - 'Use "unsigned short" for ports, not "short"') - else: - match = Search(r'\b(short|long(?! +double)|long long)\b', line) - if match: - error(filename, linenum, 'runtime/int', 4, - 'Use int16/int64/etc, rather than the C type %s' % match.group(1)) - - # Check if some verboten operator overloading is going on - # TODO(unknown): catch out-of-line unary operator&: - # class X {}; - # int operator&(const X& x) { return 42; } // unary operator& - # The trick is it's hard to tell apart from binary operator&: - # class Y { int operator&(const Y& x) { return 23; } }; // binary operator& - if Search(r'\boperator\s*&\s*\(\s*\)', line): - error(filename, linenum, 'runtime/operator', 4, - 'Unary operator& is dangerous. Do not use it.') - - # Check for suspicious usage of "if" like - # } if (a == b) { - if Search(r'\}\s*if\s*\(', line): - error(filename, linenum, 'readability/braces', 4, - 'Did you mean "else if"? If not, start a new line for "if".') - - # Check for potential format string bugs like printf(foo). - # We constrain the pattern not to pick things like DocidForPrintf(foo). - # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) - # TODO(unknown): Catch the following case. Need to change the calling - # convention of the whole function to process multiple line to handle it. 
- # printf( - # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line); - printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(') - if printf_args: - match = Match(r'([\w.\->()]+)$', printf_args) - if match and match.group(1) != '__VA_ARGS__': - function_name = re.search(r'\b((?:string)?printf)\s*\(', - line, re.I).group(1) - error(filename, linenum, 'runtime/printf', 4, - 'Potential format string bug. Do %s("%%s", %s) instead.' - % (function_name, match.group(1))) - - # Check for potential memset bugs like memset(buf, sizeof(buf), 0). - match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) - if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): - error(filename, linenum, 'runtime/memset', 4, - 'Did you mean "memset(%s, 0, %s)"?' - % (match.group(1), match.group(2))) - - if Search(r'\busing namespace\b', line): - error(filename, linenum, 'build/namespaces', 5, - 'Do not use namespace using-directives. ' - 'Use using-declarations instead.') - - # Detect variable-length arrays. - match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) - if (match and match.group(2) != 'return' and match.group(2) != 'delete' and - match.group(3).find(']') == -1): - # Split the size using space and arithmetic operators as delimiters. - # If any of the resulting tokens are not compile time constants then - # report the error. 
- tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3)) - is_const = True - skip_next = False - for tok in tokens: - if skip_next: - skip_next = False - continue - - if Search(r'sizeof\(.+\)', tok): continue - if Search(r'arraysize\(\w+\)', tok): continue - - tok = tok.lstrip('(') - tok = tok.rstrip(')') - if not tok: continue - if Match(r'\d+', tok): continue - if Match(r'0[xX][0-9a-fA-F]+', tok): continue - if Match(r'k[A-Z0-9]\w*', tok): continue - if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue - if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue - # A catch all for tricky sizeof cases, including 'sizeof expression', - # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' - # requires skipping the next token because we split on ' ' and '*'. - if tok.startswith('sizeof'): - skip_next = True - continue - is_const = False - break - if not is_const: - error(filename, linenum, 'runtime/arrays', 1, - 'Do not use variable-length arrays. Use an appropriately named ' - "('k' followed by CamelCase) compile-time constant for the size.") - - # Check for use of unnamed namespaces in header files. Registration - # macros are typically OK, so we allow use of "namespace {" on lines - # that end with backslashes. - if (IsHeaderExtension(file_extension) - and Search(r'\bnamespace\s*{', line) - and line[-1] != '\\'): - error(filename, linenum, 'build/namespaces', 4, - 'Do not use unnamed namespaces in header files. See ' - 'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces' - ' for more information.') - - -def CheckGlobalStatic(filename, clean_lines, linenum, error): - """Check for unsafe global or static objects. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. 
- """ - line = clean_lines.elided[linenum] - - # Match two lines at a time to support multiline declarations - if linenum + 1 < clean_lines.NumLines() and not Search(r'[;({]', line): - line += clean_lines.elided[linenum + 1].strip() - - # Check for people declaring static/global STL strings at the top level. - # This is dangerous because the C++ language does not guarantee that - # globals with constructors are initialized before the first access, and - # also because globals can be destroyed when some threads are still running. - # TODO(unknown): Generalize this to also find static unique_ptr instances. - # TODO(unknown): File bugs for clang-tidy to find these. - match = Match( - r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +' - r'([a-zA-Z0-9_:]+)\b(.*)', - line) - - # Remove false positives: - # - String pointers (as opposed to values). - # string *pointer - # const string *pointer - # string const *pointer - # string *const pointer - # - # - Functions and template specializations. - # string Function(... - # string Class::Method(... - # - # - Operators. These are matched separately because operator names - # cross non-word boundaries, and trying to match both operators - # and functions at the same time would decrease accuracy of - # matching identifiers. - # string Class::operator*() - if (match and - not Search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', line) and - not Search(r'\boperator\W', line) and - not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4))): - if Search(r'\bconst\b', line): - error(filename, linenum, 'runtime/string', 4, - 'For a static/global string constant, use a C style string ' - 'instead: "%schar%s %s[]".' 
% - (match.group(1), match.group(2) or '', match.group(3))) - else: - error(filename, linenum, 'runtime/string', 4, - 'Static/global string variables are not permitted.') - - if (Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line) or - Search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', line)): - error(filename, linenum, 'runtime/init', 4, - 'You seem to be initializing a member variable with itself.') - - -def CheckPrintf(filename, clean_lines, linenum, error): - """Check for printf related issues. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # When snprintf is used, the second argument shouldn't be a literal. - match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) - if match and match.group(2) != '0': - # If 2nd arg is zero, snprintf is used to calculate size. - error(filename, linenum, 'runtime/printf', 3, - 'If you can, use sizeof(%s) instead of %s as the 2nd arg ' - 'to snprintf.' % (match.group(1), match.group(2))) - - # Check if some verboten C functions are being used. - if Search(r'\bsprintf\s*\(', line): - error(filename, linenum, 'runtime/printf', 5, - 'Never use sprintf. Use snprintf instead.') - match = Search(r'\b(strcpy|strcat)\s*\(', line) - if match: - error(filename, linenum, 'runtime/printf', 4, - 'Almost always, snprintf is better than %s' % match.group(1)) - - -def IsDerivedFunction(clean_lines, linenum): - """Check if current line contains an inherited function. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if current line contains a function with "override" - virt-specifier. 
- """ - # Scan back a few lines for start of current function - for i in xrange(linenum, max(-1, linenum - 10), -1): - match = Match(r'^([^()]*\w+)\(', clean_lines.elided[i]) - if match: - # Look for "override" after the matching closing parenthesis - line, _, closing_paren = CloseExpression( - clean_lines, i, len(match.group(1))) - return (closing_paren >= 0 and - Search(r'\boverride\b', line[closing_paren:])) - return False - - -def IsOutOfLineMethodDefinition(clean_lines, linenum): - """Check if current line contains an out-of-line method definition. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if current line contains an out-of-line method definition. - """ - # Scan back a few lines for start of current function - for i in xrange(linenum, max(-1, linenum - 10), -1): - if Match(r'^([^()]*\w+)\(', clean_lines.elided[i]): - return Match(r'^[^()]*\w+::\w+\(', clean_lines.elided[i]) is not None - return False - - -def IsInitializerList(clean_lines, linenum): - """Check if current line is inside constructor initializer list. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if current line appears to be inside constructor initializer - list, False otherwise. - """ - for i in xrange(linenum, 1, -1): - line = clean_lines.elided[i] - if i == linenum: - remove_function_body = Match(r'^(.*)\{\s*$', line) - if remove_function_body: - line = remove_function_body.group(1) - - if Search(r'\s:\s*\w+[({]', line): - # A lone colon tend to indicate the start of a constructor - # initializer list. It could also be a ternary operator, which - # also tend to appear in constructor initializer lists as - # opposed to parameter lists. - return True - if Search(r'\}\s*,\s*$', line): - # A closing brace followed by a comma is probably the end of a - # brace-initialized member in constructor initializer list. 
- return True - if Search(r'[{};]\s*$', line): - # Found one of the following: - # - A closing brace or semicolon, probably the end of the previous - # function. - # - An opening brace, probably the start of current class or namespace. - # - # Current line is probably not inside an initializer list since - # we saw one of those things without seeing the starting colon. - return False - - # Got to the beginning of the file without seeing the start of - # constructor initializer list. - return False - - -def CheckForNonConstReference(filename, clean_lines, linenum, - nesting_state, error): - """Check for non-const references. - - Separate from CheckLanguage since it scans backwards from current - line, instead of scanning forward. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - # Do nothing if there is no '&' on current line. - line = clean_lines.elided[linenum] - if '&' not in line: - return - - # If a function is inherited, current function doesn't have much of - # a choice, so any non-const references should not be blamed on - # derived function. - if IsDerivedFunction(clean_lines, linenum): - return - - # Don't warn on out-of-line method definitions, as we would warn on the - # in-line declaration, if it isn't marked with 'override'. 
- if IsOutOfLineMethodDefinition(clean_lines, linenum): - return - - # Long type names may be broken across multiple lines, usually in one - # of these forms: - # LongType - # ::LongTypeContinued &identifier - # LongType:: - # LongTypeContinued &identifier - # LongType< - # ...>::LongTypeContinued &identifier - # - # If we detected a type split across two lines, join the previous - # line to current line so that we can match const references - # accordingly. - # - # Note that this only scans back one line, since scanning back - # arbitrary number of lines would be expensive. If you have a type - # that spans more than 2 lines, please use a typedef. - if linenum > 1: - previous = None - if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line): - # previous_line\n + ::current_line - previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$', - clean_lines.elided[linenum - 1]) - elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line): - # previous_line::\n + current_line - previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$', - clean_lines.elided[linenum - 1]) - if previous: - line = previous.group(1) + line.lstrip() - else: - # Check for templated parameter that is split across multiple lines - endpos = line.rfind('>') - if endpos > -1: - (_, startline, startpos) = ReverseCloseExpression( - clean_lines, linenum, endpos) - if startpos > -1 and startline < linenum: - # Found the matching < on an earlier line, collect all - # pieces up to current line. - line = '' - for i in xrange(startline, linenum + 1): - line += clean_lines.elided[i].strip() - - # Check for non-const references in function parameters. A single '&' may - # found in the following places: - # inside expression: binary & for bitwise AND - # inside expression: unary & for taking the address of something - # inside declarators: reference parameter - # We will exclude the first two cases by checking that we are not inside a - # function body, including one that was just introduced by a trailing '{'. 
- # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare]. - if (nesting_state.previous_stack_top and - not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or - isinstance(nesting_state.previous_stack_top, _NamespaceInfo))): - # Not at toplevel, not within a class, and not within a namespace - return - - # Avoid initializer lists. We only need to scan back from the - # current line for something that starts with ':'. - # - # We don't need to check the current line, since the '&' would - # appear inside the second set of parentheses on the current line as - # opposed to the first set. - if linenum > 0: - for i in xrange(linenum - 1, max(0, linenum - 10), -1): - previous_line = clean_lines.elided[i] - if not Search(r'[),]\s*$', previous_line): - break - if Match(r'^\s*:\s+\S', previous_line): - return - - # Avoid preprocessors - if Search(r'\\\s*$', line): - return - - # Avoid constructor initializer lists - if IsInitializerList(clean_lines, linenum): - return - - # We allow non-const references in a few standard places, like functions - # called "swap()" or iostream operators like "<<" or ">>". Do not check - # those function parameters. - # - # We also accept & in static_assert, which looks like a function but - # it's actually a declaration expression. - whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|' - r'operator\s*[<>][<>]|' - r'static_assert|COMPILE_ASSERT' - r')\s*\(') - if Search(whitelisted_functions, line): - return - elif not Search(r'\S+\([^)]*$', line): - # Don't see a whitelisted function on this line. Actually we - # didn't see any function name on this line, so this is likely a - # multi-line parameter list. Try a bit harder to catch this case. 
- for i in xrange(2): - if (linenum > i and - Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])): - return - - decls = ReplaceAll(r'{[^}]*}', ' ', line) # exclude function body - for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls): - if (not Match(_RE_PATTERN_CONST_REF_PARAM, parameter) and - not Match(_RE_PATTERN_REF_STREAM_PARAM, parameter)): - error(filename, linenum, 'runtime/references', 2, - 'Is this a non-const reference? ' - 'If so, make const or use a pointer: ' + - ReplaceAll(' *<', '<', parameter)) - - -def CheckCasts(filename, clean_lines, linenum, error): - """Various cast related checks. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Check to see if they're using an conversion function cast. - # I just try to capture the most common basic types, though there are more. - # Parameterless conversion functions, such as bool(), are allowed as they are - # probably a member operator declaration or default constructor. - match = Search( - r'(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b' - r'(int|float|double|bool|char|int32|uint32|int64|uint64)' - r'(\([^)].*)', line) - expecting_function = ExpectingFunctionArgs(clean_lines, linenum) - if match and not expecting_function: - matched_type = match.group(2) - - # matched_new_or_template is used to silence two false positives: - # - New operators - # - Template arguments with function types - # - # For template arguments, we match on types immediately following - # an opening bracket without any spaces. This is a fast way to - # silence the common case where the function type is the first - # template argument. False negative with less-than comparison is - # avoided because those operators are usually followed by a space. 
- # - # function // bracket + no space = false positive - # value < double(42) // bracket + space = true positive - matched_new_or_template = match.group(1) - - # Avoid arrays by looking for brackets that come after the closing - # parenthesis. - if Match(r'\([^()]+\)\s*\[', match.group(3)): - return - - # Other things to ignore: - # - Function pointers - # - Casts to pointer types - # - Placement new - # - Alias declarations - matched_funcptr = match.group(3) - if (matched_new_or_template is None and - not (matched_funcptr and - (Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', - matched_funcptr) or - matched_funcptr.startswith('(*)'))) and - not Match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and - not Search(r'new\(\S+\)\s*' + matched_type, line)): - error(filename, linenum, 'readability/casting', 4, - 'Using deprecated casting style. ' - 'Use static_cast<%s>(...) instead' % - matched_type) - - if not expecting_function: - CheckCStyleCast(filename, clean_lines, linenum, 'static_cast', - r'\((int|float|double|bool|char|u?int(16|32|64))\)', error) - - # This doesn't catch all cases. Consider (const char * const)"hello". - # - # (char *) "foo" should always be a const_cast (reinterpret_cast won't - # compile). - if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast', - r'\((char\s?\*+\s?)\)\s*"', error): - pass - else: - # Check pointer casts for other than string constants - CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast', - r'\((\w+\s?\*+\s?)\)', error) - - # In addition, we look for people taking the address of a cast. This - # is dangerous -- casts can assign to temporaries, so the pointer doesn't - # point where you think. - # - # Some non-identifier character is required before the '&' for the - # expression to be recognized as a cast. 
These are casts: - # expression = &static_cast(temporary()); - # function(&(int*)(temporary())); - # - # This is not a cast: - # reference_type&(int* function_param); - match = Search( - r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|' - r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line) - if match: - # Try a better error message when the & is bound to something - # dereferenced by the casted pointer, as opposed to the casted - # pointer itself. - parenthesis_error = False - match = Match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line) - if match: - _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1))) - if x1 >= 0 and clean_lines.elided[y1][x1] == '(': - _, y2, x2 = CloseExpression(clean_lines, y1, x1) - if x2 >= 0: - extended_line = clean_lines.elided[y2][x2:] - if y2 < clean_lines.NumLines() - 1: - extended_line += clean_lines.elided[y2 + 1] - if Match(r'\s*(?:->|\[)', extended_line): - parenthesis_error = True - - if parenthesis_error: - error(filename, linenum, 'readability/casting', 4, - ('Are you taking an address of something dereferenced ' - 'from a cast? Wrapping the dereferenced expression in ' - 'parentheses will make the binding more obvious')) - else: - error(filename, linenum, 'runtime/casting', 4, - ('Are you taking an address of a cast? ' - 'This is dangerous: could be a temp var. ' - 'Take the address before doing the cast, rather than after')) - - -def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error): - """Checks for a C-style cast by looking for the pattern. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - cast_type: The string for the C++ cast to recommend. This is either - reinterpret_cast, static_cast, or const_cast, depending. - pattern: The regular expression used to find C-style casts. - error: The function to call with any errors found. 
- - Returns: - True if an error was emitted. - False otherwise. - """ - line = clean_lines.elided[linenum] - match = Search(pattern, line) - if not match: - return False - - # Exclude lines with keywords that tend to look like casts - context = line[0:match.start(1) - 1] - if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context): - return False - - # Try expanding current context to see if we one level of - # parentheses inside a macro. - if linenum > 0: - for i in xrange(linenum - 1, max(0, linenum - 5), -1): - context = clean_lines.elided[i] + context - if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context): - return False - - # operator++(int) and operator--(int) - if context.endswith(' operator++') or context.endswith(' operator--'): - return False - - # A single unnamed argument for a function tends to look like old style cast. - # If we see those, don't issue warnings for deprecated casts. - remainder = line[match.end(0):] - if Match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)', - remainder): - return False - - # At this point, all that should be left is actual casts. - error(filename, linenum, 'readability/casting', 4, - 'Using C-style cast. Use %s<%s>(...) instead' % - (cast_type, match.group(1))) - - return True - - -def ExpectingFunctionArgs(clean_lines, linenum): - """Checks whether where function type arguments are expected. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - - Returns: - True if the line at 'linenum' is inside something that expects arguments - of function types. 
- """ - line = clean_lines.elided[linenum] - return (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or - (linenum >= 2 and - (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$', - clean_lines.elided[linenum - 1]) or - Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$', - clean_lines.elided[linenum - 2]) or - Search(r'\bstd::m?function\s*\<\s*$', - clean_lines.elided[linenum - 1])))) - - -_HEADERS_CONTAINING_TEMPLATES = ( - ('', ('deque',)), - ('', ('unary_function', 'binary_function', - 'plus', 'minus', 'multiplies', 'divides', 'modulus', - 'negate', - 'equal_to', 'not_equal_to', 'greater', 'less', - 'greater_equal', 'less_equal', - 'logical_and', 'logical_or', 'logical_not', - 'unary_negate', 'not1', 'binary_negate', 'not2', - 'bind1st', 'bind2nd', - 'pointer_to_unary_function', - 'pointer_to_binary_function', - 'ptr_fun', - 'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t', - 'mem_fun_ref_t', - 'const_mem_fun_t', 'const_mem_fun1_t', - 'const_mem_fun_ref_t', 'const_mem_fun1_ref_t', - 'mem_fun_ref', - )), - ('', ('numeric_limits',)), - ('', ('list',)), - ('', ('map', 'multimap',)), - ('', ('allocator', 'make_shared', 'make_unique', 'shared_ptr', - 'unique_ptr', 'weak_ptr')), - ('', ('queue', 'priority_queue',)), - ('', ('set', 'multiset',)), - ('', ('stack',)), - ('', ('char_traits', 'basic_string',)), - ('', ('tuple',)), - ('', ('unordered_map', 'unordered_multimap')), - ('', ('unordered_set', 'unordered_multiset')), - ('', ('pair',)), - ('', ('vector',)), - - # gcc extensions. 
- # Note: std::hash is their hash, ::hash is our hash - ('', ('hash_map', 'hash_multimap',)), - ('', ('hash_set', 'hash_multiset',)), - ('', ('slist',)), - ) - -_HEADERS_MAYBE_TEMPLATES = ( - ('', ('copy', 'max', 'min', 'min_element', 'sort', - 'transform', - )), - ('', ('forward', 'make_pair', 'move', 'swap')), - ) - -_RE_PATTERN_STRING = re.compile(r'\bstring\b') - -_re_pattern_headers_maybe_templates = [] -for _header, _templates in _HEADERS_MAYBE_TEMPLATES: - for _template in _templates: - # Match max(..., ...), max(..., ...), but not foo->max, foo.max or - # type::max(). - _re_pattern_headers_maybe_templates.append( - (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'), - _template, - _header)) - -# Other scripts may reach in and modify this pattern. -_re_pattern_templates = [] -for _header, _templates in _HEADERS_CONTAINING_TEMPLATES: - for _template in _templates: - _re_pattern_templates.append( - (re.compile(r'(\<|\b)' + _template + r'\s*\<'), - _template + '<>', - _header)) - - -def FilesBelongToSameModule(filename_cc, filename_h): - """Check if these two filenames belong to the same module. - - The concept of a 'module' here is a as follows: - foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the - same 'module' if they are in the same directory. - some/path/public/xyzzy and some/path/internal/xyzzy are also considered - to belong to the same module here. - - If the filename_cc contains a longer path than the filename_h, for example, - '/absolute/path/to/base/sysinfo.cc', and this file would include - 'base/sysinfo.h', this function also produces the prefix needed to open the - header. This is used by the caller of this function to more robustly open the - header file. We don't have access to the real include paths in this context, - so we need this guesswork here. - - Known bugs: tools/base/bar.cc and base/bar.h belong to the same module - according to this implementation. 
Because of this, this function gives - some false positives. This should be sufficiently rare in practice. - - Args: - filename_cc: is the path for the .cc file - filename_h: is the path for the header path - - Returns: - Tuple with a bool and a string: - bool: True if filename_cc and filename_h belong to the same module. - string: the additional prefix needed to open the header file. - """ - - fileinfo = FileInfo(filename_cc) - if not fileinfo.IsSource(): - return (False, '') - filename_cc = filename_cc[:-len(fileinfo.Extension())] - matched_test_suffix = Search(_TEST_FILE_SUFFIX, fileinfo.BaseName()) - if matched_test_suffix: - filename_cc = filename_cc[:-len(matched_test_suffix.group(1))] - filename_cc = filename_cc.replace('/public/', '/') - filename_cc = filename_cc.replace('/internal/', '/') - - if not filename_h.endswith('.h'): - return (False, '') - filename_h = filename_h[:-len('.h')] - if filename_h.endswith('-inl'): - filename_h = filename_h[:-len('-inl')] - filename_h = filename_h.replace('/public/', '/') - filename_h = filename_h.replace('/internal/', '/') - - files_belong_to_same_module = filename_cc.endswith(filename_h) - common_path = '' - if files_belong_to_same_module: - common_path = filename_cc[:-len(filename_h)] - return files_belong_to_same_module, common_path - - -def UpdateIncludeState(filename, include_dict, io=codecs): - """Fill up the include_dict with new includes found from the file. - - Args: - filename: the name of the header to read. - include_dict: a dictionary in which the headers are inserted. - io: The io factory to use to read the file. Provided for testability. - - Returns: - True if a header was successfully added. False otherwise. 
- """ - headerfile = None - try: - headerfile = io.open(filename, 'r', 'utf8', 'replace') - except IOError: - return False - linenum = 0 - for line in headerfile: - linenum += 1 - clean_line = CleanseComments(line) - match = _RE_PATTERN_INCLUDE.search(clean_line) - if match: - include = match.group(2) - include_dict.setdefault(include, linenum) - return True - - -def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, - io=codecs): - """Reports for missing stl includes. - - This function will output warnings to make sure you are including the headers - necessary for the stl containers and functions that you use. We only give one - reason to include a header. For example, if you use both equal_to<> and - less<> in a .h file, only one (the latter in the file) of these will be - reported as a reason to include the . - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - include_state: An _IncludeState instance. - error: The function to call with any errors found. - io: The IO factory to use to read the header file. Provided for unittest - injection. - """ - required = {} # A map of header name to linenumber and the template entity. - # Example of required: { '': (1219, 'less<>') } - - for linenum in xrange(clean_lines.NumLines()): - line = clean_lines.elided[linenum] - if not line or line[0] == '#': - continue - - # String is special -- it is a non-templatized type in STL. - matched = _RE_PATTERN_STRING.search(line) - if matched: - # Don't warn about strings in non-STL namespaces: - # (We check only the first match per line; good enough.) - prefix = line[:matched.start()] - if prefix.endswith('std::') or not prefix.endswith('::'): - required[''] = (linenum, 'string') - - for pattern, template, header in _re_pattern_headers_maybe_templates: - if pattern.search(line): - required[header] = (linenum, template) - - # The following function is just a speed up, no semantics are changed. 
- if not '<' in line: # Reduces the cpu time usage by skipping lines. - continue - - for pattern, template, header in _re_pattern_templates: - matched = pattern.search(line) - if matched: - # Don't warn about IWYU in non-STL namespaces: - # (We check only the first match per line; good enough.) - prefix = line[:matched.start()] - if prefix.endswith('std::') or not prefix.endswith('::'): - required[header] = (linenum, template) - - # The policy is that if you #include something in foo.h you don't need to - # include it again in foo.cc. Here, we will look at possible includes. - # Let's flatten the include_state include_list and copy it into a dictionary. - include_dict = dict([item for sublist in include_state.include_list - for item in sublist]) - - # Did we find the header for this file (if any) and successfully load it? - header_found = False - - # Use the absolute path so that matching works properly. - abs_filename = FileInfo(filename).FullName() - - # For Emacs's flymake. - # If cpplint is invoked from Emacs's flymake, a temporary file is generated - # by flymake and that file name might end with '_flymake.cc'. In that case, - # restore original file name here so that the corresponding header file can be - # found. - # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h' - # instead of 'foo_flymake.h' - abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename) - - # include_dict is modified during iteration, so we iterate over a copy of - # the keys. - header_keys = include_dict.keys() - for header in header_keys: - (same_module, common_path) = FilesBelongToSameModule(abs_filename, header) - fullpath = common_path + header - if same_module and UpdateIncludeState(fullpath, include_dict, io): - header_found = True - - # If we can't find the header file for a .cc, assume it's because we don't - # know where to look. In that case we'll give up as we're not sure they - # didn't include it in the .h file. 
- # TODO(unknown): Do a better job of finding .h files so we are confident that - # not having the .h file means there isn't one. - if filename.endswith('.cc') and not header_found: - return - - # All the lines have been processed, report the errors found. - for required_header_unstripped in required: - template = required[required_header_unstripped][1] - if required_header_unstripped.strip('<>"') not in include_dict: - error(filename, required[required_header_unstripped][0], - 'build/include_what_you_use', 4, - 'Add #include ' + required_header_unstripped + ' for ' + template) - - -_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<') - - -def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): - """Check that make_pair's template arguments are deduced. - - G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are - specified explicitly, and such use isn't intended in any case. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line) - if match: - error(filename, linenum, 'build/explicit_make_pair', - 4, # 4 = high confidence - 'For C++11-compatibility, omit template arguments from make_pair' - ' OR use pair directly OR if appropriate, construct a pair directly') - - -def CheckRedundantVirtual(filename, clean_lines, linenum, error): - """Check if line contains a redundant "virtual" function-specifier. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Look for "virtual" on current line. 
- line = clean_lines.elided[linenum] - virtual = Match(r'^(.*)(\bvirtual\b)(.*)$', line) - if not virtual: return - - # Ignore "virtual" keywords that are near access-specifiers. These - # are only used in class base-specifier and do not apply to member - # functions. - if (Search(r'\b(public|protected|private)\s+$', virtual.group(1)) or - Match(r'^\s+(public|protected|private)\b', virtual.group(3))): - return - - # Ignore the "virtual" keyword from virtual base classes. Usually - # there is a column on the same line in these cases (virtual base - # classes are rare in google3 because multiple inheritance is rare). - if Match(r'^.*[^:]:[^:].*$', line): return - - # Look for the next opening parenthesis. This is the start of the - # parameter list (possibly on the next line shortly after virtual). - # TODO(unknown): doesn't work if there are virtual functions with - # decltype() or other things that use parentheses, but csearch suggests - # that this is rare. - end_col = -1 - end_line = -1 - start_col = len(virtual.group(2)) - for start_line in xrange(linenum, min(linenum + 3, clean_lines.NumLines())): - line = clean_lines.elided[start_line][start_col:] - parameter_list = Match(r'^([^(]*)\(', line) - if parameter_list: - # Match parentheses to find the end of the parameter list - (_, end_line, end_col) = CloseExpression( - clean_lines, start_line, start_col + len(parameter_list.group(1))) - break - start_col = 0 - - if end_col < 0: - return # Couldn't find end of parameter list, give up - - # Look for "override" or "final" after the parameter list - # (possibly on the next few lines). 
- for i in xrange(end_line, min(end_line + 3, clean_lines.NumLines())): - line = clean_lines.elided[i][end_col:] - match = Search(r'\b(override|final)\b', line) - if match: - error(filename, linenum, 'readability/inheritance', 4, - ('"virtual" is redundant since function is ' - 'already declared as "%s"' % match.group(1))) - - # Set end_col to check whole lines after we are done with the - # first line. - end_col = 0 - if Search(r'[^\w]\s*$', line): - break - - -def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error): - """Check if line contains a redundant "override" or "final" virt-specifier. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Look for closing parenthesis nearby. We need one to confirm where - # the declarator ends and where the virt-specifier starts to avoid - # false positives. - line = clean_lines.elided[linenum] - declarator_end = line.rfind(')') - if declarator_end >= 0: - fragment = line[declarator_end:] - else: - if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0: - fragment = line - else: - return - - # Check that at most one of "override" or "final" is present, not both - if Search(r'\boverride\b', fragment) and Search(r'\bfinal\b', fragment): - error(filename, linenum, 'readability/inheritance', 4, - ('"override" is redundant since function is ' - 'already declared as "final"')) - - - - -# Returns true if we are at a new block, and it is directly -# inside of a namespace. -def IsBlockInNameSpace(nesting_state, is_forward_declaration): - """Checks that the new block is directly in a namespace. - - Args: - nesting_state: The _NestingState object that contains info about our state. - is_forward_declaration: If the class is a forward declared class. - Returns: - Whether or not the new block is directly in a namespace. 
- """ - if is_forward_declaration: - if len(nesting_state.stack) >= 1 and ( - isinstance(nesting_state.stack[-1], _NamespaceInfo)): - return True - else: - return False - - return (len(nesting_state.stack) > 1 and - nesting_state.stack[-1].check_namespace_indentation and - isinstance(nesting_state.stack[-2], _NamespaceInfo)) - - -def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, - raw_lines_no_comments, linenum): - """This method determines if we should apply our namespace indentation check. - - Args: - nesting_state: The current nesting state. - is_namespace_indent_item: If we just put a new class on the stack, True. - If the top of the stack is not a class, or we did not recently - add the class, False. - raw_lines_no_comments: The lines without the comments. - linenum: The current line number we are processing. - - Returns: - True if we should apply our namespace indentation check. Currently, it - only works for classes and namespaces inside of a namespace. - """ - - is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments, - linenum) - - if not (is_namespace_indent_item or is_forward_declaration): - return False - - # If we are in a macro, we do not want to check the namespace indentation. - if IsMacroDefinition(raw_lines_no_comments, linenum): - return False - - return IsBlockInNameSpace(nesting_state, is_forward_declaration) - - -# Call this method if the line is directly inside of a namespace. -# If the line above is blank (excluding comments) or the start of -# an inner namespace, it cannot be indented. 
-def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum, - error): - line = raw_lines_no_comments[linenum] - if Match(r'^\s+', line): - error(filename, linenum, 'runtime/indentation_namespace', 4, - 'Do not indent within a namespace') - - -def ProcessLine(filename, file_extension, clean_lines, line, - include_state, function_state, nesting_state, error, - extra_check_functions=[]): - """Processes a single line in the file. - - Args: - filename: Filename of the file that is being processed. - file_extension: The extension (dot not included) of the file. - clean_lines: An array of strings, each representing a line of the file, - with comments stripped. - line: Number of line being processed. - include_state: An _IncludeState instance in which the headers are inserted. - function_state: A _FunctionState instance which counts function lines, etc. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: A callable to which errors are reported, which takes 4 arguments: - filename, line number, error level, and message - extra_check_functions: An array of additional check functions that will be - run on each source line. 
Each function takes 4 - arguments: filename, clean_lines, line, error - """ - raw_lines = clean_lines.raw_lines - ParseNolintSuppressions(filename, raw_lines[line], line, error) - nesting_state.Update(filename, clean_lines, line, error) - CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, - error) - if nesting_state.InAsmBlock(): return - CheckForFunctionLengths(filename, clean_lines, line, function_state, error) - CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) - CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) - CheckLanguage(filename, clean_lines, line, file_extension, include_state, - nesting_state, error) - CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) - CheckForNonStandardConstructs(filename, clean_lines, line, - nesting_state, error) - CheckVlogArguments(filename, clean_lines, line, error) - CheckPosixThreading(filename, clean_lines, line, error) - CheckInvalidIncrement(filename, clean_lines, line, error) - CheckMakePairUsesDeduction(filename, clean_lines, line, error) - CheckRedundantVirtual(filename, clean_lines, line, error) - CheckRedundantOverrideOrFinal(filename, clean_lines, line, error) - for check_fn in extra_check_functions: - check_fn(filename, clean_lines, line, error) - -def FlagCxx11Features(filename, clean_lines, linenum, error): - """Flag those c++11 features that we only allow in certain places. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) - - # Flag unapproved C++ TR1 headers. 
- if include and include.group(1).startswith('tr1/'): - error(filename, linenum, 'build/c++tr1', 5, - ('C++ TR1 headers such as <%s> are unapproved.') % include.group(1)) - - # Flag unapproved C++11 headers. - # if include and include.group(1) in ('cfenv', - # 'condition_variable', - # 'fenv.h', - # 'future', - # 'mutex', - # 'thread', - # 'chrono', - # 'ratio', - # 'regex', - # 'system_error', - # ): - # error(filename, linenum, 'build/c++11', 5, - # ('<%s> is an unapproved C++11 header.') % include.group(1)) - - # The only place where we need to worry about C++11 keywords and library - # features in preprocessor directives is in macro definitions. - if Match(r'\s*#', line) and not Match(r'\s*#\s*define\b', line): return - - # These are classes and free functions. The classes are always - # mentioned as std::*, but we only catch the free functions if - # they're not found by ADL. They're alphabetical by header. - for top_name in ( - # type_traits - 'alignment_of', - 'aligned_union', - ): - if Search(r'\bstd::%s\b' % top_name, line): - error(filename, linenum, 'build/c++11', 5, - ('std::%s is an unapproved C++11 class or function. Send c-style ' - 'an example of where it would make your code more readable, and ' - 'they may let you use it.') % top_name) - - -def FlagCxx14Features(filename, clean_lines, linenum, error): - """Flag those C++14 features that we restrict. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) - - # Flag unapproved C++14 headers. 
- if include and include.group(1) in ('scoped_allocator', 'shared_mutex'): - error(filename, linenum, 'build/c++14', 5, - ('<%s> is an unapproved C++14 header.') % include.group(1)) - - -def ProcessFileData(filename, file_extension, lines, error, - extra_check_functions=[]): - """Performs lint checks and reports any errors to the given error function. - - Args: - filename: Filename of the file that is being processed. - file_extension: The extension (dot not included) of the file. - lines: An array of strings, each representing a line of the file, with the - last element being empty if the file is terminated with a newline. - error: A callable to which errors are reported, which takes 4 arguments: - filename, line number, error level, and message - extra_check_functions: An array of additional check functions that will be - run on each source line. Each function takes 4 - arguments: filename, clean_lines, line, error - """ - lines = (['// marker so line numbers and indices both start at 1'] + lines + - ['// marker so line numbers end in a known way']) - - include_state = _IncludeState() - function_state = _FunctionState() - nesting_state = NestingState() - - ResetNolintSuppressions() - - CheckForCopyright(filename, lines, error) - ProcessGlobalSuppresions(lines) - RemoveMultiLineComments(filename, lines, error) - clean_lines = CleansedLines(lines) - - if IsHeaderExtension(file_extension): - CheckForHeaderGuard(filename, clean_lines, error) - - for line in xrange(clean_lines.NumLines()): - ProcessLine(filename, file_extension, clean_lines, line, - include_state, function_state, nesting_state, error, - extra_check_functions) - FlagCxx11Features(filename, clean_lines, line, error) - nesting_state.CheckCompletedBlocks(filename, error) - - CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) - - # Check that the .cc file has included its header if it exists. 
- if _IsSourceExtension(file_extension): - CheckHeaderFileIncluded(filename, include_state, error) - - # We check here rather than inside ProcessLine so that we see raw - # lines rather than "cleaned" lines. - CheckForBadCharacters(filename, lines, error) - - CheckForNewlineAtEOF(filename, lines, error) - -def ProcessConfigOverrides(filename): - """ Loads the configuration files and processes the config overrides. - - Args: - filename: The name of the file being processed by the linter. - - Returns: - False if the current |filename| should not be processed further. - """ - - abs_filename = os.path.abspath(filename) - cfg_filters = [] - keep_looking = True - while keep_looking: - abs_path, base_name = os.path.split(abs_filename) - if not base_name: - break # Reached the root directory. - - cfg_file = os.path.join(abs_path, "CPPLINT.cfg") - abs_filename = abs_path - if not os.path.isfile(cfg_file): - continue - - try: - with open(cfg_file) as file_handle: - for line in file_handle: - line, _, _ = line.partition('#') # Remove comments. - if not line.strip(): - continue - - name, _, val = line.partition('=') - name = name.strip() - val = val.strip() - if name == 'set noparent': - keep_looking = False - elif name == 'filter': - cfg_filters.append(val) - elif name == 'exclude_files': - # When matching exclude_files pattern, use the base_name of - # the current file name or the directory name we are processing. - # For example, if we are checking for lint errors in /foo/bar/baz.cc - # and we found the .cfg file at /foo/CPPLINT.cfg, then the config - # file's "exclude_files" filter is meant to be checked against "bar" - # and not "baz" nor "bar/baz.cc". - if base_name: - pattern = re.compile(val) - if pattern.match(base_name): - if _cpplint_state.quiet: - # Suppress "Ignoring file" warning when using --quiet. - return False - sys.stderr.write('Ignoring "%s": file excluded by "%s". 
' - 'File path component "%s" matches ' - 'pattern "%s"\n' % - (filename, cfg_file, base_name, val)) - return False - elif name == 'linelength': - global _line_length - try: - _line_length = int(val) - except ValueError: - sys.stderr.write('Line length must be numeric.') - elif name == 'root': - global _root - # root directories are specified relative to CPPLINT.cfg dir. - _root = os.path.join(os.path.dirname(cfg_file), val) - elif name == 'headers': - ProcessHppHeadersOption(val) - else: - sys.stderr.write( - 'Invalid configuration option (%s) in file %s\n' % - (name, cfg_file)) - - except IOError: - sys.stderr.write( - "Skipping config file '%s': Can't open for reading\n" % cfg_file) - keep_looking = False - - # Apply all the accumulated filters in reverse order (top-level directory - # config options having the least priority). - for filter in reversed(cfg_filters): - _AddFilters(filter) - - return True - - -def ProcessFile(filename, vlevel, extra_check_functions=[]): - """Does google-lint on a single file. - - Args: - filename: The name of the file to parse. - - vlevel: The level of errors to report. Every error of confidence - >= verbose_level will be reported. 0 is a good default. - - extra_check_functions: An array of additional check functions that will be - run on each source line. Each function takes 4 - arguments: filename, clean_lines, line, error - """ - - _SetVerboseLevel(vlevel) - _BackupFilters() - old_errors = _cpplint_state.error_count - - if not ProcessConfigOverrides(filename): - _RestoreFilters() - return - - lf_lines = [] - crlf_lines = [] - try: - # Support the UNIX convention of using "-" for stdin. Note that - # we are not opening the file with universal newline support - # (which codecs doesn't support anyway), so the resulting lines do - # contain trailing '\r' characters if we are reading a file that - # has CRLF endings. - # If after the split a trailing '\r' is present, it is removed - # below. 
- if filename == '-': - lines = codecs.StreamReaderWriter(sys.stdin, - codecs.getreader('utf8'), - codecs.getwriter('utf8'), - 'replace').read().split('\n') - else: - lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n') - - # Remove trailing '\r'. - # The -1 accounts for the extra trailing blank line we get from split() - for linenum in range(len(lines) - 1): - if lines[linenum].endswith('\r'): - lines[linenum] = lines[linenum].rstrip('\r') - crlf_lines.append(linenum + 1) - else: - lf_lines.append(linenum + 1) - - except IOError: - sys.stderr.write( - "Skipping input '%s': Can't open for reading\n" % filename) - _RestoreFilters() - return - - # Note, if no dot is found, this will give the entire filename as the ext. - file_extension = filename[filename.rfind('.') + 1:] - - # When reading from stdin, the extension is unknown, so no cpplint tests - # should rely on the extension. - if filename != '-' and file_extension not in _valid_extensions: - sys.stderr.write('Ignoring %s; not a valid file name ' - '(%s)\n' % (filename, ', '.join(_valid_extensions))) - else: - ProcessFileData(filename, file_extension, lines, Error, - extra_check_functions) - - # If end-of-line sequences are a mix of LF and CR-LF, issue - # warnings on the lines with CR. - # - # Don't issue any warnings if all lines are uniformly LF or CR-LF, - # since critique can handle these just fine, and the style guide - # doesn't dictate a particular end of line sequence. - # - # We can't depend on os.linesep to determine what the desired - # end-of-line sequence should be, since that will return the - # server-side end-of-line sequence. - if lf_lines and crlf_lines: - # Warn on every line with CR. An alternative approach might be to - # check whether the file is mostly CRLF or just LF, and warn on the - # minority, we bias toward LF here since most tools prefer LF. 
- for linenum in crlf_lines: - Error(filename, linenum, 'whitespace/newline', 1, - 'Unexpected \\r (^M) found; better to use only \\n') - - # Suppress printing anything if --quiet was passed unless the error - # count has increased after processing this file. - if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count: - sys.stdout.write('Done processing %s\n' % filename) - _RestoreFilters() - - -def PrintUsage(message): - """Prints a brief usage string and exits, optionally with an error message. - - Args: - message: The optional error message. - """ - sys.stderr.write(_USAGE) - if message: - sys.exit('\nFATAL ERROR: ' + message) - else: - sys.exit(1) - - -def PrintCategories(): - """Prints a list of all the error-categories used by error messages. - - These are the categories used to filter messages via --filter. - """ - sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES)) - sys.exit(0) - - -def ParseArguments(args): - """Parses the command line arguments. - - This may set the output format and verbosity level as side-effects. - - Args: - args: The command line arguments: - - Returns: - The list of filenames to lint. 
- """ - try: - (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=', - 'counting=', - 'filter=', - 'root=', - 'linelength=', - 'extensions=', - 'headers=', - 'quiet']) - except getopt.GetoptError: - PrintUsage('Invalid arguments.') - - verbosity = _VerboseLevel() - output_format = _OutputFormat() - filters = '' - quiet = _Quiet() - counting_style = '' - - for (opt, val) in opts: - if opt == '--help': - PrintUsage(None) - elif opt == '--output': - if val not in ('emacs', 'vs7', 'eclipse'): - PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.') - output_format = val - elif opt == '--quiet': - quiet = True - elif opt == '--verbose': - verbosity = int(val) - elif opt == '--filter': - filters = val - if not filters: - PrintCategories() - elif opt == '--counting': - if val not in ('total', 'toplevel', 'detailed'): - PrintUsage('Valid counting options are total, toplevel, and detailed') - counting_style = val - elif opt == '--root': - global _root - _root = val - elif opt == '--linelength': - global _line_length - try: - _line_length = int(val) - except ValueError: - PrintUsage('Line length must be digits.') - elif opt == '--extensions': - global _valid_extensions - try: - _valid_extensions = set(val.split(',')) - except ValueError: - PrintUsage('Extensions must be comma seperated list.') - elif opt == '--headers': - ProcessHppHeadersOption(val) - - if not filenames: - PrintUsage('No files were specified.') - - _SetOutputFormat(output_format) - _SetQuiet(quiet) - _SetVerboseLevel(verbosity) - _SetFilters(filters) - _SetCountingStyle(counting_style) - - return filenames - - -def main(): - filenames = ParseArguments(sys.argv[1:]) - - # Change stderr to write with replacement characters so we don't die - # if we try to print something containing non-ASCII characters. 
- sys.stderr = codecs.StreamReaderWriter(sys.stderr, - codecs.getreader('utf8'), - codecs.getwriter('utf8'), - 'replace') - - _cpplint_state.ResetErrorCounts() - for filename in filenames: - ProcessFile(filename, _cpplint_state.verbose_level) - # If --quiet is passed, suppress printing error count unless there are errors. - if not _cpplint_state.quiet or _cpplint_state.error_count > 0: - _cpplint_state.PrintErrorCounts() - - sys.exit(_cpplint_state.error_count > 0) - - -if __name__ == '__main__': - main() diff --git a/eval/Makefile b/eval/Makefile deleted file mode 100644 index b802467..0000000 --- a/eval/Makefile +++ /dev/null @@ -1,117 +0,0 @@ -EVAL_DF_BINS=10,20,30,40,50,60,70,80,90,100 - -all: eval lighteval - -lighteval: vitoria.lighteval stuttgart.lighteval paris.lighteval switzerland.lighteval - -eval: vitoria.eval stuttgart.eval paris.eval switzerland.eval - -clean: - rm -f *.eval - rm -rf gtfs - rf -rf osm - rm -rf evalout - -osmconvert: - wget -O - http://m.m.i24.cc/osmconvert.c | cc -x c - -lz -O3 -o osmconvert - -%.lighteval: osm/%.osm gtfs/%/stops.txt gtfs/%/stop_times.txt gtfs/%/trips.txt gtfs/%/routes.txt eval.cfg eval-wo-osm.cfg - mkdir -p gtfs/$*/shaped - rm -f gtfs/$*/shaped/* - ../build/pfaedle -x $< -i gtfs/$* -c eval.cfg -o gtfs/$*/shaped -D -m all 2>&1 | tee $@ - -%.eval: osm/%.osm gtfs/%/stops.txt gtfs/%/stop_times.txt gtfs/%/trips.txt gtfs/%/routes.txt eval.cfg eval-wo-osm.cfg - mkdir -p gtfs/$*/shaped - rm -f gtfs/$*/shaped/* - mkdir -p evalout/ - mkdir -p evalout/$*/ - mkdir -p evalout/$*/hmm+osm - ../build/pfaedle -x $< -i gtfs/$* -c eval.cfg --eval-path evalout/$*/hmm+osm -o gtfs/$*/shaped -D -m all --eval --eval-df-bins $(EVAL_DF_BINS) 2>&1 | tee $@ - find evalout/$*/hmm+osm/ -name "*.json" -print0 | xargs -0 rm - - mkdir -p evalout/$*/hmm - ../build/pfaedle -x $< -i gtfs/$* -c eval-wo-osm.cfg --eval-path evalout/$*/hmm -o gtfs/$*/shaped -D -m all --eval --eval-df-bins $(EVAL_DF_BINS) 2>&1 | tee $@ - find evalout/$*/hmm/ -name "*.json" 
-print0 | xargs -0 rm - - mkdir -p evalout/$*/greedy - ../build/pfaedle -x $< -i gtfs/$* -c eval.cfg --method greedy --eval-path evalout/$*/greedy -o gtfs/$*/shaped -D -m all --eval --eval-df-bins $(EVAL_DF_BINS) 2>&1 | tee $@ - find evalout/$*/greedy/ -name "*.json" -print0 | xargs -0 rm - - mkdir -p evalout/$*/greedy2 - ../build/pfaedle -x $< -i gtfs/$* -c eval.cfg --method greedy2 --eval-path evalout/$*/greedy2 -o gtfs/$*/shaped -D -m all --eval --eval-df-bins $(EVAL_DF_BINS) 2>&1 | tee $@ - find evalout/$*/greedy2/ -name "*.json" -print0 | xargs -0 rm - -osm/spain-latest.osm.pbf: - mkdir -p osm - wget http://download.geofabrik.de/europe/spain-latest.osm.pbf -O $@ - -osm/spain-latest.osm: osm/spain-latest.osm.pbf osmconvert - @# pre-filter to vitoria gasteiz - osmconvert -b=-2.8661,42.7480,-2.4788,43.0237 $< > $@ - -osm/baden-wuerttemberg-latest.osm.pbf: - mkdir -p osm - wget http://download.geofabrik.de/europe/germany/baden-wuerttemberg-latest.osm.pbf -O $@ - -osm/baden-wuerttemberg-latest.osm: osm/baden-wuerttemberg-latest.osm.pbf osmconvert - osmconvert $< > $@ - -osm/france-latest.osm.pbf: - mkdir -p osm - wget http://download.geofabrik.de/europe/france-latest.osm.pbf -O $@ - -osm/paris-latest.osm: osm/france-latest.osm.pbf osmconvert - @# pre-filter to greater ile de france - osmconvert -b=0.374,47.651,4.241,50.261 $< > $@ - -osm/europe-latest.osm.pbf: - mkdir -p osm - wget http://download.geofabrik.de/europe-latest.osm.pbf -O $@ - -osm/switzerland-latest.osm: osm/europe-latest.osm.pbf osmconvert - @# pre-filter to greater switzerland - osmconvert -b=3.757,44.245,15.579,52.670 $< > $@ - -gtfs/vitoria/%.txt: - mkdir -p gtfs - mkdir -p gtfs/vitoria - wget https://transitfeeds.com/p/tuvisa-euskotran/239/latest/download -O gtfs/vitoria/gtfs.zip - cd gtfs/vitoria && unzip -o gtfs.zip - rm gtfs/vitoria/gtfs.zip - -gtfs/stuttgart/%.txt: - mkdir -p gtfs - mkdir -p gtfs/stuttgart - echo "******************************************************************" - echo "* A 
password is required to access the VVS dataset. Send a mail *" - echo "* to brosi@cs.informatik.uni-freiburg.de to receive the password. " - echo "******************************************************************" - wget http://www.vvs.de/download/opendata/VVS_GTFS.zip --ask-password --user vvsopendata01 -O gtfs/stuttgart/gtfs.zip - cd gtfs/stuttgart && unzip gtfs.zip - rm gtfs/stuttgart/gtfs.zip - -gtfs/paris/%.txt: - mkdir -p gtfs - mkdir -p gtfs/paris - wget https://transitfeeds.com/p/stif/822/latest/download -O gtfs/paris/gtfs.zip - cd gtfs/paris && unzip gtfs.zip - rm gtfs/paris/gtfs.zip - -gtfs/switzerland/%.txt: - mkdir -p gtfs - mkdir -p gtfs/switzerland - wget http://gtfs.geops.ch/dl/gtfs_complete.zip -O gtfs/switzerland/gtfs.zip - cd gtfs/switzerland && unzip gtfs.zip - rm gtfs/switzerland/gtfs.zip - -osm/vitoria.osm: osm/spain-latest.osm gtfs/vitoria/stops.txt eval.cfg - ../build/pfaedle -x $< -i gtfs/vitoria/ -c eval.cfg -m all -X $@ - -osm/stuttgart.osm: osm/baden-wuerttemberg-latest.osm gtfs/stuttgart/stops.txt eval.cfg - ../build/pfaedle -x $< -i gtfs/stuttgart/ -c eval.cfg -m all -X $@ - -osm/paris.osm: osm/paris-latest.osm gtfs/paris/stops.txt eval.cfg - ../build/pfaedle -x $< -i gtfs/paris/ -c eval.cfg -m all -X $@ - -osm/switzerland.osm: osm/switzerland-latest.osm gtfs/switzerland/stops.txt eval.cfg - ../build/pfaedle -x $< -i gtfs/switzerland/ -c eval.cfg -m all -X $@ diff --git a/eval/eval-wo-osm.cfg b/eval/eval-wo-osm.cfg deleted file mode 100644 index dfc8e75..0000000 --- a/eval/eval-wo-osm.cfg +++ /dev/null @@ -1,981 +0,0 @@ -# Copyright 2018, University of Freiburg -# Chair of Algorithms and Datastructures -# Authors: Patrick Brosi - -[rail] - -# OSM entities to keep on different levels, as k=v. Applies -# to nodes, edges and relations. -# Nodes included in kept ways are always kept. -# Ways included in kept relations are always kept. 
- -osm_filter_keep: - railway=rail - railway=light_rail - railway=narrow_gauge - route=rail - route=train - public_transport=stop_area|rel_flat - -osm_filter_lvl1: - usage=branch - -osm_filter_lvl2: - -osm_filter_lvl3: - service=crossover - service=siding - # we cannot completely drop service=yard, because it is often used - # incorrectly for crossovers - service=yard - -osm_filter_lvl4: - -osm_filter_lvl5: - usage=industrial - usage=military - usage=test - service=spur - railway:traffic_mode=freight - -# OSM entities to drop, as k=v. Applies to nodes, edges and -# relations. -# Nodes included in non-dropped ways are kept regardless of -# a matching drop filter. -# Ways included in non-dropped relations are kept regardless of -# a matching drop filter. - -osm_filter_drop: - railway=abandoned - railway=construction - railway=disused - railway=miniature - railway=signal - railway=razed - railway=proposed - metro=yes - area=yes - # access=no - type=multipolygon - railway=platform - public_transport=platform - building=yes - building=train_station - amenity=shelter - amenity=bus_station - building=roof - -# Nodes that should act as "no-hup" nodes. These are nodes -# that are contained in multiple ways, but cannot be used -# to switch from one way to another (for example, a -# track crossing in rail networks) - -osm_filter_nohup: - railway:switch=no - railway=railway_crossing - -# Edges that should act as one-way nodes. - -osm_filter_oneway: - oneway=yes - railway:preferred_direction=forward - -osm_filter_oneway_reverse: - railway:preferred_direction=backward - -# Edges that may explicitely be used in -# both directions. May be used to set exception -# to "osm_filter_oneway" - -osm_filter_undirected: - oneway=false - oneway=no - oneway=-1 - railway:preferred_direction=both - railway:bidirectional=regular - -# Nodes that are stations. -# Only nodes that have been kept during the filtering above will be -# checked. 
-osm_filter_station: - public_transport=stop_position - railway=stop - railway=halt - railway=station - #railway=tram_stop - railway=subway_stop - tram_stop=* - stop=* - -# Relation fields that should be used for catching the lines that -# occur on an edge. Only relations that have been kept during the -# filtering above will be checked. The 'linename' will be normalized -# according to the rules in line_normalization_chain. -# The 'from_name' and 'to_name' will be normalized according to the -# rules in station_normalization_chain. -# The relations tags are given in the order of their relevance - -# the first normalized tag-value that is not null/empty will be -# taken. -osm_line_relation_tags: - -# attr name together with the -# max distance in meters between any of the groups members and -# a potential new member -# first matching rule will be taken -# only applies to nodes that match osm_filter_station! -osm_station_group_attrs: - uic_ref=500 - wikidata=500 - [public_transport=stop_area]uic_ref=500 - [public_transport=stop_area]wikidata=500 - name=100 - [public_transport=stop_area]name=100 - -# max distance in meters between a snapped station position and the -# original station position -osm_max_snap_distance: 10, 100, 200 - -# max edge level to which station will be snapped -osm_max_snap_level: 2 - -# sorted by priority, first found attr will be taken -osm_station_name_attrs: - name - [public_transport=stop_area]name - uic_name - -# the track number tag in edges, first match is taken -osm_edge_track_number_tags: - railway:track_ref - local_ref - ref - -# the track number tag in stop nodes, first match is taken, -# overwrites osm_edge_track_number_tags -osm_track_number_tags: - local_ref - ref - -routing_lvl0_fac: 1 # default level -routing_lvl1_fac: 1.25 -routing_lvl2_fac: 1.5 -routing_lvl3_fac: 2 -routing_lvl4_fac: 2.5 -routing_lvl5_fac: 3.5 -routing_lvl6_fac: 5 -routing_lvl7_fac: 7 - -# Punishment (in meters) to add to the distance -# function if a vehicle 
performans a full turn -routing_full_turn_punish: 3000 - -routing_station_distance_punish_fac: 3.14 - -routing_non_osm_station_punish: 100 - -routing_platform_unmatched_punish: 2000 - -# Max angle that should be counted as a full turn -routing_full_turn_angle: 100 - -# Max angle in a route from a station to an already reachable neighbar -routing_snap_full_turn_angle: 100 - -# Punishment (in meters) to add to the distance -# function if a vehicle passes a station node without -# stopping there -routing_pass_thru_station_punish: 100 - -# Punishment factor for every meter a vehicle -# travels through a one-way edge -routing_one_way_meter_punish_fac: 1 - -# Punishment factor for every meter a vehicle -# travels through an edge without any matching line -# information -routing_line_unmatched_punish_fac: 1 - -# special line normalization for trains -line_normalize_chain: - , -> ' '; - - -> ' '; - _ -> ' '; - " -> ''; - ' -> ''; - ` -> ''; - / -> ' '; - < -> ' '; - > -> ' '; - & -> '+'; - ä -> ae; - ö -> oe; - ü -> ue; - ß -> ss; - è -> e; - é -> e; - á -> a; - à -> a; - ó -> o; - ò -> o; - í -> i; - ú -> u; - ù -> u; - ë -> e; - ç -> c; - å -> ae; - â -> a; - ê -> e; - ï -> i; - œ -> oe; - ø -> oe; - ^line -> ''; - ^linie -> ''; - ^metro -> ''; - ^tram -> ''; - ^strassenbahn -> ''; - ^bus -> ''; - - # delete everything in brackets - \(.+\) -> ' '; - \[.+\] -> ' '; - - # whitespace - \s+ -> ' '; - ^\s -> ''; - \s$ -> ''; - - # line/number combs ALWAYS with whitespace (ICE101 -> ICE 101) - ^([a-zA-Z]+)([0-9]+)$ -> \1 \2; - - # if a character line number is present, delete the numeric part - ^([a-zA-Z]+) [0-9]+$ -> \1; - -[bus] - -# OSM entities to keep on different levels, as k=v. Applies -# to nodes, edges and relations. -# Nodes included in kept ways are always kept. -# Ways included in kept relations are always kept. 
- -osm_filter_keep: - # highways - highway=motorway - highway=trunk - highway=primary - highway=secondary - highway=tertiary - highway=residential - highway=living_street - highway=unclassified - - # highway links - highway=motorway_link - highway=trunk_link - highway=primary_link - highway=secondary_link - highway=tertiary_link - highway=residential_link - - way=primary - way=seconday - way=bus_guideway - highway=bus_guideway - busway=* - psv=yes - psv=designated - - trolley_wire=yes - trolleywire=yes - trolleybus=yes - trolley_bus=yes - - route=bus - route=trolleybus - bus=yes - - public_transport=stop_position - bus_stop=* - stop=* - highway=bus_stop - amenity=bus_station|no_match_ways|no_match_rels - - # relations for the restriction system - type=restriction - type=restriction:bus - type=restriction:motorcar - -osm_filter_lvl1: - highway=secondary - highway=secondary_link - bus=yes - psv=yes - access=psv - access=bus - trolley_wire=yes - trolleywire=yes - trolleybus=yes - trolley_bus=yes - psv=designated - -osm_filter_lvl2: - highway=tertiary - highway=tertiary_link - -osm_filter_lvl3: - highway=unclassified - highway=residential - highway=road - -osm_filter_lvl4: - highway=living_street - highway=pedestrian - highway=service - -osm_filter_lvl5: - service=siding - access=permissive - access=private - access=no - service=parking_aisle - highway=footway - -# OSM entities to drop, as k=v. Applies to nodes, edges and -# relations. -# Nodes included in non-dropped ways are kept regardless of -# a matching drop filter. -# Ways included in non-dropped relations are kept regardless of -# a matching drop filter. 
- -osm_filter_drop: - area=yes - train=yes|no_match_ways - # access=no - public_transport=stop_area|no_match_nds|no_match_rels - type=multipolygon - railway=platform - railway=station - # service=parking_aisle - highway=proposed - highway=footway - highway=construction - building=yes - building=train_station - leisure=garden - leisure=park - -# Nodes that should act as "no-hup" nodes. These are nodes -# that are contained in multiple ways, but cannot be used -# to switch from one way to another (for example, a -# track crossing in rail networks) - -osm_filter_nohup: - -# Configuration of the OSM road restriction system -# We only support restriction with a single via node -# atm - -osm_node_negative_restriction: - restriction=no_right_turn - restriction=no_left_turn - restriction=no_u_turn - restriction=no_straight_on - restriction:bus=no_right_turn - restriction:bus=no_left_turn - restriction:bus=no_u_turn - restriction:bus=no_straight_on - -osm_node_positive_restriction: - restriction=only_left_turn - restriction=only_straight_on - restriction=only_right_turn - restriction:bus=only_left_turn - restriction:bus=only_straight_on - restriction:bus=only_right_turn - -osm_filter_no_restriction: - except=psv|mult_val_match - except=bus|mult_val_match - -# Edges that should act as one-way nodes. - -osm_filter_oneway: - junction=roundabout # oneway=yes is implied - highway=motorway # oneway=yes is implied - oneway=yes - oneway=1 - oneway=true - oneway:bus=yes - oneway:bus=1 - oneway:bus=true - oneway:psv=yes - oneway:psv=1 - oneway:psv=true - -osm_filter_oneway_reverse: - oneway=-1 - -# Edges that may explicitely be used in -# both directions. 
May be used to set exception -# to "osm_filter_oneway" - -osm_filter_undirected: - oneway=false - oneway=0 - oneway=alternating - oneway=reversible - oneway=no - oneway:bus=no - oneway:bus=0 - oneway:bus=false - oneway:psv=no - oneway:psv=0 - oneway:psv=false - busway=opposite_lane - busway=opposite - busway:left=opposite_lane - busway:right=opposite_lane - psv=opposite_lane - psv=opposite - -# Nodes that are stations. -# Only nodes that have been kept during the filtering above will be -# checked. -osm_filter_station: - public_transport=stop_position - bus_stop=* - stop=* - highway=bus_stop - amenity=bus_station - -# Relation fields that should be used for catching the lines that -# occur on an edge. Only relations that have been kept during the -# filtering above will be checked. The 'linename' will be normalized -# according to the rules in line_normalization_chain. -# The 'from_name' and 'to_name' will be normalized according to the -# rules in station_normalization_chain. -# The relations tags are given in the order of their relevance - -# the first normalized tag-value that is not null/empty will be -# taken. -osm_line_relation_tags: - - -# attr name together with the -# max distance in meters between any of the groups members and -# a potential new member -# first matching rule will be taken -# only applies to nodes that match osm_filter_station! 
-osm_station_group_attrs: - uic_ref=500 - wikidata=500 - name=100 - -# max distance in meters between a snapped station position and the -# original station position -osm_max_snap_distance: 10 , 50, 100 - -osm_max_snap_level: 5 - -osm_max_osm_station_distance: 7.5 - -# sorted by priority, first found attr will be taken -osm_station_name_attrs: - name - uic_name - -# the track number tag in stop nodes, first one is taken -osm_track_number_tags: local_ref - -routing_lvl0_fac: 1 # default level -routing_lvl1_fac: 1.25 -routing_lvl2_fac: 1.5 -routing_lvl3_fac: 1.75 -routing_lvl4_fac: 2.25 -routing_lvl5_fac: 3 -routing_lvl6_fac: 4 -routing_lvl7_fac: 5 - -# Punishment (in meters) to add to the distance -# function if a vehicle performans a full turn -routing_full_turn_punish: 500 - -routing_station_distance_punish_fac: 2.5 - -routing_non_osm_station_punish: 500 - -# Max angle that should be counted as a full turn -routing_full_turn_angle: 20 - -# Max angle in a route from a station to an already reachable neighbor -routing_snap_full_turn_angle: 110 - -osm_max_node_block_distance: 10 - -# Punishment (in meters) to add to the distance -# function if a vehicle passes a station node without -# stopping there -routing_pass_thru_station_punish: 0 - -# Punishment factor for every meter a vehicle -# travels through a one-way edge -routing_one_way_meter_punish_fac: 4 - -routing_one_way_edge_punish: 5000 - -# Punishment factor for every meter a vehicle -# travels through an edge without any matching line -# information -# routing_line_unmatched_punish_fac: 1.75 - -[tram, subway, funicular] - -# OSM entities to keep on different levels, as k=v. Applies -# to nodes, edges and relations. -# Nodes included in kept ways are always kept. -# Ways included in kept relations are always kept. 
- -osm_filter_keep: - route=tram - railway=subway - railway=light_rail - railway=tram - railway=funicular - railway=station - railway=halt - railway=tram_stop - route=subway - route=light_rail - subway=yes - tram=yes - -osm_filter_lv2: - service=siding - -osm_filter_lvl5: - service=crossover - service=yard - -# OSM entities to drop, as k=v. Applies to nodes, edges and -# relations. -# Nodes included in non-dropped ways are kept regardless of -# a matching drop filter. -# Ways included in non-dropped relations are kept regardless of -# a matching drop filter. - -osm_filter_drop: - area=yes - public_transport=stop_area - type=multipolygon - railway=platform - public_transport=platform - service=alley - -# Nodes that should act as "no-hup" nodes. These are nodes -# that are contained in multiple ways, but cannot be used -# to switch from one way to another (for example, a -# track crossing in rail networks) - -osm_filter_nohup: - railway:switch=no - railway=railway_crossing - -# Edges that should act as one-way nodes. - -osm_filter_oneway: - oneway=yes - -# Edges that may explicitely be used in -# both directions. May be used to set exception -# to "osm_filter_oneway" - -osm_filter_undirected: - -# Nodes that are stations. -# Only nodes that have been kept during the filtering above will be -# checked. -osm_filter_station: - public_transport=stop_position - station=subway - station=tram - railway=stop - railway=halt - railway=station - railway=tram_stop - railway=subway_stop - tram_stop=* - stop=* - -# Relation fields that should be used for catching the lines that -# occur on an edge. Only relations that have been kept during the -# filtering above will be checked. The 'linename' will be normalized -# according to the rules in line_normalization_chain. -# The 'from_name' and 'to_name' will be normalized according to the -# rules in station_normalization_chain. 
-# The relations tags are given in the order of their relevance - -# the first normalized tag-value that is not null/empty will be -# taken. -osm_line_relation_tags: - - -# attr name together with the -# max distance in meters between any of the groups members and -# a potential new member -# first matching rule will be taken -# only applies to nodes that match osm_filter_station! -osm_station_group_attrs: - uic_ref=500 - wikidata=500 - name=100 - -# max distance in meters between a snapped station position and the -# original station position -osm_max_snap_distance: 10, 50, 100 - -osm_max_snap_level: 4 - - -# sorted by priority, first found attr will be taken -osm_station_name_attrs: - name - uic_name - -# the track number tag in stop nodes, first one is taken -osm_track_number_tags: local_ref - -routing_lvl0_fac: 1 # default level -routing_lvl1_fac: 1.5 -routing_lvl2_fac: 2 -routing_lvl3_fac: 2.5 -routing_lvl4_fac: 3.5 -routing_lvl5_fac: 5 -routing_lvl6_fac: 5 -routing_lvl7_fac: 5 - -# Punishment (in meters) to add to the distance -# function if a vehicle performans a full turn -routing_full_turn_punish: 2000 - -routing_station_distance_punish_fac: 3.14 - -routing_non_osm_station_punish: 235 - -# Max angle that should be counted as a full turn -routing_full_turn_angle: 80 - -# Max angle in a route from a station to an already reachable neighbar -routing_snap_full_turn_angle: 80 - -# Punishment (in meters) to add to the distance -# function if a vehicle passes a station node without -# stopping there -routing_pass_thru_station_punish: 100 - -# Punishment factor for every meter a vehicle -# travels through a one-way edge -routing_one_way_meter_punish_fac: 1 - -# Punishment factor for every meter a vehicle -# travels through an edge without any matching line -# information -routing_line_unmatched_punish_fac: 0.5 - -[ferry] - -# OSM entities to keep on different levels, as k=v. Applies -# to nodes, edges and relations. -# Nodes included in kept ways are always kept. 
-# Ways included in kept relations are always kept. - -osm_filter_keep: - route=ferry - waterway=river - motorboat=yes - ferry=yes - -# Nodes that are stations. -# Only nodes that have been kept during the filtering above will be -# checked. -osm_filter_station: - public_transport=stop_position - station=ferry - railway=stop - railway=halt - railway=station - stop=* - -# Relation fields that should be used for catching the lines that -# occur on an edge. Only relations that have been kept during the -# filtering above will be checked. The 'linename' will be normalized -# according to the rules in line_normalization_chain. -# The 'from_name' and 'to_name' will be normalized according to the -# rules in station_normalization_chain. -# The relations tags are given in the order of their relevance - -# the first normalized tag-value that is not null/empty will be -# taken. -osm_line_relation_tags: - - -# attr name together with the -# max distance in meters between any of the groups members and -# a potential new member -# first matching rule will be taken -# only applies to nodes that match osm_filter_station! 
-osm_station_group_attrs: - uic_ref=500 - wikidata=500 - name=100 - -# max distance in meters between a snapped station position and the -# original station position -osm_max_snap_distance: 10, 100, 200 - -osm_max_snap_level: 4 - - -# sorted by priority, first found attr will be taken -osm_station_name_attrs: - name - uic_name - -# the track number tag in stop nodes, first one is taken -osm_track_number_tags: local_ref - -routing_lvl0_fac: 1 # default level -routing_lvl1_fac: 1.5 -routing_lvl2_fac: 2 -routing_lvl3_fac: 2.5 -routing_lvl4_fac: 3.5 -routing_lvl5_fac: 5 -routing_lvl6_fac: 5 -routing_lvl7_fac: 5 - -# Punishment (in meters) to add to the distance -# function if a vehicle performans a full turn -routing_full_turn_punish: 100 - -routing_station_distance_punish_fac: 3.14 - -routing_non_osm_station_punish: 50 - -# Max angle that should be counted as a full turn -routing_full_turn_angle: 45 - -# Max angle in a route from a station to an already reachable neighbar -routing_snap_full_turn_angle: 0 - -# Punishment (in meters) to add to the distance -# function if a vehicle passes a station node without -# stopping there -routing_pass_thru_station_punish: 0 - -# Punishment factor for every meter a vehicle -# travels through a one-way edge -routing_one_way_meter_punish_fac: 1 - -# Punishment factor for every meter a vehicle -# travels through an edge without any matching line -# information -routing_line_unmatched_punish_fac: 0.5 - -[tram, bus, subway, rail, gondola, funicular, ferry] -# Regular expressions and station comparision is -# always case insensitive! 
-station_normalize_chain: - , -> ' '; - - -> ' '; - — -> ' '; - _ -> ' '; - " -> ''; - ' -> ''; - ` -> ''; - \( -> ' '; - \) -> ' '; - \[ -> ' '; - \] -> ' '; - / -> ' '; - '\\' -> ' '; - < -> ' '; - > -> ' '; - & -> '+'; - ä -> ae; - ö -> oe; - ü -> ue; - ß -> ss; - è -> e; - é -> e; - á -> a; - à -> a; - ó -> o; - ò -> o; - ô -> o; - ç -> c; - í -> i; - ú -> u; - ù -> u; - ë -> e; - å -> ae; - â -> a; - ê -> e; - ï -> i; - œ -> oe; - ø -> oe; - str\. -> strasse; - av\. -> avenue; - - # always separate 'street', 'strasse' - '([a-zA-Z])strasse($| )' -> '\1 strasse\2'; - '([a-zA-Z])street($| )' -> '\1 street\2'; - - # always use "street" - '(^| )strasse($| )' -> '\1street\2'; - - # always use "avenue" - '(^| )avenida($| )' -> '\1avenue\2'; - '(^| )avenu($| )' -> '\1avenue\2'; - - # normalize every possible abbr. of german "Bahnhof", "Hauptbahnhof", "Busbahnhof" - '(^| )hauptbf\.($| )' -> '\1hauptbahnhof\2'; - '(^| )hauptbf($| )' -> '\1hauptbahnhof\2'; - '(^| )hauptbhf\.($| )' -> '\1hauptbahnhof\2'; - '(^| )hauptbhf($| )' -> '\1hauptbahnhof\2'; - '(^| )zentraler busbahnhof($| )$' -> \1busbahnhof\2; - '(^| )zentraler omnibusbahnhof($| )$' -> \1busbahnhof\2; - '(^| )omnibusbahnhof($| )' -> '\1busbahnhof\2'; - '(^| )omnibusbhf($| )' -> '\1busbahnhof\2'; - '(^| )busbf\.($| )' -> '\1busbahnhof\2'; - '(^| )busbf($| )' -> '\1busbahnhof\2'; - '(^| )bus bf\.($| )' -> '\1busbahnhof\2'; - '(^| )bus bf($| )' -> '\1busbahnhof\2'; - '(^| )busbhf\.($| )' -> '\1busbahnhof\2'; - '(^| )busbhf($| )' -> '\1busbahnhof\2'; - '(^| )bus bhf\.($| )' -> '\1busbahnhof\2'; - '(^| )bus bhf($| )' -> '\1busbahnhof\2'; - '(^| )zob($| )' -> '\1busbahnhof\2'; - '(^| )hbf\.($| )' -> '\1hauptbahnhof\2'; - '(^| )hbf($| )' -> '\1hauptbahnhof\2'; - '(^| )hb\.($| )' -> '\1hauptbahnhof\2'; - '(^| )hb($| )' -> '\1hauptbahnhof\2'; - '(^| )bf\.($| )' -> '\1bahnhof\2'; - '(^| )bf($| )' -> '\1bahnhof\2'; - '(^| )bhf\.($| )' -> '\1bahnhof\2'; - '(^| )bhf($| )' -> '\1bahnhof\2'; - '(^| )bhfeingang($| )' -> 
'\1bahnhofeingang\2'; - '(^| )gare de($| )' -> '\1gare\2'; - - - # if a stations starts with single station identifier - # always put it at the end (for example, "hauptbahnhof freiburg" becomes "freiburg hauptbahnhof") - '^hauptbahnhof (.+)$' -> \1 hauptbahnhof; - '^bahnhof (.+)$' -> \1 bahnhof; - '^busbahnhof (.+)$' -> \1 busbahnhof; - '^gare (.+)$' -> \1 gare; - '^station (.+)$' -> \1 station; - - '(^| )busbahnhof($| )' -> '\1bbahnhof\2'; - - # normalize line types in station names - '(^| )u bahn\.($| )' -> '\1ubahn\2'; - '(^| )metro\.($| )' -> '\1ubahn\2'; - '(^| )subway\.($| )' -> '\1ubahn\2'; - '(^| )underground\.($| )' -> '\1ubahn\2'; - '(^| )ubahn($| )' -> '\1u\2'; - '(^| )s bahn\.($| )' -> '\1sbahn\2'; - '(^| )sbahn($| )' -> '\1s\2'; - '(^| )tramway($| )' -> '\1tram\2'; - '(^| )stadtbahn($| )' -> '\1tram\2'; - '(^| )strassenbahn($| )' -> '\1tram\2'; - '(^| )streetcar($| )' -> '\1tram\2'; - '(^| )tram($| )' -> '\1t\2'; - - # delete track information from name - '(^| )kante [a-zA-Z0-9]{1,2}($| )' -> ' '; - '(^| )gleis [a-zA-Z0-9]{1,2}($| )' -> ' '; - '(^| )track [a-zA-Z0-9]{1,2}($| )' -> ' '; - '(^| )voie [a-zA-Z0-9]{1,2}($| )' -> ' '; - - # abbrv - '(^| )und($| )' -> '\1+\2'; - '(^| )and($| )' -> '\1+\2'; - '(^| )et($| )' -> '\1+\2'; - - # noise - '\sde\s' -> ' '; - '\sda\s' -> ' '; - '\sdi\s' -> ' '; - '\sdel\s' -> ' '; - '\sdal\s' -> ' '; - - # abbrv in most western languages - '(^| )saint ' -> '\1st. '; - '(^| )sankt ' -> '\1st. '; - '(^| )sanct ' -> '\1st. '; - - \. 
-> ' '; - - # whitespace - \s+ -> ' '; - ^\s -> ''; - \s$ -> ''; - -line_normalize_chain: - , -> ' '; - - -> ' '; - _ -> ' '; - " -> ''; - ' -> ''; - ` -> ''; - / -> ' '; - < -> ' '; - > -> ' '; - & -> '+'; - ä -> ae; - ö -> oe; - ü -> ue; - ß -> ss; - è -> e; - é -> e; - á -> a; - à -> a; - ó -> o; - ò -> o; - í -> i; - ú -> u; - ù -> u; - ë -> e; - å -> ae; - ç -> c; - â -> a; - ê -> e; - ï -> i; - œ -> oe; - ø -> oe; - ^line -> ''; - ^linie -> ''; - ^metro -> ''; - ^tram -> ''; - ^strassenbahn -> ''; - ^bus -> ''; - - # delete everything in brackets - \(.+\) -> ' '; - \[.+\] -> ' '; - - # whitespace - \s+ -> ' '; - ^\s -> ''; - \s$ -> ''; - - # line/number combs ALWAYS without whitespace (T 2 -> T2) - ^([a-zA-Z]+) ([0-9]+)$ -> \1\2; - -track_normalize_chain: - '(^| )gleis($| )' -> ''; - '(^| )gl\.($| )' -> ''; - '(^| )platform($| )' -> ''; - '(^| )track($| )' -> ''; - '(^| )rail($| )' -> ''; - # line/number combs ALWAYS without whitespace (1 A -> 1A) - ^([a-zA-Z]+) ([0-9]+)$ -> \1\2; - ^([0-9]+) ([a-zA-Z]+)$ -> \1\2; - - # delete track numbers greater than 999 - ^[0-9]{4,}$ -> ''; diff --git a/eval/eval.cfg b/eval/eval.cfg deleted file mode 100644 index 6ad4ae9..0000000 --- a/eval/eval.cfg +++ /dev/null @@ -1,993 +0,0 @@ -# Copyright 2018, University of Freiburg -# Chair of Algorithms and Datastructures -# Authors: Patrick Brosi - -[rail] - -# OSM entities to keep on different levels, as k=v. Applies -# to nodes, edges and relations. -# Nodes included in kept ways are always kept. -# Ways included in kept relations are always kept. 
- -osm_filter_keep: - railway=rail - railway=light_rail - railway=narrow_gauge - route=rail - route=train - public_transport=stop_area|rel_flat - -osm_filter_lvl1: - usage=branch - -osm_filter_lvl2: - -osm_filter_lvl3: - service=crossover - service=siding - # we cannot completely drop service=yard, because it is often used - # incorrectly for crossovers - service=yard - -osm_filter_lvl4: - -osm_filter_lvl5: - usage=industrial - usage=military - usage=test - service=spur - railway:traffic_mode=freight - -# OSM entities to drop, as k=v. Applies to nodes, edges and -# relations. -# Nodes included in non-dropped ways are kept regardless of -# a matching drop filter. -# Ways included in non-dropped relations are kept regardless of -# a matching drop filter. - -osm_filter_drop: - railway=abandoned - railway=construction - railway=disused - railway=miniature - railway=signal - railway=razed - railway=proposed - metro=yes - area=yes - # access=no - type=multipolygon - railway=platform - public_transport=platform - building=yes - building=train_station - amenity=shelter - amenity=bus_station - building=roof - -# Nodes that should act as "no-hup" nodes. These are nodes -# that are contained in multiple ways, but cannot be used -# to switch from one way to another (for example, a -# track crossing in rail networks) - -osm_filter_nohup: - railway:switch=no - railway=railway_crossing - -# Edges that should act as one-way nodes. - -osm_filter_oneway: - oneway=yes - railway:preferred_direction=forward - -osm_filter_oneway_reverse: - railway:preferred_direction=backward - -# Edges that may explicitely be used in -# both directions. May be used to set exception -# to "osm_filter_oneway" - -osm_filter_undirected: - oneway=false - oneway=no - oneway=-1 - railway:preferred_direction=both - railway:bidirectional=regular - -# Nodes that are stations. -# Only nodes that have been kept during the filtering above will be -# checked. 
-osm_filter_station: - public_transport=stop_position - railway=stop - railway=halt - railway=station - #railway=tram_stop - railway=subway_stop - tram_stop=* - stop=* - -# Relation fields that should be used for catching the lines that -# occur on an edge. Only relations that have been kept during the -# filtering above will be checked. The 'linename' will be normalized -# according to the rules in line_normalization_chain. -# The 'from_name' and 'to_name' will be normalized according to the -# rules in station_normalization_chain. -# The relations tags are given in the order of their relevance - -# the first normalized tag-value that is not null/empty will be -# taken. -osm_line_relation_tags: - line_name=ref,name # careful, no space after/before comma allowed! - from_name=from - to_name=to - -# attr name together with the -# max distance in meters between any of the groups members and -# a potential new member -# first matching rule will be taken -# only applies to nodes that match osm_filter_station! 
-osm_station_group_attrs: - uic_ref=500 - wikidata=500 - [public_transport=stop_area]uic_ref=500 - [public_transport=stop_area]wikidata=500 - name=100 - [public_transport=stop_area]name=100 - -# max distance in meters between a snapped station position and the -# original station position -osm_max_snap_distance: 10, 100, 200 - -# max edge level to which station will be snapped -osm_max_snap_level: 2 - -# sorted by priority, first found attr will be taken -osm_station_name_attrs: - name - [public_transport=stop_area]name - uic_name - -# the track number tag in edges, first match is taken -osm_edge_track_number_tags: - railway:track_ref - local_ref - ref - -# the track number tag in stop nodes, first match is taken, -# overwrites osm_edge_track_number_tags -osm_track_number_tags: - local_ref - ref - -routing_lvl0_fac: 1 # default level -routing_lvl1_fac: 1.25 -routing_lvl2_fac: 1.5 -routing_lvl3_fac: 2 -routing_lvl4_fac: 2.5 -routing_lvl5_fac: 3.5 -routing_lvl6_fac: 5 -routing_lvl7_fac: 7 - -# Punishment (in meters) to add to the distance -# function if a vehicle performans a full turn -routing_full_turn_punish: 3000 - -routing_station_distance_punish_fac: 3.14 - -routing_non_osm_station_punish: 100 - -routing_platform_unmatched_punish: 2000 - -# Max angle that should be counted as a full turn -routing_full_turn_angle: 100 - -# Max angle in a route from a station to an already reachable neighbar -routing_snap_full_turn_angle: 100 - -# Punishment (in meters) to add to the distance -# function if a vehicle passes a station node without -# stopping there -routing_pass_thru_station_punish: 100 - -# Punishment factor for every meter a vehicle -# travels through a one-way edge -routing_one_way_meter_punish_fac: 1 - -# Punishment factor for every meter a vehicle -# travels through an edge without any matching line -# information -routing_line_unmatched_punish_fac: 1 - -# special line normalization for trains -line_normalize_chain: - , -> ' '; - - -> ' '; - _ -> ' '; - " -> 
''; - ' -> ''; - ` -> ''; - / -> ' '; - < -> ' '; - > -> ' '; - & -> '+'; - ä -> ae; - ö -> oe; - ü -> ue; - ß -> ss; - è -> e; - é -> e; - á -> a; - à -> a; - ó -> o; - ò -> o; - í -> i; - ú -> u; - ù -> u; - ë -> e; - ç -> c; - å -> ae; - â -> a; - ê -> e; - ï -> i; - œ -> oe; - ø -> oe; - ^line -> ''; - ^linie -> ''; - ^metro -> ''; - ^tram -> ''; - ^strassenbahn -> ''; - ^bus -> ''; - - # delete everything in brackets - \(.+\) -> ' '; - \[.+\] -> ' '; - - # whitespace - \s+ -> ' '; - ^\s -> ''; - \s$ -> ''; - - # line/number combs ALWAYS with whitespace (ICE101 -> ICE 101) - ^([a-zA-Z]+)([0-9]+)$ -> \1 \2; - - # if a character line number is present, delete the numeric part - ^([a-zA-Z]+) [0-9]+$ -> \1; - -[bus] - -# OSM entities to keep on different levels, as k=v. Applies -# to nodes, edges and relations. -# Nodes included in kept ways are always kept. -# Ways included in kept relations are always kept. - -osm_filter_keep: - # highways - highway=motorway - highway=trunk - highway=primary - highway=secondary - highway=tertiary - highway=residential - highway=living_street - highway=unclassified - - # highway links - highway=motorway_link - highway=trunk_link - highway=primary_link - highway=secondary_link - highway=tertiary_link - highway=residential_link - - way=primary - way=seconday - way=bus_guideway - highway=bus_guideway - busway=* - psv=yes - psv=designated - - trolley_wire=yes - trolleywire=yes - trolleybus=yes - trolley_bus=yes - - route=bus - route=trolleybus - bus=yes - - public_transport=stop_position - bus_stop=* - stop=* - highway=bus_stop - amenity=bus_station|no_match_ways|no_match_rels - - # relations for the restriction system - type=restriction - type=restriction:bus - type=restriction:motorcar - -osm_filter_lvl1: - highway=secondary - highway=secondary_link - bus=yes - psv=yes - access=psv - access=bus - trolley_wire=yes - trolleywire=yes - trolleybus=yes - trolley_bus=yes - psv=designated - -osm_filter_lvl2: - highway=tertiary - 
highway=tertiary_link - -osm_filter_lvl3: - highway=unclassified - highway=residential - highway=road - -osm_filter_lvl4: - highway=living_street - highway=pedestrian - highway=service - -osm_filter_lvl5: - service=siding - access=permissive - access=private - access=no - service=parking_aisle - highway=footway - -# OSM entities to drop, as k=v. Applies to nodes, edges and -# relations. -# Nodes included in non-dropped ways are kept regardless of -# a matching drop filter. -# Ways included in non-dropped relations are kept regardless of -# a matching drop filter. - -osm_filter_drop: - area=yes - train=yes|no_match_ways - # access=no - public_transport=stop_area|no_match_nds|no_match_rels - type=multipolygon - railway=platform - railway=station - # service=parking_aisle - highway=proposed - highway=footway - highway=construction - building=yes - building=train_station - leisure=garden - leisure=park - -# Nodes that should act as "no-hup" nodes. These are nodes -# that are contained in multiple ways, but cannot be used -# to switch from one way to another (for example, a -# track crossing in rail networks) - -osm_filter_nohup: - -# Configuration of the OSM road restriction system -# We only support restriction with a single via node -# atm - -osm_node_negative_restriction: - restriction=no_right_turn - restriction=no_left_turn - restriction=no_u_turn - restriction=no_straight_on - restriction:bus=no_right_turn - restriction:bus=no_left_turn - restriction:bus=no_u_turn - restriction:bus=no_straight_on - -osm_node_positive_restriction: - restriction=only_left_turn - restriction=only_straight_on - restriction=only_right_turn - restriction:bus=only_left_turn - restriction:bus=only_straight_on - restriction:bus=only_right_turn - -osm_filter_no_restriction: - except=psv|mult_val_match - except=bus|mult_val_match - -# Edges that should act as one-way nodes. 
- -osm_filter_oneway: - junction=roundabout # oneway=yes is implied - highway=motorway # oneway=yes is implied - oneway=yes - oneway=1 - oneway=true - oneway:bus=yes - oneway:bus=1 - oneway:bus=true - oneway:psv=yes - oneway:psv=1 - oneway:psv=true - -osm_filter_oneway_reverse: - oneway=-1 - -# Edges that may explicitely be used in -# both directions. May be used to set exception -# to "osm_filter_oneway" - -osm_filter_undirected: - oneway=false - oneway=0 - oneway=alternating - oneway=reversible - oneway=no - oneway:bus=no - oneway:bus=0 - oneway:bus=false - oneway:psv=no - oneway:psv=0 - oneway:psv=false - busway=opposite_lane - busway=opposite - busway:left=opposite_lane - busway:right=opposite_lane - psv=opposite_lane - psv=opposite - -# Nodes that are stations. -# Only nodes that have been kept during the filtering above will be -# checked. -osm_filter_station: - public_transport=stop_position - bus_stop=* - stop=* - highway=bus_stop - amenity=bus_station - -# Relation fields that should be used for catching the lines that -# occur on an edge. Only relations that have been kept during the -# filtering above will be checked. The 'linename' will be normalized -# according to the rules in line_normalization_chain. -# The 'from_name' and 'to_name' will be normalized according to the -# rules in station_normalization_chain. -# The relations tags are given in the order of their relevance - -# the first normalized tag-value that is not null/empty will be -# taken. -osm_line_relation_tags: - line_name=ref,name # careful, no space after/before comma allowed! - from_name=from - to_name=to - - -# attr name together with the -# max distance in meters between any of the groups members and -# a potential new member -# first matching rule will be taken -# only applies to nodes that match osm_filter_station! 
-osm_station_group_attrs: - uic_ref=500 - wikidata=500 - name=100 - -# max distance in meters between a snapped station position and the -# original station position -osm_max_snap_distance: 10 , 50, 100 - -osm_max_snap_level: 5 - -osm_max_osm_station_distance: 7.5 - -# sorted by priority, first found attr will be taken -osm_station_name_attrs: - name - uic_name - -# the track number tag in stop nodes, first one is taken -osm_track_number_tags: local_ref - -routing_lvl0_fac: 1 # default level -routing_lvl1_fac: 1.25 -routing_lvl2_fac: 1.5 -routing_lvl3_fac: 1.75 -routing_lvl4_fac: 2.25 -routing_lvl5_fac: 3 -routing_lvl6_fac: 4 -routing_lvl7_fac: 5 - -# Punishment (in meters) to add to the distance -# function if a vehicle performans a full turn -routing_full_turn_punish: 500 - -routing_station_distance_punish_fac: 2.5 - -routing_non_osm_station_punish: 500 - -# Max angle that should be counted as a full turn -routing_full_turn_angle: 20 - -# Max angle in a route from a station to an already reachable neighbor -routing_snap_full_turn_angle: 110 - -osm_max_node_block_distance: 10 - -# Punishment (in meters) to add to the distance -# function if a vehicle passes a station node without -# stopping there -routing_pass_thru_station_punish: 0 - -# Punishment factor for every meter a vehicle -# travels through a one-way edge -routing_one_way_meter_punish_fac: 4 - -routing_one_way_edge_punish: 5000 - -# Punishment factor for every meter a vehicle -# travels through an edge without any matching line -# information -# routing_line_unmatched_punish_fac: 1.75 - -[tram, subway, funicular] - -# OSM entities to keep on different levels, as k=v. Applies -# to nodes, edges and relations. -# Nodes included in kept ways are always kept. -# Ways included in kept relations are always kept. 
- -osm_filter_keep: - route=tram - railway=subway - railway=light_rail - railway=tram - railway=funicular - railway=station - railway=halt - railway=tram_stop - route=subway - route=light_rail - subway=yes - tram=yes - -osm_filter_lv2: - service=siding - -osm_filter_lvl5: - service=crossover - service=yard - -# OSM entities to drop, as k=v. Applies to nodes, edges and -# relations. -# Nodes included in non-dropped ways are kept regardless of -# a matching drop filter. -# Ways included in non-dropped relations are kept regardless of -# a matching drop filter. - -osm_filter_drop: - area=yes - public_transport=stop_area - type=multipolygon - railway=platform - public_transport=platform - service=alley - -# Nodes that should act as "no-hup" nodes. These are nodes -# that are contained in multiple ways, but cannot be used -# to switch from one way to another (for example, a -# track crossing in rail networks) - -osm_filter_nohup: - railway:switch=no - railway=railway_crossing - -# Edges that should act as one-way nodes. - -osm_filter_oneway: - oneway=yes - -# Edges that may explicitely be used in -# both directions. May be used to set exception -# to "osm_filter_oneway" - -osm_filter_undirected: - -# Nodes that are stations. -# Only nodes that have been kept during the filtering above will be -# checked. -osm_filter_station: - public_transport=stop_position - station=subway - station=tram - railway=stop - railway=halt - railway=station - railway=tram_stop - railway=subway_stop - tram_stop=* - stop=* - -# Relation fields that should be used for catching the lines that -# occur on an edge. Only relations that have been kept during the -# filtering above will be checked. The 'linename' will be normalized -# according to the rules in line_normalization_chain. -# The 'from_name' and 'to_name' will be normalized according to the -# rules in station_normalization_chain. 
-# The relations tags are given in the order of their relevance - -# the first normalized tag-value that is not null/empty will be -# taken. -osm_line_relation_tags: - line_name=ref,name # careful, no space after/before comma allowed! - from_name=from - to_name=to - - -# attr name together with the -# max distance in meters between any of the groups members and -# a potential new member -# first matching rule will be taken -# only applies to nodes that match osm_filter_station! -osm_station_group_attrs: - uic_ref=500 - wikidata=500 - name=100 - -# max distance in meters between a snapped station position and the -# original station position -osm_max_snap_distance: 10, 50, 100 - -osm_max_snap_level: 4 - - -# sorted by priority, first found attr will be taken -osm_station_name_attrs: - name - uic_name - -# the track number tag in stop nodes, first one is taken -osm_track_number_tags: local_ref - -routing_lvl0_fac: 1 # default level -routing_lvl1_fac: 1.5 -routing_lvl2_fac: 2 -routing_lvl3_fac: 2.5 -routing_lvl4_fac: 3.5 -routing_lvl5_fac: 5 -routing_lvl6_fac: 5 -routing_lvl7_fac: 5 - -# Punishment (in meters) to add to the distance -# function if a vehicle performans a full turn -routing_full_turn_punish: 2000 - -routing_station_distance_punish_fac: 3.14 - -routing_non_osm_station_punish: 235 - -# Max angle that should be counted as a full turn -routing_full_turn_angle: 80 - -# Max angle in a route from a station to an already reachable neighbar -routing_snap_full_turn_angle: 80 - -# Punishment (in meters) to add to the distance -# function if a vehicle passes a station node without -# stopping there -routing_pass_thru_station_punish: 100 - -# Punishment factor for every meter a vehicle -# travels through a one-way edge -routing_one_way_meter_punish_fac: 1 - -# Punishment factor for every meter a vehicle -# travels through an edge without any matching line -# information -routing_line_unmatched_punish_fac: 0.5 - -[ferry] - -# OSM entities to keep on different levels, 
as k=v. Applies -# to nodes, edges and relations. -# Nodes included in kept ways are always kept. -# Ways included in kept relations are always kept. - -osm_filter_keep: - route=ferry - waterway=river - motorboat=yes - ferry=yes - -# Nodes that are stations. -# Only nodes that have been kept during the filtering above will be -# checked. -osm_filter_station: - public_transport=stop_position - station=ferry - railway=stop - railway=halt - railway=station - stop=* - -# Relation fields that should be used for catching the lines that -# occur on an edge. Only relations that have been kept during the -# filtering above will be checked. The 'linename' will be normalized -# according to the rules in line_normalization_chain. -# The 'from_name' and 'to_name' will be normalized according to the -# rules in station_normalization_chain. -# The relations tags are given in the order of their relevance - -# the first normalized tag-value that is not null/empty will be -# taken. -osm_line_relation_tags: - line_name=ref,name # careful, no space after/before comma allowed! - from_name=from - to_name=to - - -# attr name together with the -# max distance in meters between any of the groups members and -# a potential new member -# first matching rule will be taken -# only applies to nodes that match osm_filter_station! 
-osm_station_group_attrs: - uic_ref=500 - wikidata=500 - name=100 - -# max distance in meters between a snapped station position and the -# original station position -osm_max_snap_distance: 10, 100, 200 - -osm_max_snap_level: 4 - - -# sorted by priority, first found attr will be taken -osm_station_name_attrs: - name - uic_name - -# the track number tag in stop nodes, first one is taken -osm_track_number_tags: local_ref - -routing_lvl0_fac: 1 # default level -routing_lvl1_fac: 1.5 -routing_lvl2_fac: 2 -routing_lvl3_fac: 2.5 -routing_lvl4_fac: 3.5 -routing_lvl5_fac: 5 -routing_lvl6_fac: 5 -routing_lvl7_fac: 5 - -# Punishment (in meters) to add to the distance -# function if a vehicle performans a full turn -routing_full_turn_punish: 100 - -routing_station_distance_punish_fac: 3.14 - -routing_non_osm_station_punish: 50 - -# Max angle that should be counted as a full turn -routing_full_turn_angle: 45 - -# Max angle in a route from a station to an already reachable neighbar -routing_snap_full_turn_angle: 0 - -# Punishment (in meters) to add to the distance -# function if a vehicle passes a station node without -# stopping there -routing_pass_thru_station_punish: 0 - -# Punishment factor for every meter a vehicle -# travels through a one-way edge -routing_one_way_meter_punish_fac: 1 - -# Punishment factor for every meter a vehicle -# travels through an edge without any matching line -# information -routing_line_unmatched_punish_fac: 0.5 - -[tram, bus, subway, rail, gondola, funicular, ferry] -# Regular expressions and station comparision is -# always case insensitive! 
-station_normalize_chain: - , -> ' '; - - -> ' '; - — -> ' '; - _ -> ' '; - " -> ''; - ' -> ''; - ` -> ''; - \( -> ' '; - \) -> ' '; - \[ -> ' '; - \] -> ' '; - / -> ' '; - '\\' -> ' '; - < -> ' '; - > -> ' '; - & -> '+'; - ä -> ae; - ö -> oe; - ü -> ue; - ß -> ss; - è -> e; - é -> e; - á -> a; - à -> a; - ó -> o; - ò -> o; - ô -> o; - ç -> c; - í -> i; - ú -> u; - ù -> u; - ë -> e; - å -> ae; - â -> a; - ê -> e; - ï -> i; - œ -> oe; - ø -> oe; - str\. -> strasse; - av\. -> avenue; - - # always separate 'street', 'strasse' - '([a-zA-Z])strasse($| )' -> '\1 strasse\2'; - '([a-zA-Z])street($| )' -> '\1 street\2'; - - # always use "street" - '(^| )strasse($| )' -> '\1street\2'; - - # always use "avenue" - '(^| )avenida($| )' -> '\1avenue\2'; - '(^| )avenu($| )' -> '\1avenue\2'; - - # normalize every possible abbr. of german "Bahnhof", "Hauptbahnhof", "Busbahnhof" - '(^| )hauptbf\.($| )' -> '\1hauptbahnhof\2'; - '(^| )hauptbf($| )' -> '\1hauptbahnhof\2'; - '(^| )hauptbhf\.($| )' -> '\1hauptbahnhof\2'; - '(^| )hauptbhf($| )' -> '\1hauptbahnhof\2'; - '(^| )zentraler busbahnhof($| )$' -> \1busbahnhof\2; - '(^| )zentraler omnibusbahnhof($| )$' -> \1busbahnhof\2; - '(^| )omnibusbahnhof($| )' -> '\1busbahnhof\2'; - '(^| )omnibusbhf($| )' -> '\1busbahnhof\2'; - '(^| )busbf\.($| )' -> '\1busbahnhof\2'; - '(^| )busbf($| )' -> '\1busbahnhof\2'; - '(^| )bus bf\.($| )' -> '\1busbahnhof\2'; - '(^| )bus bf($| )' -> '\1busbahnhof\2'; - '(^| )busbhf\.($| )' -> '\1busbahnhof\2'; - '(^| )busbhf($| )' -> '\1busbahnhof\2'; - '(^| )bus bhf\.($| )' -> '\1busbahnhof\2'; - '(^| )bus bhf($| )' -> '\1busbahnhof\2'; - '(^| )zob($| )' -> '\1busbahnhof\2'; - '(^| )hbf\.($| )' -> '\1hauptbahnhof\2'; - '(^| )hbf($| )' -> '\1hauptbahnhof\2'; - '(^| )hb\.($| )' -> '\1hauptbahnhof\2'; - '(^| )hb($| )' -> '\1hauptbahnhof\2'; - '(^| )bf\.($| )' -> '\1bahnhof\2'; - '(^| )bf($| )' -> '\1bahnhof\2'; - '(^| )bhf\.($| )' -> '\1bahnhof\2'; - '(^| )bhf($| )' -> '\1bahnhof\2'; - '(^| )bhfeingang($| )' -> 
'\1bahnhofeingang\2'; - '(^| )gare de($| )' -> '\1gare\2'; - - - # if a stations starts with single station identifier - # always put it at the end (for example, "hauptbahnhof freiburg" becomes "freiburg hauptbahnhof") - '^hauptbahnhof (.+)$' -> \1 hauptbahnhof; - '^bahnhof (.+)$' -> \1 bahnhof; - '^busbahnhof (.+)$' -> \1 busbahnhof; - '^gare (.+)$' -> \1 gare; - '^station (.+)$' -> \1 station; - - '(^| )busbahnhof($| )' -> '\1bbahnhof\2'; - - # normalize line types in station names - '(^| )u bahn\.($| )' -> '\1ubahn\2'; - '(^| )metro\.($| )' -> '\1ubahn\2'; - '(^| )subway\.($| )' -> '\1ubahn\2'; - '(^| )underground\.($| )' -> '\1ubahn\2'; - '(^| )ubahn($| )' -> '\1u\2'; - '(^| )s bahn\.($| )' -> '\1sbahn\2'; - '(^| )sbahn($| )' -> '\1s\2'; - '(^| )tramway($| )' -> '\1tram\2'; - '(^| )stadtbahn($| )' -> '\1tram\2'; - '(^| )strassenbahn($| )' -> '\1tram\2'; - '(^| )streetcar($| )' -> '\1tram\2'; - '(^| )tram($| )' -> '\1t\2'; - - # delete track information from name - '(^| )kante [a-zA-Z0-9]{1,2}($| )' -> ' '; - '(^| )gleis [a-zA-Z0-9]{1,2}($| )' -> ' '; - '(^| )track [a-zA-Z0-9]{1,2}($| )' -> ' '; - '(^| )voie [a-zA-Z0-9]{1,2}($| )' -> ' '; - - # abbrv - '(^| )und($| )' -> '\1+\2'; - '(^| )and($| )' -> '\1+\2'; - '(^| )et($| )' -> '\1+\2'; - - # noise - '\sde\s' -> ' '; - '\sda\s' -> ' '; - '\sdi\s' -> ' '; - '\sdel\s' -> ' '; - '\sdal\s' -> ' '; - - # abbrv in most western languages - '(^| )saint ' -> '\1st. '; - '(^| )sankt ' -> '\1st. '; - '(^| )sanct ' -> '\1st. '; - - \. 
-> ' '; - - # whitespace - \s+ -> ' '; - ^\s -> ''; - \s$ -> ''; - -line_normalize_chain: - , -> ' '; - - -> ' '; - _ -> ' '; - " -> ''; - ' -> ''; - ` -> ''; - / -> ' '; - < -> ' '; - > -> ' '; - & -> '+'; - ä -> ae; - ö -> oe; - ü -> ue; - ß -> ss; - è -> e; - é -> e; - á -> a; - à -> a; - ó -> o; - ò -> o; - í -> i; - ú -> u; - ù -> u; - ë -> e; - å -> ae; - ç -> c; - â -> a; - ê -> e; - ï -> i; - œ -> oe; - ø -> oe; - ^line -> ''; - ^linie -> ''; - ^metro -> ''; - ^tram -> ''; - ^strassenbahn -> ''; - ^bus -> ''; - - # delete everything in brackets - \(.+\) -> ' '; - \[.+\] -> ' '; - - # whitespace - \s+ -> ' '; - ^\s -> ''; - \s$ -> ''; - - # line/number combs ALWAYS without whitespace (T 2 -> T2) - ^([a-zA-Z]+) ([0-9]+)$ -> \1\2; - -track_normalize_chain: - '(^| )gleis($| )' -> ''; - '(^| )gl\.($| )' -> ''; - '(^| )platform($| )' -> ''; - '(^| )track($| )' -> ''; - '(^| )rail($| )' -> ''; - # line/number combs ALWAYS without whitespace (1 A -> 1A) - ^([a-zA-Z]+) ([0-9]+)$ -> \1\2; - ^([0-9]+) ([a-zA-Z]+)$ -> \1\2; - - # delete track numbers greater than 999 - ^[0-9]{4,}$ -> ''; diff --git a/geo/pfaedle.qgs b/geo/pfaedle.qgs index 8636d80..aa197d3 100644 --- a/geo/pfaedle.qgs +++ b/geo/pfaedle.qgs @@ -5,136 +5,141 @@ - + - + - + - + - + - + - + - + - + - - - - - - + + + + + + - degrees + meters - 867288.79171589459292591 - 6099511.4607889149338007 - 890845.27921608532778919 - 6122540.06078738905489445 + 866081.24618305882904679 + 6076552.62015097495168447 + 870662.44395622855518013 + 6080842.96235341485589743 0 - 0 + 1 - +proj=longlat +datum=WGS84 +no_defs - 3452 - 4326 - EPSG:4326 - WGS 84 - longlat + +proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs + 1353 + 3395 + EPSG:3395 + WGS 84 / World Mercator + merc WGS84 - true + false 0 - + + + + + + + + path20180217155708341 OGRGeoJSON_Point20180203134333739 OGRGeoJSON_LineString20180203134333975 OGRGeoJSON_Point20180206114956218 OGRGeoJSON_LineString20180206114956229 - path20180217155708341 - 
trgraph_trgraph_LineString20180508200527144 - trgraph_trgraph_Point20180508200527256 + OSM_Transportation20181215024818603 + OpenStreetMap_de20181215024846026 - - + + - - + + - + - - + + - + - - - - - - - - + + + + + + + - + - 875390.4375 - 6113024.5 - 875455.125 - 6113045.5 + 6.70734330570000026 + 47.04982400000000098 + 6.77521899999999988 + 47.07313500000000062 OGRGeoJSON_LineString20180203134333975 ./graph.json @@ -158,8 +163,8 @@ - - + + @@ -174,18 +179,12 @@ - - - - - - @@ -1169,605 +1168,18 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0 - 0 - 0 - station_name - - - - . - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - . - - 0 - . - - 0 - generatedlayout - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - OGRGeoJSON_LineString20180206114956229 - ./combgraph.json|layerid=0|subset="dummy" = 'no' - - - - OGRGeoJSON LineString - - - +proj=longlat +datum=WGS84 +no_defs - 3452 - 4326 - EPSG:4326 - WGS 84 - longlat - WGS84 - true - - - ogr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0 0 0 - station_name + station_alt_names - - + @@ -1814,78 +1226,255 @@ - - . 
+ + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - + + + + + . + + 0 + . + + 0 + generatedlayout + + + + + + + + + + + + + + + + + + + + + + + + + + + station_name + + + + 6.70734330570000026 + 47.05522500000000008 + 6.74796900000000033 + 47.06722252349999991 + + OGRGeoJSON_LineString20180206114956229 + ./combgraph.json|layerid=0|subset="dummy" = 'no' + + + + OGRGeoJSON LineString + + + +proj=longlat +datum=WGS84 +no_defs + 3452 + 4326 + EPSG:4326 + WGS 84 + longlat + WGS84 + true + + + + + + + + ogr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . @@ -1921,7 +1510,6 @@ def my_form_open(dialog, layer, feature): - @@ -1931,20 +1519,6 @@ def my_form_open(dialog, layer, feature): - - - - - - - - - - - - - - @@ -1952,14 +1526,14 @@ def my_form_open(dialog, layer, feature): - + station_name - + - 842634.8125 - 6090818 - 884707.25 - 6121615.5 + 6.70734330570000026 + 47.04982400000000098 + 6.77521899999999988 + 47.07313500000000062 OGRGeoJSON_Point20180203134333739 ./graph.json @@ -1983,8 +1557,8 @@ def my_form_open(dialog, layer, feature): - - + + @@ -1999,18 +1573,12 @@ def my_form_open(dialog, layer, feature): - - - - - - @@ -2195,177 +1763,16 @@ def my_form_open(dialog, layer, feature): - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0 0 0 - station_name + station_alt_names - - + @@ -2412,41 +1819,40 @@ def my_form_open(dialog, layer, feature): - - . + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - + + + . 
@@ -2488,10 +1897,7 @@ def my_form_open(dialog, layer, feature): generatedlayout - - + @@ -2503,10 +1909,8 @@ def my_form_open(dialog, layer, feature): - - @@ -2521,9 +1925,15 @@ def my_form_open(dialog, layer, feature): - + station_name - + + + 6.70734330570000026 + 47.05522500000000008 + 6.74796900000000033 + 47.06722252349999991 + OGRGeoJSON_Point20180206114956218 ./combgraph.json @@ -2546,8 +1956,8 @@ def my_form_open(dialog, layer, feature): - - + + @@ -2562,9 +1972,6 @@ def my_form_open(dialog, layer, feature): - - - @@ -2592,48 +1999,6 @@ def my_form_open(dialog, layer, feature): - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2679,14 +2044,18 @@ def my_form_open(dialog, layer, feature): - + + + + + 0 0 0 - station_name + station_alt_names - - . + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + - + + + - + + . 0 . - + 0 generatedlayout @@ -2764,7 +2186,6 @@ def my_form_open(dialog, layer, feature): - @@ -2774,34 +2195,112 @@ def my_form_open(dialog, layer, feature): - - - - - - - - - - - - - - - + station_name - + - 735429.75 - 5862813.5 - 738946.375 - 5867690 + -20037508.34278924390673637 + -20037508.34278925508260727 + 20037508.34278924390673637 + 20037508.34278924390673637 + + OSM_Transportation20181215024818603 + crs=EPSG:3857&format&type=xyz&url=http://tile.thunderforest.com/transport/%7Bz%7D/%7Bx%7D/%7By%7D.png&zmax=19&zmin=0 + + + + Maps © Thunderforest, Data © OpenStreetMap contributors + OSM Transportation + + + +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs + 3857 + 3857 + EPSG:3857 + WGS 84 / Pseudo Mercator + merc + WGS84 + false + + + + + + wms + + + + + + + + + + + + + + + 0 + + + + -20037508.34278924390673637 + -20037508.34278925508260727 + 20037508.34278924390673637 + 20037508.34278924390673637 + + OpenStreetMap_de20181215024846026 + 
crs=EPSG:3857&format&type=xyz&url=http://a.tile.openstreetmap.de/tiles/osmde/%7Bz%7D/%7Bx%7D/%7By%7D.png&zmax=18&zmin=0 + + + + OpenStreetMap contributors, under ODbL + OpenStreetMap.de + + + +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs + 3857 + 3857 + EPSG:3857 + WGS 84 / Pseudo Mercator + merc + WGS84 + false + + + + + + wms + + + + + + + + + + + + + + + 0 + + + + 6.70738836720000009 + 47.05522500000000008 + 6.74579299999999993 + 47.06721104129999844 path20180217155708341 ./path.json @@ -2825,8 +2324,8 @@ def my_form_open(dialog, layer, feature): - - + + @@ -2890,1434 +2389,18 @@ def my_form_open(dialog, layer, feature): - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0 - 0 - 30 - - - - - . - - - - - - - - . - - 0 - . 
- - 0 - generatedlayout - - - - - - - - - - trgraph_trgraph_LineString20180508200527144 - ./trgraph.json|layerid=0|geometrytype=LineString - - - - trgraph trgraph LineString - - - +proj=longlat +datum=WGS84 +no_defs - 3452 - 4326 - EPSG:4326 - WGS 84 - longlat - WGS84 - true - - - ogr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0 0 0 - id + - - + + @@ -4363,36 +2447,23 @@ def my_form_open(dialog, layer, feature): - + - - - - - - - - - + - + + + - - + . 0 - + . - - - - - - - - - - - - - trgraph_trgraph_Point20180508200527256 - ./trgraph.json|layerid=0|geometrytype=Point - - - - trgraph trgraph Point - - - +proj=longlat +datum=WGS84 +no_defs - 3452 - 4326 - EPSG:4326 - WGS 84 - longlat - WGS84 - true - - - ogr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0 - 0 - 0 - id - - - - - - - - - - - - - - - - - - - - - - 0 - - - 0 - generatedlayout - - - - - - - - - - - - - - + + + + + false @@ -4590,17 +2514,19 @@ def my_form_open(dialog, layer, feature): 2 true - D + MU - false + + false - NONE + WGS84 8 + false @@ -4608,70 +2534,29 @@ def my_form_open(dialog, layer, feature): 0 - 240 - 240 + 255 + 255 255 255 255 - 240 + 255 - - 2 - - OGRGeoJSON_LineString20180203134333975 - OGRGeoJSON_LineString20180206114956229 - OGRGeoJSON_Point20180203134333739 - OGRGeoJSON_Point20180206114956218 - path20180217155708341 - - - enabled - enabled - enabled - enabled - enabled - - current_layer - - - 2 - 2 - 2 - 2 - 2 - - - to_vertex - to_vertex - to_vertex - to_vertex - to_vertex - - off - 0 - - 0.000000 - 0.000000 - 0.000000 - 0.000000 - 0.000000 - - - + 
None false - +proj=longlat +datum=WGS84 +no_defs - EPSG:4326 - 3452 + +proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs + EPSG:3395 + 1353 + 1 @@ -4681,11 +2566,11 @@ def my_form_open(dialog, layer, feature): 255 - + conditions unknown 90 meters - + m2 diff --git a/geo/schweiz_ex.png b/geo/schweiz_ex.png new file mode 100644 index 0000000..9485c2a Binary files /dev/null and b/geo/schweiz_ex.png differ diff --git a/geo/schweiz_ex_res.png b/geo/schweiz_ex_res.png new file mode 100644 index 0000000..2ffa3bd Binary files /dev/null and b/geo/schweiz_ex_res.png differ diff --git a/geo/schweiz_mmatched.png b/geo/schweiz_mmatched.png deleted file mode 100644 index d2c6b17..0000000 Binary files a/geo/schweiz_mmatched.png and /dev/null differ diff --git a/geo/stuttgart_ex.png b/geo/stuttgart_ex.png new file mode 100644 index 0000000..000b33b Binary files /dev/null and b/geo/stuttgart_ex.png differ diff --git a/geo/stuttgart_ex_res.png b/geo/stuttgart_ex_res.png new file mode 100644 index 0000000..a1f0c59 Binary files /dev/null and b/geo/stuttgart_ex_res.png differ diff --git a/pfaedle.cfg b/pfaedle.cfg index 6ad4ae9..07cc68c 100644 --- a/pfaedle.cfg +++ b/pfaedle.cfg @@ -2,793 +2,13 @@ # Chair of Algorithms and Datastructures # Authors: Patrick Brosi -[rail] +[tram, bus, coach, subway, rail, gondola, funicular, ferry] -# OSM entities to keep on different levels, as k=v. Applies -# to nodes, edges and relations. -# Nodes included in kept ways are always kept. -# Ways included in kept relations are always kept. 
+routing_transition_penalty_fac: 0.0083 +routing_station_move_penalty_fac: 0.0039 -osm_filter_keep: - railway=rail - railway=light_rail - railway=narrow_gauge - route=rail - route=train - public_transport=stop_area|rel_flat +station_similarity_classification_method: jaccard-geodist -osm_filter_lvl1: - usage=branch - -osm_filter_lvl2: - -osm_filter_lvl3: - service=crossover - service=siding - # we cannot completely drop service=yard, because it is often used - # incorrectly for crossovers - service=yard - -osm_filter_lvl4: - -osm_filter_lvl5: - usage=industrial - usage=military - usage=test - service=spur - railway:traffic_mode=freight - -# OSM entities to drop, as k=v. Applies to nodes, edges and -# relations. -# Nodes included in non-dropped ways are kept regardless of -# a matching drop filter. -# Ways included in non-dropped relations are kept regardless of -# a matching drop filter. - -osm_filter_drop: - railway=abandoned - railway=construction - railway=disused - railway=miniature - railway=signal - railway=razed - railway=proposed - metro=yes - area=yes - # access=no - type=multipolygon - railway=platform - public_transport=platform - building=yes - building=train_station - amenity=shelter - amenity=bus_station - building=roof - -# Nodes that should act as "no-hup" nodes. These are nodes -# that are contained in multiple ways, but cannot be used -# to switch from one way to another (for example, a -# track crossing in rail networks) - -osm_filter_nohup: - railway:switch=no - railway=railway_crossing - -# Edges that should act as one-way nodes. - -osm_filter_oneway: - oneway=yes - railway:preferred_direction=forward - -osm_filter_oneway_reverse: - railway:preferred_direction=backward - -# Edges that may explicitely be used in -# both directions. May be used to set exception -# to "osm_filter_oneway" - -osm_filter_undirected: - oneway=false - oneway=no - oneway=-1 - railway:preferred_direction=both - railway:bidirectional=regular - -# Nodes that are stations. 
-# Only nodes that have been kept during the filtering above will be -# checked. -osm_filter_station: - public_transport=stop_position - railway=stop - railway=halt - railway=station - #railway=tram_stop - railway=subway_stop - tram_stop=* - stop=* - -# Relation fields that should be used for catching the lines that -# occur on an edge. Only relations that have been kept during the -# filtering above will be checked. The 'linename' will be normalized -# according to the rules in line_normalization_chain. -# The 'from_name' and 'to_name' will be normalized according to the -# rules in station_normalization_chain. -# The relations tags are given in the order of their relevance - -# the first normalized tag-value that is not null/empty will be -# taken. -osm_line_relation_tags: - line_name=ref,name # careful, no space after/before comma allowed! - from_name=from - to_name=to - -# attr name together with the -# max distance in meters between any of the groups members and -# a potential new member -# first matching rule will be taken -# only applies to nodes that match osm_filter_station! 
-osm_station_group_attrs: - uic_ref=500 - wikidata=500 - [public_transport=stop_area]uic_ref=500 - [public_transport=stop_area]wikidata=500 - name=100 - [public_transport=stop_area]name=100 - -# max distance in meters between a snapped station position and the -# original station position -osm_max_snap_distance: 10, 100, 200 - -# max edge level to which station will be snapped -osm_max_snap_level: 2 - -# sorted by priority, first found attr will be taken -osm_station_name_attrs: - name - [public_transport=stop_area]name - uic_name - -# the track number tag in edges, first match is taken -osm_edge_track_number_tags: - railway:track_ref - local_ref - ref - -# the track number tag in stop nodes, first match is taken, -# overwrites osm_edge_track_number_tags -osm_track_number_tags: - local_ref - ref - -routing_lvl0_fac: 1 # default level -routing_lvl1_fac: 1.25 -routing_lvl2_fac: 1.5 -routing_lvl3_fac: 2 -routing_lvl4_fac: 2.5 -routing_lvl5_fac: 3.5 -routing_lvl6_fac: 5 -routing_lvl7_fac: 7 - -# Punishment (in meters) to add to the distance -# function if a vehicle performans a full turn -routing_full_turn_punish: 3000 - -routing_station_distance_punish_fac: 3.14 - -routing_non_osm_station_punish: 100 - -routing_platform_unmatched_punish: 2000 - -# Max angle that should be counted as a full turn -routing_full_turn_angle: 100 - -# Max angle in a route from a station to an already reachable neighbar -routing_snap_full_turn_angle: 100 - -# Punishment (in meters) to add to the distance -# function if a vehicle passes a station node without -# stopping there -routing_pass_thru_station_punish: 100 - -# Punishment factor for every meter a vehicle -# travels through a one-way edge -routing_one_way_meter_punish_fac: 1 - -# Punishment factor for every meter a vehicle -# travels through an edge without any matching line -# information -routing_line_unmatched_punish_fac: 1 - -# special line normalization for trains -line_normalize_chain: - , -> ' '; - - -> ' '; - _ -> ' '; - " -> 
''; - ' -> ''; - ` -> ''; - / -> ' '; - < -> ' '; - > -> ' '; - & -> '+'; - ä -> ae; - ö -> oe; - ü -> ue; - ß -> ss; - è -> e; - é -> e; - á -> a; - à -> a; - ó -> o; - ò -> o; - í -> i; - ú -> u; - ù -> u; - ë -> e; - ç -> c; - å -> ae; - â -> a; - ê -> e; - ï -> i; - œ -> oe; - ø -> oe; - ^line -> ''; - ^linie -> ''; - ^metro -> ''; - ^tram -> ''; - ^strassenbahn -> ''; - ^bus -> ''; - - # delete everything in brackets - \(.+\) -> ' '; - \[.+\] -> ' '; - - # whitespace - \s+ -> ' '; - ^\s -> ''; - \s$ -> ''; - - # line/number combs ALWAYS with whitespace (ICE101 -> ICE 101) - ^([a-zA-Z]+)([0-9]+)$ -> \1 \2; - - # if a character line number is present, delete the numeric part - ^([a-zA-Z]+) [0-9]+$ -> \1; - -[bus] - -# OSM entities to keep on different levels, as k=v. Applies -# to nodes, edges and relations. -# Nodes included in kept ways are always kept. -# Ways included in kept relations are always kept. - -osm_filter_keep: - # highways - highway=motorway - highway=trunk - highway=primary - highway=secondary - highway=tertiary - highway=residential - highway=living_street - highway=unclassified - - # highway links - highway=motorway_link - highway=trunk_link - highway=primary_link - highway=secondary_link - highway=tertiary_link - highway=residential_link - - way=primary - way=seconday - way=bus_guideway - highway=bus_guideway - busway=* - psv=yes - psv=designated - - trolley_wire=yes - trolleywire=yes - trolleybus=yes - trolley_bus=yes - - route=bus - route=trolleybus - bus=yes - - public_transport=stop_position - bus_stop=* - stop=* - highway=bus_stop - amenity=bus_station|no_match_ways|no_match_rels - - # relations for the restriction system - type=restriction - type=restriction:bus - type=restriction:motorcar - -osm_filter_lvl1: - highway=secondary - highway=secondary_link - bus=yes - psv=yes - access=psv - access=bus - trolley_wire=yes - trolleywire=yes - trolleybus=yes - trolley_bus=yes - psv=designated - -osm_filter_lvl2: - highway=tertiary - 
highway=tertiary_link - -osm_filter_lvl3: - highway=unclassified - highway=residential - highway=road - -osm_filter_lvl4: - highway=living_street - highway=pedestrian - highway=service - -osm_filter_lvl5: - service=siding - access=permissive - access=private - access=no - service=parking_aisle - highway=footway - -# OSM entities to drop, as k=v. Applies to nodes, edges and -# relations. -# Nodes included in non-dropped ways are kept regardless of -# a matching drop filter. -# Ways included in non-dropped relations are kept regardless of -# a matching drop filter. - -osm_filter_drop: - area=yes - train=yes|no_match_ways - # access=no - public_transport=stop_area|no_match_nds|no_match_rels - type=multipolygon - railway=platform - railway=station - # service=parking_aisle - highway=proposed - highway=footway - highway=construction - building=yes - building=train_station - leisure=garden - leisure=park - -# Nodes that should act as "no-hup" nodes. These are nodes -# that are contained in multiple ways, but cannot be used -# to switch from one way to another (for example, a -# track crossing in rail networks) - -osm_filter_nohup: - -# Configuration of the OSM road restriction system -# We only support restriction with a single via node -# atm - -osm_node_negative_restriction: - restriction=no_right_turn - restriction=no_left_turn - restriction=no_u_turn - restriction=no_straight_on - restriction:bus=no_right_turn - restriction:bus=no_left_turn - restriction:bus=no_u_turn - restriction:bus=no_straight_on - -osm_node_positive_restriction: - restriction=only_left_turn - restriction=only_straight_on - restriction=only_right_turn - restriction:bus=only_left_turn - restriction:bus=only_straight_on - restriction:bus=only_right_turn - -osm_filter_no_restriction: - except=psv|mult_val_match - except=bus|mult_val_match - -# Edges that should act as one-way nodes. 
- -osm_filter_oneway: - junction=roundabout # oneway=yes is implied - highway=motorway # oneway=yes is implied - oneway=yes - oneway=1 - oneway=true - oneway:bus=yes - oneway:bus=1 - oneway:bus=true - oneway:psv=yes - oneway:psv=1 - oneway:psv=true - -osm_filter_oneway_reverse: - oneway=-1 - -# Edges that may explicitely be used in -# both directions. May be used to set exception -# to "osm_filter_oneway" - -osm_filter_undirected: - oneway=false - oneway=0 - oneway=alternating - oneway=reversible - oneway=no - oneway:bus=no - oneway:bus=0 - oneway:bus=false - oneway:psv=no - oneway:psv=0 - oneway:psv=false - busway=opposite_lane - busway=opposite - busway:left=opposite_lane - busway:right=opposite_lane - psv=opposite_lane - psv=opposite - -# Nodes that are stations. -# Only nodes that have been kept during the filtering above will be -# checked. -osm_filter_station: - public_transport=stop_position - bus_stop=* - stop=* - highway=bus_stop - amenity=bus_station - -# Relation fields that should be used for catching the lines that -# occur on an edge. Only relations that have been kept during the -# filtering above will be checked. The 'linename' will be normalized -# according to the rules in line_normalization_chain. -# The 'from_name' and 'to_name' will be normalized according to the -# rules in station_normalization_chain. -# The relations tags are given in the order of their relevance - -# the first normalized tag-value that is not null/empty will be -# taken. -osm_line_relation_tags: - line_name=ref,name # careful, no space after/before comma allowed! - from_name=from - to_name=to - - -# attr name together with the -# max distance in meters between any of the groups members and -# a potential new member -# first matching rule will be taken -# only applies to nodes that match osm_filter_station! 
-osm_station_group_attrs: - uic_ref=500 - wikidata=500 - name=100 - -# max distance in meters between a snapped station position and the -# original station position -osm_max_snap_distance: 10 , 50, 100 - -osm_max_snap_level: 5 - -osm_max_osm_station_distance: 7.5 - -# sorted by priority, first found attr will be taken -osm_station_name_attrs: - name - uic_name - -# the track number tag in stop nodes, first one is taken -osm_track_number_tags: local_ref - -routing_lvl0_fac: 1 # default level -routing_lvl1_fac: 1.25 -routing_lvl2_fac: 1.5 -routing_lvl3_fac: 1.75 -routing_lvl4_fac: 2.25 -routing_lvl5_fac: 3 -routing_lvl6_fac: 4 -routing_lvl7_fac: 5 - -# Punishment (in meters) to add to the distance -# function if a vehicle performans a full turn -routing_full_turn_punish: 500 - -routing_station_distance_punish_fac: 2.5 - -routing_non_osm_station_punish: 500 - -# Max angle that should be counted as a full turn -routing_full_turn_angle: 20 - -# Max angle in a route from a station to an already reachable neighbor -routing_snap_full_turn_angle: 110 - -osm_max_node_block_distance: 10 - -# Punishment (in meters) to add to the distance -# function if a vehicle passes a station node without -# stopping there -routing_pass_thru_station_punish: 0 - -# Punishment factor for every meter a vehicle -# travels through a one-way edge -routing_one_way_meter_punish_fac: 4 - -routing_one_way_edge_punish: 5000 - -# Punishment factor for every meter a vehicle -# travels through an edge without any matching line -# information -# routing_line_unmatched_punish_fac: 1.75 - -[tram, subway, funicular] - -# OSM entities to keep on different levels, as k=v. Applies -# to nodes, edges and relations. -# Nodes included in kept ways are always kept. -# Ways included in kept relations are always kept. 
- -osm_filter_keep: - route=tram - railway=subway - railway=light_rail - railway=tram - railway=funicular - railway=station - railway=halt - railway=tram_stop - route=subway - route=light_rail - subway=yes - tram=yes - -osm_filter_lv2: - service=siding - -osm_filter_lvl5: - service=crossover - service=yard - -# OSM entities to drop, as k=v. Applies to nodes, edges and -# relations. -# Nodes included in non-dropped ways are kept regardless of -# a matching drop filter. -# Ways included in non-dropped relations are kept regardless of -# a matching drop filter. - -osm_filter_drop: - area=yes - public_transport=stop_area - type=multipolygon - railway=platform - public_transport=platform - service=alley - -# Nodes that should act as "no-hup" nodes. These are nodes -# that are contained in multiple ways, but cannot be used -# to switch from one way to another (for example, a -# track crossing in rail networks) - -osm_filter_nohup: - railway:switch=no - railway=railway_crossing - -# Edges that should act as one-way nodes. - -osm_filter_oneway: - oneway=yes - -# Edges that may explicitely be used in -# both directions. May be used to set exception -# to "osm_filter_oneway" - -osm_filter_undirected: - -# Nodes that are stations. -# Only nodes that have been kept during the filtering above will be -# checked. -osm_filter_station: - public_transport=stop_position - station=subway - station=tram - railway=stop - railway=halt - railway=station - railway=tram_stop - railway=subway_stop - tram_stop=* - stop=* - -# Relation fields that should be used for catching the lines that -# occur on an edge. Only relations that have been kept during the -# filtering above will be checked. The 'linename' will be normalized -# according to the rules in line_normalization_chain. -# The 'from_name' and 'to_name' will be normalized according to the -# rules in station_normalization_chain. 
-# The relations tags are given in the order of their relevance - -# the first normalized tag-value that is not null/empty will be -# taken. -osm_line_relation_tags: - line_name=ref,name # careful, no space after/before comma allowed! - from_name=from - to_name=to - - -# attr name together with the -# max distance in meters between any of the groups members and -# a potential new member -# first matching rule will be taken -# only applies to nodes that match osm_filter_station! -osm_station_group_attrs: - uic_ref=500 - wikidata=500 - name=100 - -# max distance in meters between a snapped station position and the -# original station position -osm_max_snap_distance: 10, 50, 100 - -osm_max_snap_level: 4 - - -# sorted by priority, first found attr will be taken -osm_station_name_attrs: - name - uic_name - -# the track number tag in stop nodes, first one is taken -osm_track_number_tags: local_ref - -routing_lvl0_fac: 1 # default level -routing_lvl1_fac: 1.5 -routing_lvl2_fac: 2 -routing_lvl3_fac: 2.5 -routing_lvl4_fac: 3.5 -routing_lvl5_fac: 5 -routing_lvl6_fac: 5 -routing_lvl7_fac: 5 - -# Punishment (in meters) to add to the distance -# function if a vehicle performans a full turn -routing_full_turn_punish: 2000 - -routing_station_distance_punish_fac: 3.14 - -routing_non_osm_station_punish: 235 - -# Max angle that should be counted as a full turn -routing_full_turn_angle: 80 - -# Max angle in a route from a station to an already reachable neighbar -routing_snap_full_turn_angle: 80 - -# Punishment (in meters) to add to the distance -# function if a vehicle passes a station node without -# stopping there -routing_pass_thru_station_punish: 100 - -# Punishment factor for every meter a vehicle -# travels through a one-way edge -routing_one_way_meter_punish_fac: 1 - -# Punishment factor for every meter a vehicle -# travels through an edge without any matching line -# information -routing_line_unmatched_punish_fac: 0.5 - -[ferry] - -# OSM entities to keep on different levels, 
as k=v. Applies -# to nodes, edges and relations. -# Nodes included in kept ways are always kept. -# Ways included in kept relations are always kept. - -osm_filter_keep: - route=ferry - waterway=river - motorboat=yes - ferry=yes - -# Nodes that are stations. -# Only nodes that have been kept during the filtering above will be -# checked. -osm_filter_station: - public_transport=stop_position - station=ferry - railway=stop - railway=halt - railway=station - stop=* - -# Relation fields that should be used for catching the lines that -# occur on an edge. Only relations that have been kept during the -# filtering above will be checked. The 'linename' will be normalized -# according to the rules in line_normalization_chain. -# The 'from_name' and 'to_name' will be normalized according to the -# rules in station_normalization_chain. -# The relations tags are given in the order of their relevance - -# the first normalized tag-value that is not null/empty will be -# taken. -osm_line_relation_tags: - line_name=ref,name # careful, no space after/before comma allowed! - from_name=from - to_name=to - - -# attr name together with the -# max distance in meters between any of the groups members and -# a potential new member -# first matching rule will be taken -# only applies to nodes that match osm_filter_station! 
-osm_station_group_attrs: - uic_ref=500 - wikidata=500 - name=100 - -# max distance in meters between a snapped station position and the -# original station position -osm_max_snap_distance: 10, 100, 200 - -osm_max_snap_level: 4 - - -# sorted by priority, first found attr will be taken -osm_station_name_attrs: - name - uic_name - -# the track number tag in stop nodes, first one is taken -osm_track_number_tags: local_ref - -routing_lvl0_fac: 1 # default level -routing_lvl1_fac: 1.5 -routing_lvl2_fac: 2 -routing_lvl3_fac: 2.5 -routing_lvl4_fac: 3.5 -routing_lvl5_fac: 5 -routing_lvl6_fac: 5 -routing_lvl7_fac: 5 - -# Punishment (in meters) to add to the distance -# function if a vehicle performans a full turn -routing_full_turn_punish: 100 - -routing_station_distance_punish_fac: 3.14 - -routing_non_osm_station_punish: 50 - -# Max angle that should be counted as a full turn -routing_full_turn_angle: 45 - -# Max angle in a route from a station to an already reachable neighbar -routing_snap_full_turn_angle: 0 - -# Punishment (in meters) to add to the distance -# function if a vehicle passes a station node without -# stopping there -routing_pass_thru_station_punish: 0 - -# Punishment factor for every meter a vehicle -# travels through a one-way edge -routing_one_way_meter_punish_fac: 1 - -# Punishment factor for every meter a vehicle -# travels through an edge without any matching line -# information -routing_line_unmatched_punish_fac: 0.5 - -[tram, bus, subway, rail, gondola, funicular, ferry] # Regular expressions and station comparision is # always case insensitive! 
station_normalize_chain: @@ -808,6 +28,15 @@ station_normalize_chain: < -> ' '; > -> ' '; & -> '+'; + Ä -> Ae; + Ö -> Oe; + Ü -> Ue; + À -> A; + Ò -> O; + Ù -> U; + Á -> A; + Ó -> O; + Ú -> U; ä -> ae; ö -> oe; ü -> ue; @@ -942,6 +171,9 @@ line_normalize_chain: ä -> ae; ö -> oe; ü -> ue; + Ä -> Ae; + Ö -> Oe; + Ü -> Ue; ß -> ss; è -> e; é -> e; @@ -979,6 +211,9 @@ line_normalize_chain: # line/number combs ALWAYS without whitespace (T 2 -> T2) ^([a-zA-Z]+) ([0-9]+)$ -> \1\2; + # delete extra line specifier in data for Vitoria-Gasteiz (L2 -> 2) + ^l([0-9a-zA-Z]+)$ -> \1; + track_normalize_chain: '(^| )gleis($| )' -> ''; '(^| )gl\.($| )' -> ''; @@ -991,3 +226,1206 @@ track_normalize_chain: # delete track numbers greater than 999 ^[0-9]{4,}$ -> ''; + +[rail] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_keep: + railway=rail + railway=light_rail + railway=tram + railway=narrow_gauge + route=rail + route=light_rail + route=train + public_transport=stop_area|rel_flat + +osm_filter_lvl1: + usage=branch + +osm_filter_lvl2: + railway=tram + service=siding + +osm_filter_lvl3: + service=crossover + # we cannot completely drop service=yard, because it is often used + # incorrectly for crossovers + service=yard + +osm_filter_lvl4: + +osm_filter_lvl5: + usage=industrial + usage=military + usage=test + service=spur + railway:traffic_mode=freight + +# OSM entities to drop, as k=v. Applies to nodes, edges and +# relations. +# Nodes included in non-dropped ways are kept regardless of +# a matching drop filter. +# Ways included in non-dropped relations are kept regardless of +# a matching drop filter. 
+ +osm_filter_drop: + railway=abandoned + railway=construction + railway=disused + railway=miniature + railway=signal + railway=razed + railway=proposed + metro=yes + area=yes + # access=no + type=multipolygon + railway=platform + public_transport=platform + building=yes + building=train_station + amenity=shelter + amenity=bus_station + building=roof + +# Nodes that should act as "no-hup" nodes. These are nodes +# that are contained in multiple ways, but cannot be used +# to switch from one way to another (for example, a +# track crossing in rail networks) + +osm_filter_nohup: + railway:switch=no + railway=railway_crossing + +# Edges that should act as one-way nodes. + +osm_filter_oneway: + oneway=yes + railway:preferred_direction=forward + +osm_filter_oneway_reverse: + railway:preferred_direction=backward + +# Edges that may explicitly be used in +# both directions. May be used to set exception +# to "osm_filter_oneway" + +osm_filter_undirected: + oneway=false + oneway=no + oneway=-1 + railway:preferred_direction=both + railway:bidirectional=regular + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. +osm_filter_station: + public_transport=stop_position + railway=stop + railway=halt + railway=station + #railway=tram_stop + railway=subway_stop + tram_stop=* + stop=* + +# Relation fields that should be used for catching the lines that +# occur on an edge. Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + line_name=ref,name # careful, no space after/before comma allowed!
+ from_name=from + to_name=to + +# max distance in meters between a snapped position on an +# edge and the input GTFS/OSM station +osm_max_snap_distance: 200 + +# max edge level to which station will be snapped +osm_max_snap_level: 2 + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + [public_transport=stop_area]name + uic_name + +# the track number tag in edges, first match is taken +osm_edge_track_number_tags: + railway:track_ref + local_ref + ref + +# the track number tag in stop nodes, first match is taken, +# overwrites osm_edge_track_number_tags +osm_track_number_tags: + local_ref + ref + +# avg speed on segment levels, in km/h +osm_lvl0_avg_speed: 120 # default level +osm_lvl1_avg_speed: 90 +osm_lvl2_avg_speed: 65 +osm_lvl3_avg_speed: 50 +osm_lvl4_avg_speed: 30 +osm_lvl5_avg_speed: 20 +osm_lvl6_avg_speed: 10 +osm_lvl7_avg_speed: 5 + +# Punishment (in seconds) to add to the distance +# function if a vehicle performs a full turn +routing_full_turn_penalty: 180 # 3 minutes + +# Penalty added to non-station placements +routing_non_station_penalty: 0.4 + +# If the station name does not match, add this penalty +routing_station_unmatched_penalty: 0.4 + +# If the platform does not match, add this penalty +routing_platform_unmatched_penalty: 0.1 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 100 + +# Max angle in a route from a station to an already reachable neighbor +routing_snap_full_turn_angle: 100 + +# Factor by which the vehicle slows down in a one way street (factor 5 +# means it will take 5 times longer) +osm_one_way_speed_penalty_fac: 5 + +# Additional one-time time penalty for entering a one-way segment +# in seconds +osm_one_way_entry_cost: 300 + +# If a segment has no matching line attributes, multiply the +# time needed to traverse it with the given factor (should +# be > 1 for a punishment, values < 1 will prefer unmatching segments) +routing_line_unmatched_time_penalty_fac: 1.2
+routing_line_station_to_unmatched_time_penalty: 1.1 +routing_line_station_from_unmatched_time_penalty: 1.05 + +# If a segment has no line attributes at all, multiply the +# time needed to traverse it with the given factor (should +# be > 1 for a punishment, values < 1 will prefer unmatching segments) +# routing_no_lines_penalty_fac: 1 + +# special line normalization for trains +line_normalize_chain: + , -> ' '; + - -> ' '; + _ -> ' '; + " -> ''; + ' -> ''; + ` -> ''; + / -> ' '; + < -> ' '; + > -> ' '; + & -> '+'; + ä -> ae; + ö -> oe; + ü -> ue; + Ä -> Ae; + Ö -> Oe; + Ü -> Ue; + ß -> ss; + è -> e; + é -> e; + á -> a; + à -> a; + ó -> o; + ò -> o; + í -> i; + ú -> u; + ù -> u; + ë -> e; + ç -> c; + å -> ae; + â -> a; + ê -> e; + ï -> i; + œ -> oe; + ø -> oe; + ^line -> ''; + ^linie -> ''; + ^metro -> ''; + ^tram -> ''; + ^strassenbahn -> ''; + ^bus -> ''; + + # delete everything in brackets + \(.+\) -> ' '; + \[.+\] -> ' '; + + # whitespace + \s+ -> ' '; + ^\s -> ''; + \s$ -> ''; + + # line/number combs ALWAYS with whitespace (ICE101 -> ICE 101) + ^([a-zA-Z]+)([0-9]+)$ -> \1 \2; + + # if a character line number is present, delete the numeric part + ^([a-zA-Z]+) [0-9]+$ -> \1; + +track_normalize_chain: + '(^| )gleis($| )' -> ''; + '(^| )gl\.($| )' -> ''; + '(^| )platform($| )' -> ''; + '(^| )track($| )' -> ''; + '(^| )rail($| )' -> ''; + ^([a-zA-Z]+) ([0-9]+)$ -> \1\2; + # number/char combs ALWAYS without char + ^([0-9]+) ([a-zA-Z]+)$ -> \1; + ^([0-9]+)([a-zA-Z]+)$ -> \1; + + # delete track numbers greater than 999 + ^[0-9]{4,}$ -> ''; + +[bus, coach] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. 
+ +osm_filter_keep: + # highways + highway=motorway + highway=trunk + highway=primary + highway=secondary + highway=tertiary + highway=residential + highway=living_street + highway=unclassified + + # highway links + highway=motorway_link + highway=trunk_link + highway=primary_link + highway=secondary_link + highway=tertiary_link + highway=residential_link + + way=primary + way=seconday + way=bus_guideway + highway=bus_guideway + busway=* + psv=yes + psv=designated + + bus:lanes=yes + bus:lanes=designated + bus:lanes=1 + + lanes:bus=1 + lanes:bus=2 + lanes:bus=3 + + lanes:psv=1 + lanes:psv=2 + lanes:psv=3 + + trolley_wire=yes + trolleywire=yes + trolleybus=yes + trolley_bus=yes + + route=bus + route=trolleybus + bus=yes + bus=designated + minibus=designated + minibus=yes + + public_transport=stop_position + bus_stop=* + stop=* + highway=bus_stop + amenity=bus_station|no_match_ways|no_match_rels + + # relations for the restriction system + type=restriction + type=restriction:bus + type=restriction:motorcar + +osm_filter_lvl1: + highway=trunk + highway=trunk_link + highway=primary + highway=primary_link + +osm_filter_lvl2: + highway=secondary + highway=secondary_link + bus=yes + bus=designated + minibus=yes + minibus=designated + psv=designated + psv=yes + access=psv + access=bus + trolley_wire=yes + trolleywire=yes + trolleybus=yes + trolley_bus=yes + psv=designated + +osm_filter_lvl3: + highway=tertiary + highway=tertiary_link + +osm_filter_lvl4: + highway=unclassified + highway=residential + highway=road + +osm_filter_lvl5: + highway=living_street + highway=pedestrian + highway=service + psv=no + +osm_filter_lvl6: + bus=no + service=siding + access=permissive + access=private + access=no + service=parking_aisle + highway=footway + highway=track + +# OSM entities to drop, as k=v. Applies to nodes, edges and +# relations. +# Nodes included in non-dropped ways are kept regardless of +# a matching drop filter. 
+# Ways included in non-dropped relations are kept regardless of +# a matching drop filter. + +osm_filter_drop: + area=yes + train=yes|no_match_ways + # access=no + public_transport=stop_area|no_match_nds|no_match_rels + type=multipolygon + railway=platform + railway=station + # service=parking_aisle + highway=proposed + highway=footway + highway=construction + building=yes + building=train_station + leisure=garden + leisure=park + +# Nodes that should act as "no-hup" nodes. These are nodes +# that are contained in multiple ways, but cannot be used +# to switch from one way to another (for example, a +# track crossing in rail networks) + +osm_filter_nohup: + +# Configuration of the OSM road restriction system +# We only support restrictions with a single via node +# at the moment + +osm_node_negative_restriction: + restriction=no_right_turn + restriction=no_left_turn + restriction=no_u_turn + restriction=no_straight_on + restriction:bus=no_right_turn + restriction:bus=no_left_turn + restriction:bus=no_u_turn + restriction:bus=no_straight_on + +osm_node_positive_restriction: + restriction=only_left_turn + restriction=only_straight_on + restriction=only_right_turn + restriction:bus=only_left_turn + restriction:bus=only_straight_on + restriction:bus=only_right_turn + +osm_filter_no_restriction: + except=psv|mult_val_match + except=bus|mult_val_match + +# Edges that should act as one-way nodes. + +osm_filter_oneway: + junction=roundabout # oneway=yes is implied + highway=motorway # oneway=yes is implied + oneway=yes + oneway=1 + oneway=true + oneway:bus=yes + oneway:bus=1 + oneway:bus=true + oneway:psv=yes + oneway:psv=1 + oneway:psv=true + +osm_filter_oneway_reverse: + oneway=-1 + +# Edges that may explicitly be used in +# both directions.
May be used to set exception +# to "osm_filter_oneway" + +osm_filter_undirected: + oneway=false + oneway=0 + oneway=alternating + oneway=reversible + oneway=no + oneway:bus=no + oneway:bus=0 + oneway:bus=false + oneway:psv=no + oneway:psv=0 + oneway:psv=false + busway=opposite_lane + busway=opposite + busway:left=opposite_lane + busway:right=opposite_lane + psv=opposite_lane + psv=opposite + lanes:psv:backward=1 + lanes:psv:backward=2 + lanes:bus:backward=1 + lanes:bus:backward=2 + bus:lanes:backward=yes + bus:lanes:backward=designated + bus:lanes:backward=1 + + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. +osm_filter_station: + public_transport=stop_position + bus_stop=* + stop=* + highway=bus_stop + amenity=bus_station + +osm_filter_turning_circle: + highway=turning_circle + highway=turning_loop + junction=roundabout + highway=mini_roundabout + +# Relation fields that should be used for catching the lines that +# occur on an edge. Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + line_name=ref,name # careful, no space after/before comma allowed! 
+ from_name=from + to_name=to + line_color=colour,color + +# max distance in meters between an OSM station candidate +# and the input GTFS station +osm_max_station_cand_distance: 200 + +# max distance in meters between a snapped position on an +# edge and the input GTFS/OSM station +osm_max_snap_distance: 100 + +osm_max_snap_level: 5 + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + uic_name + +# the track number tag in stop nodes, first one is taken +osm_track_number_tags: local_ref + +# avg speed on segment levels, in km/h +osm_lvl0_avg_speed: 85 # default level +osm_lvl1_avg_speed: 70 +osm_lvl2_avg_speed: 55 +osm_lvl3_avg_speed: 40 +osm_lvl4_avg_speed: 30 +osm_lvl5_avg_speed: 20 +osm_lvl6_avg_speed: 10 +osm_lvl7_avg_speed: 5 + +# Factor by which the vehicle slows down in a one way street (factor 5 +# means it will take 5 times longer) +osm_one_way_speed_penalty_fac: 5 + +# Additional one-time time penalty for entering a one-way segment +# in seconds +osm_one_way_entry_cost: 300 + +# If a segment has no matching line attributes, multiply the +# time needed to traverse it with the given factor (should +# be > 1 for a punishment, values < 1 will prefer unmatching segments) +routing_line_unmatched_time_penalty_fac: 1.2 +routing_line_station_to_unmatched_time_penalty: 1.1 +routing_line_station_from_unmatched_time_penalty: 1.05 + +# If a segment has no line attributes at all, multiply the +# time needed to traverse it with the given factor (should +# be > 1 for a punishment, values < 1 will prefer unmatching segments) +# routing_no_lines_penalty_fac: 1 + +# If the station name does not match, add this penalty +routing_station_unmatched_penalty: 0.4 + +# Punishment (in seconds) to add to the distance +# function if a vehicle performs a full turn +routing_full_turn_penalty: 120 # 2 minutes + +# Penalty added to non-station placements +routing_non_station_penalty: 0.4 + +# Max angle that should be counted as a full turn
+routing_full_turn_angle: 20 + +# Max angle in a route from a station to an already reachable neighbor +routing_snap_full_turn_angle: 110 + +osm_max_node_block_distance: 10 + + +[coach] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_lvl0: + highway=motorway + highway=motorway_link + +osm_filter_lvl1: + highway=trunk + highway=trunk_link + +osm_filter_lvl2: + highway=primary + highway=primary_link + +osm_filter_lvl3: + highway=secondary + highway=secondary_link + +osm_filter_lvl4: + highway=tertiary + highway=tertiary_link + +osm_filter_lvl5: + highway=unclassified + highway=residential + highway=road + highway=service + +osm_filter_lvl6: + highway=living_street + highway=pedestrian + psv=no + +osm_filter_lvl7: + bus=no + service=siding + access=permissive + access=private + access=no + service=parking_aisle + highway=footway + +osm_lvl0_avg_speed: 120 # default level +osm_lvl1_avg_speed: 90 +osm_lvl2_avg_speed: 65 +osm_lvl3_avg_speed: 50 +osm_lvl4_avg_speed: 30 +osm_lvl5_avg_speed: 20 +osm_lvl6_avg_speed: 10 +osm_lvl7_avg_speed: 5 + +osm_max_snap_level: 5 + +[tram, subway] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_keep: + route=tram + route=funicular + railway=subway + railway=light_rail + railway=tram + railway=funicular + railway=station + railway=halt + railway=tram_stop + route=subway + route=light_rail + subway=yes + tram=yes + +osm_filter_lvl2: + service=siding + +osm_filter_lvl3: + railway=funicular + route=funicular + +osm_filter_lvl5: + service=crossover + service=yard + +# OSM entities to drop, as k=v. Applies to nodes, edges and +# relations. 
+# Nodes included in non-dropped ways are kept regardless of +# a matching drop filter. +# Ways included in non-dropped relations are kept regardless of +# a matching drop filter. + +osm_filter_drop: + area=yes + public_transport=stop_area + type=multipolygon + railway=platform + public_transport=platform + service=alley + +# Nodes that should act as "no-hup" nodes. These are nodes +# that are contained in multiple ways, but cannot be used +# to switch from one way to another (for example, a +# track crossing in rail networks) + +osm_filter_nohup: + railway:switch=no + railway=railway_crossing + +# Edges that should act as one-way nodes. + +osm_filter_oneway: + oneway=yes + +# Edges that may explicitly be used in +# both directions. May be used to set exception +# to "osm_filter_oneway" + +osm_filter_undirected: + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. +osm_filter_station: + public_transport=stop_position + station=subway + station=tram + railway=stop + railway=halt + railway=station + railway=tram_stop + railway=subway_stop + tram_stop=* + stop=* + +# Relation fields that should be used for catching the lines that +# occur on an edge. Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + line_name=ref,name # careful, no space after/before comma allowed!
+ from_name=from + to_name=to + +# max distance in meters between a snapped position on an +# edge and the input GTFS/OSM station +osm_max_snap_distance: 100 + +osm_max_snap_level: 4 + + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + uic_name + +# the track number tag in stop nodes, first one is taken +osm_track_number_tags: local_ref + +# avg speed on segment levels, in km/h +osm_lvl0_avg_speed: 85 # default level +osm_lvl1_avg_speed: 70 +osm_lvl2_avg_speed: 55 +osm_lvl3_avg_speed: 40 +osm_lvl4_avg_speed: 30 +osm_lvl5_avg_speed: 20 +osm_lvl6_avg_speed: 10 +osm_lvl7_avg_speed: 5 + +# Punishment (in seconds) to add to the distance +# function if a vehicle performs a full turn +routing_full_turn_penalty: 180 # 3 minutes + +# Penalty added to non-station placements +routing_non_station_penalty: 0.4 + +# If the station name does not match, add this penalty +routing_station_unmatched_penalty: 0.4 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 80 + +# Max angle in a route from a station to an already reachable neighbor +routing_snap_full_turn_angle: 80 + +# Factor by which the vehicle slows down in a one way street (factor 5 +# means it will take 5 times longer) +osm_one_way_speed_penalty_fac: 2 + +# If a segment has no matching line attributes, multiply the +# time needed to traverse it with the given factor (should +# be > 1 for a punishment, values < 1 will prefer unmatching segments) +routing_line_unmatched_time_penalty_fac: 1.2 +routing_line_station_to_unmatched_time_penalty: 1.1 +routing_line_station_from_unmatched_time_penalty: 1.05 + +# If a segment has no line attributes at all, multiply the +# time needed to traverse it with the given factor (should +# be > 1 for a punishment, values < 1 will prefer unmatching segments) +# routing_no_lines_penalty_fac: 1 + +[gondola] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations.
+# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_keep: + aerialway=gondola + aerialway=cable_car + aerialway=chair_lift + aerialway=mixed_lift + + +# OSM entities to drop, as k=v. Applies to nodes, edges and +# relations. +# Nodes included in non-dropped ways are kept regardless of +# a matching drop filter. +# Ways included in non-dropped relations are kept regardless of +# a matching drop filter. + +osm_filter_drop: + area=yes + public_transport=stop_area + type=multipolygon + railway=platform + public_transport=platform + service=alley + +# Nodes that should act as "no-hup" nodes. These are nodes +# that are contained in multiple ways, but cannot be used +# to switch from one way to another (for example, a +# track crossing in rail networks) + +osm_filter_nohup: + +# Edges that should act as one-way nodes. + +osm_filter_oneway: + oneway=yes + +# Edges that may explicitly be used in +# both directions. May be used to set exception +# to "osm_filter_oneway" + +osm_filter_undirected: + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. +osm_filter_station: + aerialway=station + aerialway=stop + public_transport=stop_position + station=subway + station=tram + railway=stop + railway=halt + railway=station + railway=tram_stop + railway=subway_stop + tram_stop=* + stop=* + +# Relation fields that should be used for catching the lines that +# occur on an edge. Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken.
+osm_line_relation_tags: + line_name=ref,name # careful, no space after/before comma allowed! + from_name=from + to_name=to + +# max distance in meters between a snapped position on an +# edge and the input GTFS/OSM station +osm_max_snap_distance: 100 + +osm_max_snap_level: 4 + + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + uic_name + +# the track number tag in stop nodes, first one is taken +osm_track_number_tags: local_ref + +# avg speed on segment levels, in km/h +osm_lvl0_avg_speed: 85 # default level +osm_lvl1_avg_speed: 70 +osm_lvl2_avg_speed: 55 +osm_lvl3_avg_speed: 40 +osm_lvl4_avg_speed: 30 +osm_lvl5_avg_speed: 20 +osm_lvl6_avg_speed: 10 +osm_lvl7_avg_speed: 5 + +# Punishment (in seconds) to add to the distance +# function if a vehicle performs a full turn +routing_full_turn_penalty: 120 # 2 minutes + +# Penalty added to non-station placements +routing_non_station_penalty: 0.4 + +# If the station name does not match, add this penalty +routing_station_unmatched_penalty: 0.4 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 80 + +# Max angle in a route from a station to an already reachable neighbor +routing_snap_full_turn_angle: 80 + +# Factor by which the vehicle slows down in a one way street (factor 5 +# means it will take 5 times longer) +osm_one_way_speed_penalty_fac: 2 + +# If a segment has no matching line attributes, multiply the +# time needed to traverse it with the given factor (should +# be > 1 for a punishment, values < 1 will prefer unmatching segments) +routing_line_unmatched_time_penalty_fac: 1.2 +routing_line_station_to_unmatched_time_penalty: 1.1 +routing_line_station_from_unmatched_time_penalty: 1.05 + +# If a segment has no line attributes at all, multiply the +# time needed to traverse it with the given factor (should +# be > 1 for a punishment, values < 1 will prefer unmatching segments) +# routing_no_lines_penalty_fac: 1 + +[funicular] + +# OSM entities to keep
on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_keep: + route=funicular + railway=funicular + railway=narrow_gauge + route=tram + railway=subway + railway=light_rail + railway=tram + railway=station + railway=halt + railway=tram_stop + route=subway + route=light_rail + subway=yes + tram=yes + +osm_filter_lvl2: + service=siding + +osm_filter_lvl3: + route=tram + route=narrow_gauge + railway=subway + railway=narrow_gauge + railway=light_rail + railway=tram + railway=station + railway=halt + railway=tram_stop + route=subway + route=light_rail + subway=yes + tram=yes + +osm_filter_lvl5: + service=crossover + service=yard + +# OSM entities to drop, as k=v. Applies to nodes, edges and +# relations. +# Nodes included in non-dropped ways are kept regardless of +# a matching drop filter. +# Ways included in non-dropped relations are kept regardless of +# a matching drop filter. + +osm_filter_drop: + area=yes + public_transport=stop_area + type=multipolygon + railway=platform + public_transport=platform + service=alley + +# Nodes that should act as "no-hup" nodes. These are nodes +# that are contained in multiple ways, but cannot be used +# to switch from one way to another (for example, a +# track crossing in rail networks) + +osm_filter_nohup: + railway:switch=no + railway=railway_crossing + +# Edges that should act as one-way nodes. + +osm_filter_oneway: + oneway=yes + +# Edges that may explicitly be used in +# both directions. May be used to set exception +# to "osm_filter_oneway" + +osm_filter_undirected: + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked.
+osm_filter_station: + public_transport=stop_position + station=subway + station=tram + railway=stop + railway=halt + railway=station + railway=tram_stop + railway=subway_stop + tram_stop=* + stop=* + +# Relation fields that should be used for catching the lines that +# occur on an edge. Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + line_name=ref,name # careful, no space after/before comma allowed! + from_name=from + to_name=to + +# max distance in meters between a snapped position on an +# edge and the input GTFS/OSM station +osm_max_snap_distance: 100 + +osm_max_snap_level: 4 + + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + uic_name + +# the track number tag in stop nodes, first one is taken +osm_track_number_tags: local_ref + +# avg speed on segment levels, in km/h +osm_lvl0_avg_speed: 85 # default level +osm_lvl1_avg_speed: 70 +osm_lvl2_avg_speed: 55 +osm_lvl3_avg_speed: 40 +osm_lvl4_avg_speed: 30 +osm_lvl5_avg_speed: 20 +osm_lvl6_avg_speed: 10 +osm_lvl7_avg_speed: 5 + +# Punishment (in seconds) to add to the distance +# function if a vehicle performs a full turn +routing_full_turn_penalty: 120 # 2 minutes + +# Penalty added to non-station placements +routing_non_station_penalty: 0.4 + +# If the station name does not match, add this penalty +routing_station_unmatched_penalty: 0.4 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 80 + +# Max angle in a route from a station to an already reachable neighbor +routing_snap_full_turn_angle: 80 + +# Factor by which the vehicle slows down in a one way
street (factor 5 +# means it will take 5 times longer) +osm_one_way_speed_penalty_fac: 2 + +# If a segment has no matching line attributes, multiply the +# time needed to traverse it with the given factor (should +# be > 1 for a punishment, values < 1 will prefer unmatching segments) +routing_line_unmatched_time_penalty_fac: 1.2 +routing_line_station_to_unmatched_time_penalty: 1.1 +routing_line_station_from_unmatched_time_penalty: 1.05 + +# If a segment has no line attributes at all, multiply the +# time needed to traverse it with the given factor (should +# be > 1 for a punishment, values < 1 will prefer unmatching segments) +# routing_no_lines_penalty_fac: 1 + +[ferry] + +# OSM entities to keep on different levels, as k=v. Applies +# to nodes, edges and relations. +# Nodes included in kept ways are always kept. +# Ways included in kept relations are always kept. + +osm_filter_keep: + route=ferry + waterway=river + motorboat=yes + ferry=yes + amenity=ferry_terminal + mooring=ferry + station=ferry + +# Nodes that are stations. +# Only nodes that have been kept during the filtering above will be +# checked. +osm_filter_station: + ferry=yes + public_transport=stop_position + amenity=ferry_terminal + mooring=ferry + station=ferry + railway=stop + railway=halt + railway=station + stop=* + +# Relation fields that should be used for catching the lines that +# occur on an edge. Only relations that have been kept during the +# filtering above will be checked. The 'linename' will be normalized +# according to the rules in line_normalization_chain. +# The 'from_name' and 'to_name' will be normalized according to the +# rules in station_normalization_chain. +# The relations tags are given in the order of their relevance - +# the first normalized tag-value that is not null/empty will be +# taken. +osm_line_relation_tags: + line_name=ref,name # careful, no space after/before comma allowed! 
+ from_name=from + to_name=to + +# max distance in meters between a snapped position on an +# edge and the input GTFS/OSM station +osm_max_snap_distance: 500 + +osm_max_snap_level: 4 + + +# sorted by priority, first found attr will be taken +osm_station_name_attrs: + name + uic_name + +# the track number tag in stop nodes, first one is taken +osm_track_number_tags: local_ref + +# avg speed on segment levels, in km/h +osm_lvl0_avg_speed: 70 # default level +osm_lvl1_avg_speed: 60 +osm_lvl2_avg_speed: 50 +osm_lvl3_avg_speed: 35 +osm_lvl4_avg_speed: 30 +osm_lvl5_avg_speed: 25 +osm_lvl6_avg_speed: 10 +osm_lvl7_avg_speed: 5 + +# Punishment (in seconds) to add to the distance +# function if a vehicle performs a full turn +routing_full_turn_penalty: 120 # 2 minutes + +# Penalty added to non-station placements +routing_non_station_penalty: 0.4 + +# If the station name does not match, add this penalty +routing_station_unmatched_penalty: 0.4 + +# Max angle that should be counted as a full turn +routing_full_turn_angle: 45 + +# Max angle in a route from a station to an already reachable neighbor +routing_snap_full_turn_angle: 0 + +# Factor by which the vehicle slows down in a one way street (factor 5 +# means it will take 5 times longer) +osm_one_way_speed_penalty_fac: 2 + +# If a segment has no matching line attributes, multiply the +# time needed to traverse it with the given factor (should +# be > 1 for a punishment, values < 1 will prefer unmatching segments) +routing_line_unmatched_time_penalty_fac: 1.2 +routing_line_station_to_unmatched_time_penalty: 1.15 +routing_line_station_from_unmatched_time_penalty: 1.1 + +# If a segment has no line attributes at all, multiply the +# time needed to traverse it with the given factor (should +# be > 1 for a punishment, values < 1 will prefer unmatching segments) +# routing_no_lines_penalty_fac: 1 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 14edcfc..d60a569 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@
-1,13 +1,25 @@ set(PFAEDLE_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR} ${CMAKE_CURRENT_BINARY_DIR}) -cpplint_add_subdirectory(pfaedle) - include_directories( ${PFAEDLE_INCLUDE_DIR} ) +if (ZLIB_FOUND) + include_directories( ${ZLIB_INCLUDE_DIRS} ) +endif( ZLIB_FOUND ) + +if (LIBZIP_FOUND) + include_directories( SYSTEM ${LIBZIP_INCLUDE_DIR} ) + include_directories( SYSTEM ${LIBZIP_CONF_INCLUDE_DIR} ) +endif( LIBZIP_FOUND ) + +if (BZIP2_FOUND) + include_directories( SYSTEM ${BZIP2_INCLUDE_DIR} ) +endif( BZIP2_FOUND ) + add_subdirectory(util) add_subdirectory(pfaedle) add_subdirectory(cppgtfs) -add_subdirectory(xml) add_subdirectory(configparser) +add_subdirectory(shapevl) +add_subdirectory(xml) diff --git a/src/configparser b/src/configparser index 5a1f788..ca166b3 160000 --- a/src/configparser +++ b/src/configparser @@ -1 +1 @@ -Subproject commit 5a1f788dde8f334d40505268f71bcc473d1968d8 +Subproject commit ca166b3446d5bb8b5fb8c6f637ca3f9cb0a8ff3b diff --git a/src/cppgtfs b/src/cppgtfs index 3a462c3..d26d579 160000 --- a/src/cppgtfs +++ b/src/cppgtfs @@ -1 +1 @@ -Subproject commit 3a462c37358da19f10e89f77fb7a277d69c6c4dc +Subproject commit d26d5794d396141905d71ecb8cd4f45e0120cba7 diff --git a/src/pfaedle/CMakeLists.txt b/src/pfaedle/CMakeLists.txt index 5963842..7214555 100644 --- a/src/pfaedle/CMakeLists.txt +++ b/src/pfaedle/CMakeLists.txt @@ -16,5 +16,16 @@ configure_file ( add_executable(pfaedle ${pfaedle_main}) add_library(pfaedle_dep ${pfaedle_SRC}) -include_directories(pfaedle_dep PUBLIC ${PROJECT_SOURCE_DIR}/src/cppgtfs/src) -target_link_libraries(pfaedle pfaedle_dep util xml configparser ad_cppgtfs ${Boost_LIBRARIES} -lpthread) +include_directories(pfaedle_dep PUBLIC ${PROJECT_SOURCE_DIR}/src/xml/include/ ${PROJECT_SOURCE_DIR}/src/cppgtfs/src) + +target_link_libraries(pfaedle pfaedle_dep util configparser ad_cppgtfs -lpthread) + +if (LIBZIP_FOUND) + target_link_libraries(pfaedle ${LIBZIP_LIBRARY}) +endif( LIBZIP_FOUND ) + +if (BZIP2_FOUND) + 
target_link_libraries(pfaedle ${BZIP2_LIBRARY}) +endif( BZIP2_FOUND ) + +add_subdirectory(tests) diff --git a/src/pfaedle/Def.h b/src/pfaedle/Def.h new file mode 100644 index 0000000..397dc9b --- /dev/null +++ b/src/pfaedle/Def.h @@ -0,0 +1,34 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#ifndef PFAEDLE_DEF_H_ +#define PFAEDLE_DEF_H_ + +#include +#include +#include "util/log/Log.h" +#include "util/Misc.h" +#include "util/geo/Geo.h" +#include "util/geo/PolyLine.h" + +#define __str_a(s) __str_b(s) +#define __str_b(s) #s +#define __str_c(s) s ## 1 +#define __str_d(s) __str_c(s) + +#if !defined(PFDL_PREC) || (__str_d(PFDL_PREC) == 1) +#undef PFDL_PREC +#define PFDL_PREC double +#endif + +#define PFDL_PREC_STR __str_a(PFDL_PREC) + +#define POINT util::geo::Point +#define LINE util::geo::Line +#define BOX util::geo::Box +#define POLYLINE util::geo::PolyLine + +#define BOX_PADDING 2500 + +#endif // PFAEDLE_DEF_H_ diff --git a/src/pfaedle/PfaedleMain.cpp b/src/pfaedle/PfaedleMain.cpp index 2c54028..0db163e 100644 --- a/src/pfaedle/PfaedleMain.cpp +++ b/src/pfaedle/PfaedleMain.cpp @@ -2,41 +2,92 @@ // Chair of Algorithms and Data Structures. 
// Authors: Patrick Brosi +#include +#include #include #include +#include +#include +#include + +#include #include #include #include #include + #include "ad/cppgtfs/Parser.h" #include "ad/cppgtfs/Writer.h" -#include "ad/cppgtfs/gtfs/Feed.h" #include "pfaedle/config/ConfigReader.h" #include "pfaedle/config/MotConfig.h" #include "pfaedle/config/MotConfigReader.h" -#include "pfaedle/eval/Collector.h" +#include "pfaedle/gtfs/Feed.h" +#include "pfaedle/gtfs/Writer.h" #include "pfaedle/netgraph/Graph.h" #include "pfaedle/osm/OsmIdSet.h" #include "pfaedle/router/ShapeBuilder.h" +#include "pfaedle/router/Stats.h" +#include "pfaedle/statsimi-classifier/StatsimiClassifier.h" #include "pfaedle/trgraph/Graph.h" +#include "util/Misc.h" #include "util/geo/output/GeoGraphJsonOutput.h" #include "util/geo/output/GeoJsonOutput.h" -#include "util/json/JsonWriter.h" +#include "util/json/Writer.h" #include "util/log/Log.h" -using std::string; -using pfaedle::router::MOTs; +#ifndef CFG_HOME_SUFFIX +#define CFG_HOME_SUFFIX "/.config" +#endif +#ifndef CFG_DIR +#define CFG_DIR "/etc" +#endif +#ifndef CFG_FILE_NAME +#define CFG_FILE_NAME "pfaedle.cfg" +#endif + +using configparser::ParseFileExc; +using pfaedle::config::Config; +using pfaedle::config::ConfigReader; +using pfaedle::config::MotConfig; +using pfaedle::config::MotConfigReader; using pfaedle::osm::BBoxIdx; using pfaedle::osm::OsmBuilder; -using pfaedle::config::MotConfig; -using pfaedle::config::Config; +using pfaedle::router::DistDiffTransWeight; +using pfaedle::router::DistDiffTransWeightNoHeur; +using pfaedle::router::ExpoTransWeight; +using pfaedle::router::ExpoTransWeightNoHeur; +using pfaedle::router::MOTs; +using pfaedle::router::NormDistrTransWeight; +using pfaedle::router::NormDistrTransWeightNoHeur; +using pfaedle::router::Router; +using pfaedle::router::RouterImpl; using pfaedle::router::ShapeBuilder; -using pfaedle::config::MotConfigReader; -using pfaedle::config::ConfigReader; -using pfaedle::eval::Collector; +using 
pfaedle::router::Stats; +using pfaedle::statsimiclassifier::BTSClassifier; +using pfaedle::statsimiclassifier::EDClassifier; +using pfaedle::statsimiclassifier::JaccardClassifier; +using pfaedle::statsimiclassifier::JaccardGeodistClassifier; +using pfaedle::statsimiclassifier::PEDClassifier; +using pfaedle::statsimiclassifier::StatsimiClassifier; -std::string getMotStr(const MOTs& mots); -MOTs getContMots(const MotConfig& motCfg, const MOTs& mots); +enum class RetCode { + SUCCESS = 0, + NO_INPUT_FEED = 1, + MULT_FEEDS_NOT_ALWD = 2, + TRIP_NOT_FOUND = 3, + GTFS_PARSE_ERR = 4, + NO_OSM_INPUT = 5, + MOT_CFG_PARSE_ERR = 6, + OSM_PARSE_ERR = 7, + GTFS_WRITE_ERR = 8, + NO_MOT_CFG = 9 +}; + +std::string getFileNameMotStr(const MOTs& mots); +std::vector getCfgPaths(const Config& cfg); + +// _____________________________________________________________________________ +void gtfsWarnCb(std::string msg) { LOG(WARN) << msg; } // _____________________________________________________________________________ int main(int argc, char** argv) { @@ -46,45 +97,107 @@ int main(int argc, char** argv) { // initialize randomness srand(time(NULL) + rand()); // NOLINT + // use utf8 locale + std::setlocale(LC_ALL, "en_US.utf8"); + + T_START(total); + Config cfg; MotConfigReader motCfgReader; ConfigReader cr; cr.read(&cfg, argc, argv); - ad::cppgtfs::gtfs::Feed gtfs; + std::vector gtfs(cfg.feedPaths.size()); - motCfgReader.parse(cfg.configPaths); + std::vector cfgPaths = getCfgPaths(cfg); + + try { + motCfgReader.parse(cfgPaths, cfg.motCfgParam); + } catch (const configparser::ParseExc& ex) { + LOG(ERROR) << "Could not parse MOT configurations, reason was:"; + std::cerr << ex.what() << std::endl; + exit(static_cast(RetCode::MOT_CFG_PARSE_ERR)); + } + + if (cfg.osmPath.empty() && !cfg.writeOverpass && !cfg.writeOsmfilter) { + std::cerr << "No OSM input file specified (-x), see --help." 
<< std::endl; + exit(static_cast(RetCode::NO_OSM_INPUT)); + } + + if (motCfgReader.getConfigs().size() == 0) { + LOG(ERROR) << "No MOT configurations specified and no implicit " + "configurations found, see --help."; + exit(static_cast(RetCode::NO_MOT_CFG)); + } + + T_START(gtfsBuild); if (cfg.feedPaths.size() == 1) { - LOG(INFO) << "Reading " << cfg.feedPaths[0] << " ..."; - ad::cppgtfs::Parser p; - p.parse(>fs, cfg.feedPaths[0]); - LOG(INFO) << "Done."; + if (cfg.inPlace) cfg.outputPath = cfg.feedPaths[0]; + if (!cfg.writeOverpass && !cfg.writeOsmfilter) + LOG(INFO) << "Reading GTFS feed " << cfg.feedPaths[0] << " ..."; + try { + ad::cppgtfs::Parser p(cfg.feedPaths[0], false, + cfg.parseAdditionalGTFSFields, + cfg.verbosity ? gtfsWarnCb : 0); + p.parse(>fs[0]); + } catch (const ad::cppgtfs::ParserException& ex) { + LOG(ERROR) << "Could not parse input GTFS feed, reason was:"; + std::cerr << ex.what() << std::endl; + exit(static_cast(RetCode::GTFS_PARSE_ERR)); + } + } else if (cfg.writeOsm.size() || cfg.writeOverpass) { + for (size_t i = 0; i < cfg.feedPaths.size(); i++) { + if (!cfg.writeOverpass && !cfg.writeOsmfilter) + LOG(INFO) << "Reading GTFS feed " << cfg.feedPaths[i] << " ..."; + try { + ad::cppgtfs::Parser p(cfg.feedPaths[i]); + p.parse(>fs[i]); + } catch (const ad::cppgtfs::ParserException& ex) { + LOG(ERROR) << "Could not parse input GTFS feed, reason was:"; + std::cerr << ex.what() << std::endl; + exit(static_cast(RetCode::GTFS_PARSE_ERR)); + } + } } else if (cfg.feedPaths.size() > 1) { - LOG(ERROR) << "Maximal one input feed allowed."; - exit(1); + std::cerr << "Multiple feeds only allowed in filter mode." 
<< std::endl; + exit(static_cast(RetCode::MULT_FEEDS_NOT_ALWD)); } + auto tGtfsBuild = T_STOP(gtfsBuild); + LOG(DEBUG) << "Read " << motCfgReader.getConfigs().size() << " unique MOT configs."; MOTs cmdCfgMots = cfg.mots; - ad::cppgtfs::gtfs::Trip* singleTrip = 0; + pfaedle::gtfs::Trip* singleTrip = 0; if (cfg.shapeTripId.size()) { - singleTrip = gtfs.getTrips().get(cfg.shapeTripId); + if (!cfg.feedPaths.size()) { + std::cout << "No input feed specified, see --help" << std::endl; + exit(static_cast(RetCode::NO_INPUT_FEED)); + } + singleTrip = gtfs[0].getTrips().get(cfg.shapeTripId); if (!singleTrip) { LOG(ERROR) << "Trip #" << cfg.shapeTripId << " not found."; - exit(1); + exit(static_cast(RetCode::TRIP_NOT_FOUND)); + } + } + + double maxSpeed = 0; + for (const auto& c : motCfgReader.getConfigs()) { + if (c.osmBuildOpts.maxSpeed > maxSpeed) { + maxSpeed = c.osmBuildOpts.maxSpeed; } } if (cfg.writeOsm.size()) { LOG(INFO) << "Writing filtered XML to " << cfg.writeOsm << " ..."; - BBoxIdx box(2500); - if (cfg.feedPaths.size()) { - box = ShapeBuilder::getPaddedGtfsBox(>fs, 2500, cmdCfgMots, - cfg.shapeTripId); + BBoxIdx box(BOX_PADDING); + + for (size_t i = 0; i < cfg.feedPaths.size(); i++) { + ShapeBuilder::getGtfsBox(>fs[i], cmdCfgMots, cfg.shapeTripId, true, + &box, maxSpeed, 0, cfg.verbosity); } OsmBuilder osmBuilder; std::vector opts; @@ -94,93 +207,317 @@ int main(int argc, char** argv) { opts.push_back(o.osmBuildOpts); } } - osmBuilder.filterWrite(cfg.osmPath, cfg.writeOsm, opts, box); - exit(0); + try { + osmBuilder.filterWrite(cfg.osmPath, cfg.writeOsm, opts, box); + } catch (const pfxml::parse_exc& ex) { + LOG(ERROR) << "Could not parse OSM data, reason was:"; + std::cerr << ex.what() << std::endl; + exit(static_cast(RetCode::OSM_PARSE_ERR)); + } + exit(static_cast(RetCode::SUCCESS)); + } else if (cfg.writeOverpass) { + BBoxIdx box(BOX_PADDING); + for (size_t i = 0; i < cfg.feedPaths.size(); i++) { + ShapeBuilder::getGtfsBox(>fs[i], cmdCfgMots, 
cfg.shapeTripId, true, + &box, maxSpeed, 0, cfg.verbosity); + } + OsmBuilder osmBuilder; + std::vector opts; + for (const auto& o : motCfgReader.getConfigs()) { + if (std::find_first_of(o.mots.begin(), o.mots.end(), cmdCfgMots.begin(), + cmdCfgMots.end()) != o.mots.end()) { + opts.push_back(o.osmBuildOpts); + } + } + osmBuilder.overpassQryWrite(&std::cout, opts, box); + exit(static_cast(RetCode::SUCCESS)); + } else if (cfg.writeOsmfilter) { + BBoxIdx box(BOX_PADDING); + OsmBuilder osmBuilder; + std::vector opts; + for (const auto& o : motCfgReader.getConfigs()) { + if (std::find_first_of(o.mots.begin(), o.mots.end(), cmdCfgMots.begin(), + cmdCfgMots.end()) != o.mots.end()) { + opts.push_back(o.osmBuildOpts); + } + } + osmBuilder.osmfilterRuleWrite(&std::cout, opts, box); + exit(static_cast(RetCode::SUCCESS)); + } else if (!cfg.feedPaths.size()) { + std::cout << "No input feed specified, see --help" << std::endl; + exit(static_cast(RetCode::NO_INPUT_FEED)); } - std::vector dfBins; - auto dfBinStrings = util::split(std::string(cfg.evalDfBins), ','); - for (auto st : dfBinStrings) dfBins.push_back(atof(st.c_str())); - Collector ecoll(cfg.evalPath, dfBins); + Stats stats; + double tOsmBuild = 0; + std::map> graphDimensions; + std::vector hopDists; for (const auto& motCfg : motCfgReader.getConfigs()) { - auto usedMots = getContMots(motCfg, cmdCfgMots); + std::string filePost; + auto usedMots = pfaedle::router::motISect(motCfg.mots, cmdCfgMots); if (!usedMots.size()) continue; + if (singleTrip && !usedMots.count(singleTrip->getRoute()->getType())) + continue; + if (motCfgReader.getConfigs().size() > 1) + filePost = getFileNameMotStr(usedMots); - std::string motStr = getMotStr(usedMots); - LOG(INFO) << "Calculating shapes for mots " << motStr; + std::string motStr = pfaedle::router::getMotStr(usedMots); + LOG(INFO) << "Matching shapes for mots " << motStr; - ShapeBuilder shapeBuilder(>fs, cmdCfgMots, motCfg, &ecoll, cfg); + try { + pfaedle::router::FeedStops fStops = + 
pfaedle::router::writeMotStops(>fs[0], usedMots, cfg.shapeTripId); - if (cfg.writeGraph) { - LOG(INFO) << "Outputting graph.json..."; - util::geo::output::GeoGraphJsonOutput out; - std::ofstream fstr(cfg.dbgOutputPath + "/graph.json"); - out.print(*shapeBuilder.getGraph(), fstr); - fstr.close(); - } + pfaedle::osm::Restrictor restr; + pfaedle::trgraph::Graph graph; + pfaedle::osm::OsmBuilder osmBuilder; - if (singleTrip) { - LOG(INFO) << "Outputting path.json..."; - std::ofstream pstr(cfg.dbgOutputPath + "/path.json"); - util::geo::output::GeoJsonOutput o(pstr); + pfaedle::osm::BBoxIdx box(BOX_PADDING); + ShapeBuilder::getGtfsBox( + >fs[0], usedMots, cfg.shapeTripId, cfg.dropShapes, &box, + motCfg.osmBuildOpts.maxSpeed, &hopDists, cfg.verbosity); - auto l = shapeBuilder.shapeL(singleTrip); + T_START(osmBuild); - if (singleTrip->getShape()) { - auto orig = Collector::getWebMercLine(singleTrip->getShape(), -1, -1); - o.print(orig, {{"ver", "old"}}); + if (fStops.size()) + osmBuilder.read(cfg.osmPath, motCfg.osmBuildOpts, &graph, box, + cfg.gridSize, &restr); + + tOsmBuild += T_STOP(osmBuild); + graphDimensions[filePost].first = graph.getNds().size(); + + for (const auto& nd : graph.getNds()) { + graphDimensions[filePost].second += nd->getAdjListOut().size(); } - o.print(l, {{"ver", "new"}}); - o.flush(); - pstr.close(); + StatsimiClassifier* statsimiClassifier; - exit(0); - } + if (motCfg.routingOpts.statsimiMethod == "bts") { + statsimiClassifier = new BTSClassifier(); + } else if (motCfg.routingOpts.statsimiMethod == "jaccard") { + statsimiClassifier = new JaccardClassifier(); + } else if (motCfg.routingOpts.statsimiMethod == "jaccard-geodist") { + statsimiClassifier = new JaccardGeodistClassifier(); + } else if (motCfg.routingOpts.statsimiMethod == "ed") { + statsimiClassifier = new EDClassifier(); + } else if (motCfg.routingOpts.statsimiMethod == "ped") { + statsimiClassifier = new PEDClassifier(); + } else { + LOG(ERROR) << "Unknown station similarity classifier 
" + << motCfg.routingOpts.statsimiMethod; + exit(1); + } - pfaedle::netgraph::Graph ng; - shapeBuilder.shape(&ng); + Router* router = 0; - if (cfg.buildTransitGraph) { - LOG(INFO) << "Outputting trgraph.json..."; - util::geo::output::GeoGraphJsonOutput out; - std::ofstream fstr(cfg.dbgOutputPath + "/trgraph.json"); - out.print(ng, fstr); - fstr.close(); + if (motCfg.routingOpts.transPenMethod == "exp") { + if (cfg.noAStar) + router = new RouterImpl(); + else + router = new RouterImpl(); + } else if (motCfg.routingOpts.transPenMethod == "distdiff") { + if (cfg.noAStar) + router = new RouterImpl(); + else + router = new RouterImpl(); + } else if (motCfg.routingOpts.transPenMethod == "timenorm") { + if (cfg.noAStar) + router = new RouterImpl(); + else + router = new RouterImpl(); + } else { + LOG(ERROR) << "Unknown routing method " + << motCfg.routingOpts.transPenMethod; + exit(1); + } + + ShapeBuilder shapeBuilder(>fs[0], usedMots, motCfg, &graph, &fStops, + &restr, statsimiClassifier, router, cfg); + + pfaedle::netgraph::Graph ng; + + if (singleTrip) { + mkdir(cfg.dbgOutputPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); + std::ofstream pstr(cfg.dbgOutputPath + "/path.json"); + util::geo::output::GeoJsonOutput o(pstr); + + auto l = shapeBuilder.shapeL(singleTrip); + stats += l.second; + + LOG(INFO) << "Outputting path.json..."; + // reproject to WGS84 to match RFC 7946 + o.print(l.first, {}); + + o.flush(); + pstr.close(); + } else { + stats += shapeBuilder.shapeify(&ng); + } + + if (router) delete router; + if (statsimiClassifier) delete statsimiClassifier; + + if (cfg.writeGraph) { + LOG(INFO) << "Outputting graph.json..."; + util::geo::output::GeoGraphJsonOutput out; + mkdir(cfg.dbgOutputPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); + std::ofstream fstr(cfg.dbgOutputPath + "/graph.json"); + out.print(*shapeBuilder.getGraph(), fstr); + fstr.close(); + } + + if (singleTrip) exit(static_cast(RetCode::SUCCESS)); + + if (cfg.buildTransitGraph) { + 
util::geo::output::GeoGraphJsonOutput out; + LOG(INFO) << "Outputting trgraph-" + filePost + ".json..."; + mkdir(cfg.dbgOutputPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); + std::ofstream fstr(cfg.dbgOutputPath + "/trgraph-" + filePost + + ".json"); + out.print(ng, fstr); + fstr.close(); + } + } catch (const pfxml::parse_exc& ex) { + LOG(ERROR) << "Could not parse OSM data, reason was:"; + std::cerr << ex.what() << std::endl; + exit(static_cast(RetCode::OSM_PARSE_ERR)); } } - if (cfg.evaluate) ecoll.printStats(&std::cout); + // outputting stats + if (cfg.writeStats) { + util::json::Dict graphSizes; + + double numNodesTot = 0; + double numEdgesTot = 0; + + for (const auto& gd : graphDimensions) { + util::json::Dict a; + a["num_nodes"] = gd.second.first; + a["num_edges"] = gd.second.second; + numNodesTot += gd.second.first; + numEdgesTot += gd.second.second; + graphSizes[gd.first] = a; + } + + double hopDistSum = 0; + for (auto d : hopDists) hopDistSum += d; + + util::json::Dict jsonStats = { + {"statistics", + util::json::Dict{ + {"gtfs_num_stations", gtfs[0].getStops().size()}, + {"gtfs_num_trips", gtfs[0].getTrips().size()}, + {"gtfs_avg_hop_dist", hopDistSum / (hopDists.size() * 1.0)}, + {"graph_dimension", graphSizes}, + {"num_nodes_tot", numNodesTot}, + {"num_edges_tot", numEdgesTot}, + {"num_tries", stats.numTries}, + {"num_trie_leafs", stats.numTrieLeafs}, + {"dijkstra_iters", stats.dijkstraIters}, + {"time_solve", stats.solveTime}, + {"time_read_osm", tOsmBuild}, + {"time_read_gtfs", tGtfsBuild}, + {"time_tot", T_STOP(total)}, + {"peak-memory", util::readableSize(util::getPeakRSS())}, + {"peak-memory-bytes", util::getPeakRSS()}}}}; + + std::ofstream ofs; + ofs.open(cfg.dbgOutputPath + "/stats.json"); + util::json::Writer wr(&ofs, 10, true); + wr.val(jsonStats); + wr.closeAll(); + } if (cfg.feedPaths.size()) { - LOG(INFO) << "Writing output GTFS to " << cfg.outputPath << " ..."; - ad::cppgtfs::Writer w; - w.write(>fs, cfg.outputPath); + try { + 
LOG(INFO) << "Writing output GTFS to " << cfg.outputPath << " ..."; + pfaedle::gtfs::Writer w; + w.write(>fs[0], cfg.outputPath); + } catch (const ad::cppgtfs::WriterException& ex) { + LOG(ERROR) << "Could not write output GTFS feed, reason was:"; + std::cerr << ex.what() << std::endl; + exit(static_cast(RetCode::GTFS_WRITE_ERR)); + } } - return (0); + return static_cast(RetCode::SUCCESS); } // _____________________________________________________________________________ -std::string getMotStr(const MOTs& mots) { - bool first = false; +std::string getFileNameMotStr(const MOTs& mots) { + MOTs tmp = mots; std::string motStr; - for (const auto& mot : mots) { - if (first) motStr += ", "; - motStr += "<" + Route::getTypeString(mot) + ">"; - first = true; + + std::string names[11] = {"tram", "subway", "rail", "bus", + "ferry", "cablecar", "gondola", "funicular", + "coach", "trolleybus", "monorail"}; + + for (const auto& n : names) { + const auto& types = ad::cppgtfs::gtfs::flat::Route::getTypesFromString(n); + const auto& isect = pfaedle::router::motISect(tmp, types); + + if (isect.size() == types.size()) { + if (motStr.size()) motStr += "-"; + motStr += n; + for (const auto& mot : isect) tmp.erase(mot); + } + } + + for (const auto& mot : tmp) { + if (motStr.size()) motStr += "-"; + motStr += ad::cppgtfs::gtfs::flat::Route::getTypeString(mot); } return motStr; } // _____________________________________________________________________________ -MOTs getContMots(const MotConfig& motCfg, const MOTs& mots) { - MOTs ret; - for (const auto& mot : mots) { - if (motCfg.mots.count(mot)) { - ret.insert(mot); +std::vector getCfgPaths(const Config& cfg) { + if (cfg.configPaths.size()) return cfg.configPaths; + std::vector ret; + + // install prefix global configuration path, if available + { + auto path = std::string(INSTALL_PREFIX) + std::string(CFG_DIR) + "/" + + "pfaedle" + "/" + CFG_FILE_NAME; + std::ifstream is(path); + + LOG(DEBUG) << "Testing for config file at " << path; + if 
(is.good()) { + ret.push_back(path); + LOG(DEBUG) << "Found implicit config file " << path; + } + } + + // local user configuration path, if available + { + auto path = util::getHomeDir() + CFG_HOME_SUFFIX + "/" + "pfaedle" + "/" + + CFG_FILE_NAME; + std::ifstream is(path); + + LOG(DEBUG) << "Testing for config file at " << path; + if (is.good()) { + ret.push_back(path); + LOG(DEBUG) << "Found implicit config file " << path; + } + } + + // free this here, as we use homedir in the block above + + // CWD + { + char cwd[PATH_MAX]; + if (getcwd(cwd, sizeof(cwd))) { + auto path = std::string(cwd) + "/" + CFG_FILE_NAME; + std::ifstream is(path); + + LOG(DEBUG) << "Testing for config file at " << path; + if (is.good()) { + ret.push_back(path); + LOG(DEBUG) << "Found implicit config file " << path; + } } } diff --git a/src/pfaedle/_config.h.in b/src/pfaedle/_config.h.in index 78f0fb9..2fe88eb 100644 --- a/src/pfaedle/_config.h.in +++ b/src/pfaedle/_config.h.in @@ -7,4 +7,7 @@ // version number from cmake version module #define VERSION_FULL "@VERSION_GIT_FULL@" +// version number from cmake version module +#define INSTALL_PREFIX "@CMAKE_INSTALL_PREFIX@" + #endif // SRC_PFAEDLE_CONFIG_H_N diff --git a/src/pfaedle/config/ConfigReader.cpp b/src/pfaedle/config/ConfigReader.cpp index 0771ff9..9e62a96 100644 --- a/src/pfaedle/config/ConfigReader.cpp +++ b/src/pfaedle/config/ConfigReader.cpp @@ -7,78 +7,114 @@ #include #include #include +#include "pfaedle/Def.h" #include "pfaedle/_config.h" #include "pfaedle/config/ConfigReader.h" +#include "pfaedle/config/PfaedleConfig.h" #include "util/String.h" +#include "util/geo/Geo.h" #include "util/log/Log.h" using pfaedle::config::ConfigReader; -using std::string; using std::exception; +using std::string; using std::vector; +static const char* YEAR = &__DATE__[7]; +static const char* COPY = + "University of Freiburg - Chair of Algorithms and Data Structures"; +static const char* AUTHORS = "Patrick Brosi "; + // 
_____________________________________________________________________________ -void ConfigReader::help() { - std::cout - << std::setfill(' ') << std::left - << "\033[1mpfaedle GTFS map matcher \033[22m\n" - << VERSION_FULL << " (built " << __DATE__ << " " << __TIME__ << ")\n\n" - << "(C) 2018 University of Freiburg\n" - << "Author: Patrick Brosi \n\n" - << "Usage: " - << " -x -c \n\n" - << "Allowed options:\n\n" - << "General:\n" - << std::setw(35) << " -v [ --version ]" - << "print version\n" - << std::setw(35) << " -h [ --help ]" - << "show this help message\n" - << "\nInput:\n" - << std::setw(35) << " -c [ --config ] arg" - << "pfaedle config file\n" - << std::setw(35) << " -i [ --input ] arg" - << "gtfs feed(s), may also be given as positional parameter (see usage)\n" - << std::setw(35) << " -x [ --osm-file ] arg" - << "OSM xml input file\n" - << std::setw(35) << " -m [ --mots ] arg (=all)" - << "MOTs to calculate shapes for, comma separated, either as string " - "{all,\n" - << std::setw(35) << " " - << "tram | streetcar, subway | metro, rail | train, bus, ferry | boat | " - "\n" - << std::setw(35) << " " - << "ship, cableclar, gondola, funicular} or as GTFS mot codes\n" - << "\nOutput:\n" - << std::setw(35) << " -o [ --output ] arg (=gtfs-out)" - << "GTFS output path\n" - << std::setw(35) << " -X [ --osm-out ] arg" - << "if specified, a filtered OSM file will be written to \n" - << "\nDebug Output:\n" - << std::setw(35) << " -d [ --dbg-path ] arg (=geo)" - << "output path for debug files\n" - << std::setw(35) << " --write-trgraph" - << "write transit graph as GeoJSON to /trgraph.json\n" - << std::setw(35) << " --write-graph" - << "write routing graph as GeoJSON to /graph.json\n" - << std::setw(35) << " --write-cgraph" - << "write combination graph as GeoJSON to /combraph.json\n" - << std::setw(35) << " --method arg (=global)" - << "matching method to use, either 'global' (based on HMM), 'greedy' or " - "'greedy2'\n" - << std::setw(35) << " --eval" - << 
"evaluate existing shapes against matched shapes and print results\n" - << std::setw(35) << " --eval-path arg (=.)" - << "path for eval file output\n" - << std::setw(35) << " --eval-df-bins arg (= )" - << "bins to use for d_f histogram, comma separated (e.g. 10,20,30,40)\n" - << "\nMisc:\n" - << std::setw(35) << " -T [ --trip-id ] arg" - << "Do routing only for trip , write result to\n" - << std::setw(35) << " " - << "/path.json\n" - << std::setw(35) << " --grid-size arg (=2000)" - << "Grid cell size\n"; +void ConfigReader::help(const char* bin) { + std::cout << std::setfill(' ') << std::left << "pfaedle GTFS map matcher " + << VERSION_FULL << "\n(built " << __DATE__ << " " << __TIME__ + << " with geometry precision <" << PFDL_PREC_STR << ">)\n\n" + << "(C) " << YEAR << " " << COPY << "\n" + << "Authors: " << AUTHORS << "\n\n" + << "Usage: " << bin << " -x \n\n" + << "Allowed options:\n\n" + << "General:\n" + << std::setw(35) << " -v [ --version ]" + << "print version\n" + << std::setw(35) << " -h [ --help ]" + << "show this help message\n" + << std::setw(35) << " -D [ --drop-shapes ]" + << "drop shapes already present in the feed and\n" + << std::setw(35) << " " + << " recalculate them\n" + << std::setw(35) << " --write-colors" + << "write matched route line colors, where missing\n" + << "\nInput:\n" + << std::setw(35) << " -c [ --config ] arg" + << "pfaedle config file\n" + << std::setw(35) << " -i [ --input ] arg" + << "gtfs feed(s), may also be given as positional\n" + << std::setw(35) << " " + << " parameter (see usage)\n" + << std::setw(35) << " -x [ --osm-file ] arg" + << "OSM xml input file\n" + << std::setw(35) << " -m [ --mots ] arg (=all)" + << "MOTs to calculate shapes for, comma sep.,\n" + << std::setw(35) << " " + << " either as string " + "{all, tram | streetcar,\n" + << std::setw(35) << " " + << " subway | metro, rail | train, bus,\n" + << std::setw(35) << " " + << " ferry | boat | ship, cablecar, gondola,\n" + << std::setw(35) << " " + << " 
funicular, coach, mono-rail | monorail,\n" + << std::setw(35) << " " + << " trolley | trolleybus | trolley-bus} or\n" + << std::setw(35) << " " + << " as GTFS mot codes\n" + << "\nOutput:\n" + << std::setw(35) << " -o [ --output ] arg (=gtfs-out)" + << "GTFS output path\n" + << std::setw(35) << " -X [ --osm-out ] arg" + << "if specified, a filtered OSM file will be\n" + << std::setw(35) << " " + << " written to \n" + << std::setw(35) << " --inplace" + << "overwrite input GTFS feed with output feed\n" + << "\nDebug Output:\n" + << std::setw(35) << " -d [ --dbg-path ] arg (=.)" + << "output path for debug files\n" + << std::setw(35) << " --write-trgraph" + << "write transit graph as GeoJSON to\n" + << std::setw(35) << " " + << " /trgraph.json\n" + << std::setw(35) << " --write-graph" + << "write routing graph as GeoJSON to\n" + << std::setw(35) << " " + << " /graph.json\n" + << "\nMisc:\n" + << std::setw(35) << " -T [ --trip-id ] arg" + << "Do routing only for trip , write result \n" + << std::setw(35) << " " + << " to /path.json\n" + << std::setw(35) << " --overpass" + << "Output overpass query for matching OSM data\n" + << std::setw(35) << " --osmfilter" + << "Output osmfilter filter rules for matching OSM data\n" + << std::setw(35) << " --grid-size arg (=2000)" + << "Approx. 
grid cell size in meters\n" + << std::setw(35) << " --no-fast-hops" + << "Disable fast hops technique\n" + << std::setw(35) << " --no-a-star" + << "Disable A* heuristic \n" + << std::setw(35) << " --no-trie" + << "Disable trip tries \n" + << std::setw(35) << " --no-hop-cache" + << "Disable hop cache \n" + << std::setw(35) << " --stats" + << "write stats to stats.json\n" + << std::setw(35) << " -W [ --warn ]" + << "enable verbose warning messages\n" + << std::setw(35) << " -P" + << "additional parameter string (in cfg file format)\n"; } // _____________________________________________________________________________ @@ -93,44 +129,45 @@ void ConfigReader::read(Config* cfg, int argc, char** argv) { {"drop-shapes", required_argument, 0, 'D'}, {"mots", required_argument, NULL, 'm'}, {"grid-size", required_argument, 0, 'g'}, + {"overpass", no_argument, 0, 'a'}, + {"osmfilter", no_argument, 0, 'f'}, {"osm-out", required_argument, 0, 'X'}, {"trip-id", required_argument, 0, 'T'}, {"write-graph", no_argument, 0, 1}, - {"write-cgraph", no_argument, 0, 2}, {"write-trgraph", no_argument, 0, 4}, - {"method", required_argument, 0, 5}, - {"eval", no_argument, 0, 3}, - {"eval-path", required_argument, 0, 6}, - {"eval-df-bins", required_argument, 0, 7}, {"dbg-path", required_argument, 0, 'd'}, {"version", no_argument, 0, 'v'}, {"help", no_argument, 0, 'h'}, + {"inplace", no_argument, 0, 9}, + {"no-fast-hops", no_argument, 0, 10}, + {"no-a-star", no_argument, 0, 11}, + {"no-trie", no_argument, 0, 12}, + {"write-colors", no_argument, 0, 13}, + {"stats", no_argument, 0, 14}, + {"no-hop-cache", no_argument, 0, 15}, + {"gaussian-noise", required_argument, 0, 16}, + {"warn", no_argument, 0, 'W'}, + {"keep-additional-gtfs-fields", no_argument, 0, 'F'}, {0, 0, 0, 0}}; - char c; - while ((c = getopt_long(argc, argv, ":o:hvi:c:x:Dm:g:X:T:d:p", ops, 0)) != + int c; + while ((c = getopt_long(argc, argv, ":o:hvi:c:x:Dm:g:X:T:d:pP:FW", ops, 0)) != -1) { switch (c) { case 1: cfg->writeGraph = 
true; break; - case 2: - cfg->writeCombGraph = true; - break; - case 3: - cfg->evaluate = true; - break; case 4: cfg->buildTransitGraph = true; break; - case 5: - cfg->solveMethod = optarg; + case 10: + cfg->noFastHops = true; break; - case 6: - cfg->evalPath = optarg; + case 11: + cfg->noAStar = true; break; - case 7: - cfg->evalDfBins = optarg; + case 12: + cfg->noTrie = true; break; case 'o': cfg->outputPath = optarg; @@ -151,7 +188,7 @@ void ConfigReader::read(Config* cfg, int argc, char** argv) { motStr = optarg; break; case 'g': - cfg->gridSize = atof(optarg); + cfg->gridSize = atof(optarg) / util::geo::M_PER_DEG; break; case 'X': cfg->writeOsm = optarg; @@ -159,18 +196,47 @@ void ConfigReader::read(Config* cfg, int argc, char** argv) { case 'T': cfg->shapeTripId = optarg; break; + case 'P': + cfg->motCfgParam += std::string("\n") + optarg; + break; case 'd': cfg->dbgOutputPath = optarg; break; + case 'a': + cfg->writeOverpass = true; + break; + case 'f': + cfg->writeOsmfilter = true; + break; + case 9: + cfg->inPlace = true; + break; + case 13: + cfg->writeColors = true; + break; + case 14: + cfg->writeStats = true; + break; + case 15: + cfg->noHopCache = true; + break; + case 16: + cfg->gaussianNoise = atof(optarg); + break; + case 'W': + cfg->verbosity = 1; + break; + case 'F': + cfg->parseAdditionalGTFSFields = true; + break; case 'v': - std::cout << VERSION_FULL << " (built " << __DATE__ << " " << __TIME__ - << ")\n\n"; + std::cout << "pfaedle " << VERSION_FULL << std::endl; exit(0); case 'p': printOpts = true; break; case 'h': - help(); + help(argv[0]); exit(0); case ':': std::cerr << argv[optind - 1]; @@ -192,7 +258,8 @@ void ConfigReader::read(Config* cfg, int argc, char** argv) { auto v = util::split(motStr, ','); for (const auto& motStr : v) { - const auto& mots = Route::getTypesFromString(util::trim(motStr)); + const auto& mots = + ad::cppgtfs::gtfs::flat::Route::getTypesFromString(util::trim(motStr)); cfg->mots.insert(mots.begin(), mots.end()); } 
diff --git a/src/pfaedle/config/ConfigReader.h b/src/pfaedle/config/ConfigReader.h index 1a038ec..02490bc 100644 --- a/src/pfaedle/config/ConfigReader.h +++ b/src/pfaedle/config/ConfigReader.h @@ -14,7 +14,7 @@ namespace config { class ConfigReader { public: static void read(Config* targetConfig, int argc, char** argv); - static void help(); + static void help(const char* bin); }; } } diff --git a/src/pfaedle/config/MotConfig.h b/src/pfaedle/config/MotConfig.h index 4a85d85..f99f4eb 100644 --- a/src/pfaedle/config/MotConfig.h +++ b/src/pfaedle/config/MotConfig.h @@ -5,17 +5,19 @@ #ifndef PFAEDLE_CONFIG_MOTCONFIG_H_ #define PFAEDLE_CONFIG_MOTCONFIG_H_ +#include +#include #include "pfaedle/osm/OsmBuilder.h" #include "pfaedle/router/Router.h" namespace pfaedle { namespace config { - struct MotConfig { router::MOTs mots; osm::OsmReadOpts osmBuildOpts; router::RoutingOpts routingOpts; + std::string transWeight; }; inline bool operator==(const MotConfig& a, const MotConfig& b) { diff --git a/src/pfaedle/config/MotConfigReader.cpp b/src/pfaedle/config/MotConfigReader.cpp index dc5ac2e..52b8218 100644 --- a/src/pfaedle/config/MotConfigReader.cpp +++ b/src/pfaedle/config/MotConfigReader.cpp @@ -2,362 +2,643 @@ // Chair of Algorithms and Data Structures. 
// Authors: Patrick Brosi +#include #include #include #include "pfaedle/config/MotConfigReader.h" +#include "pfaedle/osm/OsmReadOpts.h" #include "util/Misc.h" #include "util/String.h" +#include "util/log/Log.h" -using pfaedle::config::MotConfigReader; -using pfaedle::config::MotConfig; -using pfaedle::osm::FilterRule; -using pfaedle::osm::KeyVal; +using ad::cppgtfs::gtfs::Route; using configparser::ConfigFileParser; using configparser::ParseExc; +using pfaedle::config::MotConfig; +using pfaedle::config::MotConfigReader; using pfaedle::osm::DeepAttrRule; +using pfaedle::osm::FilterRule; +using pfaedle::osm::KeyVal; using pfaedle::trgraph::ReplRules; -using ad::cppgtfs::gtfs::Route; + +double DEF_TRANS_PEN = 0.0083; // _____________________________________________________________________________ MotConfigReader::MotConfigReader() {} // _____________________________________________________________________________ -void MotConfigReader::parse(const std::vector& paths) { +void MotConfigReader::parse(const std::vector& paths, + const std::string& literal) { + ConfigFileParser p; + + // parse explicitly given paths for (const auto& s : paths) { - ConfigFileParser p; + LOG(DEBUG) << "Reading config file " << s; p.parse(s); + } - for (const auto& sec : p.getSecs()) { - MotConfig curCfg; - std::string secStr = sec.first; + if (literal.size()) p.parseStr(literal); - if (p.hasKey(secStr, "osm_filter_keep")) { - for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_keep", ' ')) { + for (const auto& sec : p.getSecs()) { + MotConfig cfg; + + cfg.transWeight = "expo"; + + std::string secStr = sec.first; + if (secStr.empty()) continue; + + if (p.hasKey(secStr, "routing_emission_method")) { + cfg.routingOpts.emPenMethod = + p.getStr(secStr, "routing_emission_method"); + } else { + cfg.routingOpts.emPenMethod = "exp"; + } + + if (p.hasKey(secStr, "routing_transition_method")) { + cfg.routingOpts.transPenMethod = + p.getStr(secStr, "routing_transition_method"); + } else { + 
cfg.routingOpts.transPenMethod = "exp"; + } + + if (p.hasKey(secStr, "station_similarity_classification_method")) { + cfg.routingOpts.statsimiMethod = + p.getStr(secStr, "station_similarity_classification_method"); + } else { + cfg.routingOpts.statsimiMethod = "jaccard-geodist"; + } + + if (p.hasKey(secStr, "routing_use_stations")) { + cfg.routingOpts.useStations = p.getBool(secStr, "routing_use_stations"); + } else { + cfg.routingOpts.useStations = true; + } + + if (p.hasKey(secStr, "osm_filter_keep")) { + for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_keep", ' ')) { + auto fRule = getFRule(kvs); + cfg.osmBuildOpts.keepFilter[fRule.kv.first].insert( + osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); + } + } + + for (uint8_t i = 0; i < 8; i++) { + std::string name = std::string("osm_filter_lvl") + std::to_string(i); + if (p.hasKey(secStr, name)) { + for (const auto& kvs : p.getStrArr(sec.first, name, ' ')) { auto fRule = getFRule(kvs); - curCfg.osmBuildOpts.keepFilter[fRule.kv.first].insert( + cfg.osmBuildOpts.levelFilters[i][fRule.kv.first].insert( osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); } } + } - for (uint8_t i = 0; i < 7; i++) { - std::string name = - std::string("osm_filter_lvl") + std::to_string(i + 1); - if (p.hasKey(secStr, name)) { - for (const auto& kvs : p.getStrArr(sec.first, name, ' ')) { - auto fRule = getFRule(kvs); - curCfg.osmBuildOpts.levelFilters[i][fRule.kv.first].insert( - osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); - } + if (p.hasKey(secStr, "osm_filter_drop")) { + for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_drop", ' ')) { + auto fRule = getFRule(kvs); + cfg.osmBuildOpts.dropFilter[fRule.kv.first].insert( + osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); + } + } + + if (p.hasKey(secStr, "osm_max_snap_level")) { + cfg.osmBuildOpts.maxSnapLevel = p.getInt(sec.first, "osm_max_snap_level"); + } else { + cfg.osmBuildOpts.maxSnapLevel = 7; + } + + if (p.hasKey(secStr, 
"osm_filter_nohup")) { + for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_nohup", ' ')) { + auto fRule = getFRule(kvs); + cfg.osmBuildOpts.noHupFilter[fRule.kv.first].insert( + osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); + } + } + + if (p.hasKey(secStr, "osm_filter_oneway")) { + for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_oneway", ' ')) { + auto fRule = getFRule(kvs); + cfg.osmBuildOpts.oneWayFilter[fRule.kv.first].insert( + osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); + } + } + + if (p.hasKey(secStr, "osm_filter_oneway_reverse")) { + for (const auto& kvs : + p.getStrArr(sec.first, "osm_filter_oneway_reverse", ' ')) { + auto fRule = getFRule(kvs); + cfg.osmBuildOpts.oneWayFilterRev[fRule.kv.first].insert( + osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); + } + } + + if (p.hasKey(secStr, "osm_filter_undirected")) { + for (const auto& kvs : + p.getStrArr(sec.first, "osm_filter_undirected", ' ')) { + auto fRule = getFRule(kvs); + cfg.osmBuildOpts.twoWayFilter[fRule.kv.first].insert( + osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); + } + } + + if (p.hasKey(secStr, "osm_filter_station")) { + for (const auto& kvs : + p.getStrArr(sec.first, "osm_filter_station", ' ')) { + auto fRule = getFRule(kvs); + cfg.osmBuildOpts.stationFilter[fRule.kv.first].insert( + osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); + } + } + + if (p.hasKey(secStr, "osm_filter_station_blocker")) { + for (const auto& kvs : + p.getStrArr(sec.first, "osm_filter_station_blocker", ' ')) { + auto fRule = getFRule(kvs); + cfg.osmBuildOpts.stationBlockerFilter[fRule.kv.first].insert( + osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); + } + } + + if (p.hasKey(secStr, "osm_filter_turning_circle")) { + for (const auto& kvs : + p.getStrArr(sec.first, "osm_filter_turning_circle", ' ')) { + auto fRule = getFRule(kvs); + cfg.osmBuildOpts.turnCycleFilter[fRule.kv.first].insert( + osm::AttrFlagPair(fRule.kv.second, 
getFlags(fRule.flags))); + } + } + + if (p.hasKey(secStr, "osm_node_positive_restriction")) { + for (const auto& kvs : + p.getStrArr(sec.first, "osm_node_positive_restriction", ' ')) { + auto fRule = getFRule(kvs); + cfg.osmBuildOpts.restrPosRestr[fRule.kv.first].insert( + osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); + } + } + + if (p.hasKey(secStr, "osm_node_negative_restriction")) { + for (const auto& kvs : + p.getStrArr(sec.first, "osm_node_negative_restriction", ' ')) { + auto fRule = getFRule(kvs); + cfg.osmBuildOpts.restrNegRestr[fRule.kv.first].insert( + osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); + } + } + + if (p.hasKey(secStr, "osm_filter_no_restriction")) { + for (const auto& kvs : + p.getStrArr(sec.first, "osm_filter_no_restriction", ' ')) { + auto fRule = getFRule(kvs); + cfg.osmBuildOpts.noRestrFilter[fRule.kv.first].insert( + osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); + } + } + + if (p.hasKey(secStr, "osm_station_name_attrs")) { + for (const std::string& r : + p.getStrArr(sec.first, "osm_station_name_attrs", ' ')) { + cfg.osmBuildOpts.statAttrRules.nameRule.push_back(getDeepAttrRule(r)); + } + } + + if (p.hasKey(secStr, "osm_track_number_tags")) { + for (const std::string& r : + p.getStrArr(sec.first, "osm_track_number_tags", ' ')) { + cfg.osmBuildOpts.statAttrRules.platformRule.push_back( + getDeepAttrRule(r)); + } + } + + if (p.hasKey(secStr, "osm_station_id_attrs")) { + for (const std::string& r : + p.getStrArr(sec.first, "osm_station_id_attrs", ' ')) { + cfg.osmBuildOpts.statAttrRules.idRule.push_back(getDeepAttrRule(r)); + } + } + + if (p.hasKey(secStr, "osm_edge_track_number_tags")) { + for (const std::string& r : + p.getStrArr(sec.first, "osm_edge_track_number_tags", ' ')) { + cfg.osmBuildOpts.edgePlatformRules.push_back(getDeepAttrRule(r)); + } + } + + if (p.hasKey(secStr, "osm_station_group_attrs")) { + LOG(WARN) << "Option osm_station_group_attrs has been removed."; + } + + // default value, 
to enable color writing on old configs + cfg.osmBuildOpts.relLinerules.colorRule = {"colour", "color"}; + + if (p.hasKey(secStr, "osm_line_relation_tags")) { + auto arr = p.getStrArr(secStr, "osm_line_relation_tags", ' '); + + for (const auto& ruleStr : arr) { + auto rule = getKv(ruleStr); + auto tags = util::split(rule.second, ','); + if (rule.first == "from_name") + cfg.osmBuildOpts.relLinerules.fromNameRule = tags; + else if (rule.first == "to_name") + cfg.osmBuildOpts.relLinerules.toNameRule = tags; + else if (rule.first == "line_name") + cfg.osmBuildOpts.relLinerules.sNameRule = tags; + else if (rule.first == "line_color") + cfg.osmBuildOpts.relLinerules.colorRule = tags; + } + } + + cfg.osmBuildOpts.maxSnapDistance = 50; + if (p.hasKey(secStr, "osm_max_snap_distance")) { + auto v = p.getDoubleArr(secStr, "osm_max_snap_distance", ','); + if (v.size()) cfg.osmBuildOpts.maxSnapDistance = v.back(); + } + + cfg.osmBuildOpts.maxStationCandDistance = + cfg.osmBuildOpts.maxSnapDistance * 2; + if (p.hasKey(secStr, "osm_max_station_cand_distance")) { + auto v = p.getDouble(secStr, "osm_max_station_cand_distance"); + cfg.osmBuildOpts.maxStationCandDistance = v; + } + + if (p.hasKey(secStr, "osm_max_snap_fallback_distance")) { + LOG(WARN) << "Option osm_max_snap_fallback_distance has been removed."; + } + + if (p.hasKey(secStr, "osm_max_osm_station_distance")) { + double ref = p.getDouble(secStr, "osm_max_osm_station_distance"); + cfg.osmBuildOpts.maxOsmStationDistances.push_back(ref); + } else { + cfg.osmBuildOpts.maxOsmStationDistances.push_back(15); + } + + if (p.hasKey(secStr, "osm_max_node_block_distance")) { + cfg.osmBuildOpts.maxBlockDistance = + p.getDouble(secStr, "osm_max_node_block_distance"); + } else { + cfg.osmBuildOpts.maxBlockDistance = + *std::max_element(cfg.osmBuildOpts.maxOsmStationDistances.begin(), + cfg.osmBuildOpts.maxOsmStationDistances.end()) / + 8; + } + + double DEF_SPEED = 85; + for (uint8_t i = 0; i < 8; i++) { + std::string name = + 
std::string("routing_lvl") + std::to_string(i) + "_fac"; + if (p.hasKey(secStr, name)) { + double f = p.getPosDouble(sec.first, name); + LOG(WARN) << "Option " << name << " is deprecated, use osm_lvl" + << std::to_string(i) << "_avg_speed instead."; + double v = DEF_SPEED / f; + LOG(DEBUG) << " (using osm_lvl" << std::to_string(i) << "_avg_speed of " + << v << " instead)"; + cfg.osmBuildOpts.levelDefSpeed[i] = v * 0.2777; // store in m/s + } + } + + for (uint8_t i = 0; i < 8; i++) { + std::string name = + std::string("osm_lvl") + std::to_string(i) + "_avg_speed"; + if (p.hasKey(secStr, name)) { + double v = p.getPosDouble(sec.first, name); + cfg.osmBuildOpts.levelDefSpeed[i] = v * 0.2777; // store in m/s + } + } + + if (p.hasKey(secStr, "routing_one_way_meter_punish_fac")) { + LOG(WARN) << "Option routing_one_way_meter_punish_fac is deprecated, use " + "osm_one_way_speed_penalty_fac instead."; + cfg.osmBuildOpts.oneWaySpeedPen = + 1 + p.getPosDouble(secStr, "routing_one_way_meter_punish_fac"); + LOG(DEBUG) << " (using osm_one_way_speed_penalty_fac of " + << cfg.osmBuildOpts.oneWaySpeedPen << " instead)"; + } else { + cfg.osmBuildOpts.oneWaySpeedPen = 1; + } + + if (p.hasKey(secStr, "osm_one_way_speed_penalty_fac")) { + cfg.osmBuildOpts.oneWaySpeedPen = + p.getPosDouble(secStr, "osm_one_way_speed_penalty_fac"); + } else { + // def already set above + } + + if (p.hasKey(secStr, "osm_one_way_entry_cost")) { + cfg.osmBuildOpts.oneWayEntryCost = + p.getPosDouble(secStr, "osm_one_way_entry_cost"); + + } else { + cfg.osmBuildOpts.oneWayEntryCost = 0; + } + + // take the same cost for taking restricted turns to keep + // configuration simple + double val = cfg.osmBuildOpts.oneWayEntryCost * 10.0; + if (val > std::numeric_limits::max()) { + val = std::numeric_limits::max(); + } + + cfg.routingOpts.turnRestrCost = val; + + if (p.hasKey(secStr, "routing_full_turn_punish")) { + double val = p.getPosDouble(secStr, "routing_full_turn_punish"); + + LOG(WARN) << "Option 
routing_full_turn_punish is deprecated, use " + "routing_full_turn_penalty instead."; + + val /= cfg.osmBuildOpts.levelDefSpeed[0]; + + LOG(DEBUG) << " (using routing_full_turn_penalty of " << val + << " instead)"; + + val *= 10.0; + + if (val > std::numeric_limits::max()) { + val = std::numeric_limits::max(); + } + + cfg.routingOpts.fullTurnPunishFac = val; + } + + if (p.hasKey(secStr, "routing_full_turn_penalty")) { + double val = p.getPosDouble(secStr, "routing_full_turn_penalty") * 10.0; + + if (val > std::numeric_limits::max()) { + val = std::numeric_limits::max(); + } + + cfg.routingOpts.fullTurnPunishFac = val; + } + + if (p.hasKey(secStr, "routing_no_self_hops")) { + cfg.routingOpts.noSelfHops = p.getBool(secStr, "routing_no_self_hops"); + } + + if (p.hasKey(secStr, "routing_full_turn_angle")) { + double ang = p.getPosDouble(secStr, "routing_full_turn_angle"); + cfg.routingOpts.fullTurnAngle = ang; + cfg.osmBuildOpts.fullTurnAngle = ang; + } else { + cfg.routingOpts.fullTurnAngle = 5; + cfg.osmBuildOpts.fullTurnAngle = 5; + } + + if (p.hasKey(secStr, "routing_snap_full_turn_angle")) { + double ang = p.getPosDouble(secStr, "routing_snap_full_turn_angle"); + cfg.osmBuildOpts.maxAngleSnapReach = ang; + } else { + cfg.osmBuildOpts.maxAngleSnapReach = cfg.routingOpts.fullTurnAngle; + } + + if (p.hasKey(secStr, "routing_pass_thru_station_punish")) { + LOG(WARN) << "Option routing_pass_thru_station_punish has been removed."; + } + + cfg.routingOpts.turnRestrCost *= 10.0; + + if (p.hasKey(secStr, "routing_no_lines_punish_fac")) { + LOG(WARN) << "Option routing_no_lines_punish_fac is deprecated, use " + "routing_no_lines_penalty_fac instead."; + + cfg.routingOpts.noLinesPunishFact = + 1 + p.getPosDouble(secStr, "routing_no_lines_punish_fac"); + + LOG(DEBUG) << " (using routing_no_lines_penalty_fac of " + << cfg.routingOpts.noLinesPunishFact << " instead)"; + } else { + cfg.routingOpts.noLinesPunishFact = 1; + } + + if (p.hasKey(secStr, 
"routing_no_lines_penalty_fac")) { + cfg.routingOpts.noLinesPunishFact = + p.getPosDouble(secStr, "routing_no_lines_penalty_fac"); + } else { + // default already set above + } + + // store this at two places, as we are writing the punishment into the graph + cfg.osmBuildOpts.noLinesPunishFact = cfg.routingOpts.noLinesPunishFact; + + if (p.hasKey(secStr, "routing_line_unmatched_punish_fac")) { + LOG(WARN) + << "Option routing_line_unmatched_punish_fac is deprecated, use " + "routing_line_unmatched_time_penalty_fac, " + "routing_line_station_from_unmatched_time_penalty, and " + "routing_line_station_to_unmatched_time_penalty instead."; + + cfg.routingOpts.lineUnmatchedPunishFact = + 1 + p.getPosDouble(secStr, "routing_line_unmatched_punish_fac") / 3; + + cfg.routingOpts.lineNameFromUnmatchedPunishFact = + 1 + p.getPosDouble(secStr, "routing_line_unmatched_punish_fac") / 3; + + cfg.routingOpts.lineNameToUnmatchedPunishFact = + 1 + p.getPosDouble(secStr, "routing_line_unmatched_punish_fac") / 3; + + LOG(DEBUG) << " (using routing_line_unmatched_punish_fac of " + << cfg.routingOpts.lineUnmatchedPunishFact << " instead)"; + LOG(DEBUG) + << " (using routing_line_station_from_unmatched_time_penalty of " + << cfg.routingOpts.lineNameFromUnmatchedPunishFact << " instead)"; + LOG(DEBUG) << " (using routing_line_station_to_unmatched_time_penalty of " + << cfg.routingOpts.lineNameToUnmatchedPunishFact + << " instead)"; + } + + if (p.hasKey(secStr, "routing_line_unmatched_time_penalty_fac")) { + cfg.routingOpts.lineUnmatchedPunishFact = + p.getPosDouble(secStr, "routing_line_unmatched_time_penalty_fac"); + } + + if (p.hasKey(secStr, "routing_line_station_from_unmatched_time_penalty")) { + cfg.routingOpts.lineNameFromUnmatchedPunishFact = p.getPosDouble( + secStr, "routing_line_station_from_unmatched_time_penalty"); + } + + if (p.hasKey(secStr, "routing_line_station_to_unmatched_time_penalty")) { + cfg.routingOpts.lineNameToUnmatchedPunishFact = p.getPosDouble( + secStr, 
"routing_line_station_to_unmatched_time_penalty"); + } + + if (p.hasKey(secStr, "routing_platform_unmatched_punish")) { + LOG(WARN) + << "Option routing_platform_unmatched_punish is deprecated, use " + "routing_platform_unmatched_penalty instead."; + cfg.routingOpts.platformUnmatchedPen = + p.getPosDouble(secStr, "routing_platform_unmatched_punish"); + + cfg.routingOpts.platformUnmatchedPen = + cfg.routingOpts.platformUnmatchedPen * + (DEF_TRANS_PEN / cfg.osmBuildOpts.levelDefSpeed[0]); + + LOG(DEBUG) << " (using routing_platform_unmatched_penalty of " + << cfg.routingOpts.platformUnmatchedPen << " instead)"; + } else { + cfg.routingOpts.platformUnmatchedPen = 0; + } + + if (p.hasKey(secStr, "routing_platform_unmatched_penalty")) { + cfg.routingOpts.platformUnmatchedPen = + p.getPosDouble(secStr, "routing_platform_unmatched_penalty"); + } else { + // default already set above + } + + if (p.hasKey(secStr, "routing_transition_penalty_fac")) { + cfg.routingOpts.transitionPen = + p.getPosDouble(secStr, "routing_transition_penalty_fac"); + } else { + cfg.routingOpts.transitionPen = DEF_TRANS_PEN; + } + + if (p.hasKey(secStr, "routing_station_distance_punish_fac")) { + cfg.routingOpts.stationDistPenFactor = + p.getPosDouble(secStr, "routing_station_distance_punish_fac"); + LOG(WARN) << "Option routing_station_distance_punish_fac is deprecated, " + "use routing_station_move_penalty_fac instead."; + cfg.routingOpts.stationDistPenFactor = + cfg.routingOpts.stationDistPenFactor * + (DEF_TRANS_PEN / cfg.osmBuildOpts.levelDefSpeed[0]); + LOG(DEBUG) << " (using routing_station_move_penalty_fac of " + << cfg.routingOpts.stationDistPenFactor << " instead)"; + } else { + cfg.routingOpts.stationDistPenFactor = + cfg.routingOpts.stationDistPenFactor * + (DEF_TRANS_PEN / cfg.osmBuildOpts.levelDefSpeed[0]); + } + + if (p.hasKey(secStr, "routing_station_move_penalty_fac")) { + cfg.routingOpts.stationDistPenFactor = + p.getPosDouble(secStr, "routing_station_move_penalty_fac"); + } else 
{ + // the default value was already set above + } + + if (p.hasKey(secStr, "routing_non_osm_station_punish")) { + cfg.routingOpts.nonStationPen = + p.getPosDouble(secStr, "routing_non_osm_station_punish"); + LOG(WARN) << "Option routing_non_osm_station_punish is deprecated, use " + "routing_non_station_penalty instead."; + cfg.routingOpts.nonStationPen = + cfg.routingOpts.nonStationPen * + (DEF_TRANS_PEN / cfg.osmBuildOpts.levelDefSpeed[0]); + LOG(DEBUG) << " (using routing_non_station_penalty of " + << cfg.routingOpts.nonStationPen << " instead)"; + } else { + cfg.routingOpts.nonStationPen = 0; + } + + if (p.hasKey(secStr, "routing_non_station_penalty")) { + cfg.routingOpts.nonStationPen = + p.getPosDouble(secStr, "routing_non_station_penalty"); + } else { + // default was already set above + } + + if (p.hasKey(secStr, "routing_station_unmatched_penalty")) { + cfg.routingOpts.stationUnmatchedPen = + p.getPosDouble(secStr, "routing_station_unmatched_penalty"); + } else { + cfg.routingOpts.stationUnmatchedPen = cfg.routingOpts.nonStationPen / 2; + } + + if (p.hasKey(secStr, "station_normalize_chain")) { + try { + auto arr = p.getStrArr(secStr, "station_normalize_chain", ';'); + cfg.osmBuildOpts.statNormzer = trgraph::Normalizer(getNormRules(arr)); + } catch (const std::exception& e) { + throw ParseExc(p.getVal(secStr, "station_normalize_chain").line, + p.getVal(secStr, "station_normalize_chain").pos, + "", + std::string("", + p.getVal(secStr, "station_normalize_chain").file); + } + } + + if (p.hasKey(secStr, "track_normalize_chain")) { + try { + auto arr = p.getStrArr(secStr, "track_normalize_chain", ';'); + cfg.osmBuildOpts.trackNormzer = trgraph::Normalizer(getNormRules(arr)); + } catch (const std::exception& e) { + throw ParseExc(p.getVal(secStr, "track_normalize_chain").line, + p.getVal(secStr, "track_normalize_chain").pos, + "", + std::string("", + p.getVal(secStr, "track_normalize_chain").file); + } + } + + if (p.hasKey(secStr, "line_normalize_chain")) { + 
try { + auto arr = p.getStrArr(secStr, "line_normalize_chain", ';'); + cfg.osmBuildOpts.lineNormzer = trgraph::Normalizer(getNormRules(arr)); + } catch (const std::exception& e) { + throw ParseExc(p.getVal(secStr, "line_normalize_chain").line, + p.getVal(secStr, "line_normalize_chain").pos, + "", + std::string("", + p.getVal(secStr, "line_normalize_chain").file); + } + } + + if (p.hasKey(secStr, "station_id_normalize_chain")) { + try { + auto arr = p.getStrArr(secStr, "station_id_normalize_chain", ';'); + cfg.osmBuildOpts.idNormzer = trgraph::Normalizer(getNormRules(arr)); + } catch (const std::exception& e) { + throw ParseExc(p.getVal(secStr, "station_id_normalize_chain").line, + p.getVal(secStr, "station_id_normalize_chain").pos, + "", + std::string("", + p.getVal(secStr, "station_id_normalize_chain").file); + } + } + + // determine the maximum possible speed for this config, this is later + // used to filter out stations which are so far out of reach we don't + // have to consider them for the bounding box calculation + cfg.osmBuildOpts.maxSpeed = 0; + cfg.osmBuildOpts.maxSpeedCorFac = 1; + for (size_t i = 0; i < 8; i++) { + if (cfg.osmBuildOpts.levelDefSpeed[i] > cfg.osmBuildOpts.maxSpeed) + cfg.osmBuildOpts.maxSpeed = cfg.osmBuildOpts.levelDefSpeed[i]; + } + + if (cfg.routingOpts.lineUnmatchedPunishFact < 1) + cfg.osmBuildOpts.maxSpeedCorFac *= + cfg.routingOpts.lineUnmatchedPunishFact; + if (cfg.routingOpts.lineNameFromUnmatchedPunishFact < 1) + cfg.osmBuildOpts.maxSpeedCorFac *= + cfg.routingOpts.lineNameFromUnmatchedPunishFact; + if (cfg.routingOpts.lineNameToUnmatchedPunishFact < 1) + cfg.osmBuildOpts.maxSpeedCorFac *= + cfg.routingOpts.lineNameToUnmatchedPunishFact; + + if (cfg.routingOpts.noLinesPunishFact < 1) + cfg.osmBuildOpts.maxSpeedCorFac *= cfg.routingOpts.noLinesPunishFact; + + if (cfg.osmBuildOpts.oneWaySpeedPen < 1) + cfg.osmBuildOpts.maxSpeedCorFac *= cfg.osmBuildOpts.oneWaySpeedPen; + + cfg.osmBuildOpts.maxSpeed /= 
cfg.osmBuildOpts.maxSpeedCorFac; + + bool found = false; + + for (auto& exCfg : _cfgs) { + if (cfg == exCfg) { + for (auto mot : + ad::cppgtfs::gtfs::flat::Route::getTypesFromString(secStr)) { + exCfg.mots.insert(mot); } + found = true; + break; } + } - if (p.hasKey(secStr, "osm_filter_drop")) { - for (const auto& kvs : p.getStrArr(sec.first, "osm_filter_drop", ' ')) { - auto fRule = getFRule(kvs); - curCfg.osmBuildOpts.dropFilter[fRule.kv.first].insert( - osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); - } - } - - if (p.hasKey(secStr, "osm_max_snap_level")) { - curCfg.osmBuildOpts.maxSnapLevel = - p.getInt(sec.first, "osm_max_snap_level"); - } else { - curCfg.osmBuildOpts.maxSnapLevel = 7; - } - - if (p.hasKey(secStr, "osm_filter_nohup")) { - for (const auto& kvs : - p.getStrArr(sec.first, "osm_filter_nohup", ' ')) { - auto fRule = getFRule(kvs); - curCfg.osmBuildOpts.noHupFilter[fRule.kv.first].insert( - osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); - } - } - - if (p.hasKey(secStr, "osm_filter_oneway")) { - for (const auto& kvs : - p.getStrArr(sec.first, "osm_filter_oneway", ' ')) { - auto fRule = getFRule(kvs); - curCfg.osmBuildOpts.oneWayFilter[fRule.kv.first].insert( - osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); - } - } - - if (p.hasKey(secStr, "osm_filter_oneway_reverse")) { - for (const auto& kvs : - p.getStrArr(sec.first, "osm_filter_oneway_reverse", ' ')) { - auto fRule = getFRule(kvs); - curCfg.osmBuildOpts.oneWayFilterRev[fRule.kv.first].insert( - osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); - } - } - - if (p.hasKey(secStr, "osm_filter_undirected")) { - for (const auto& kvs : - p.getStrArr(sec.first, "osm_filter_undirected", ' ')) { - auto fRule = getFRule(kvs); - curCfg.osmBuildOpts.twoWayFilter[fRule.kv.first].insert( - osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); - } - } - - if (p.hasKey(secStr, "osm_filter_station")) { - for (const auto& kvs : - p.getStrArr(sec.first, 
"osm_filter_station", ' ')) { - auto fRule = getFRule(kvs); - curCfg.osmBuildOpts.stationFilter[fRule.kv.first].insert( - osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); - } - } - - if (p.hasKey(secStr, "osm_filter_station_blocker")) { - for (const auto& kvs : - p.getStrArr(sec.first, "osm_filter_station_blocker", ' ')) { - auto fRule = getFRule(kvs); - curCfg.osmBuildOpts.stationBlockerFilter[fRule.kv.first].insert( - osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); - } - } - - if (p.hasKey(secStr, "osm_node_positive_restriction")) { - for (const auto& kvs : - p.getStrArr(sec.first, "osm_node_positive_restriction", ' ')) { - auto fRule = getFRule(kvs); - curCfg.osmBuildOpts.restrPosRestr[fRule.kv.first].insert( - osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); - } - } - - if (p.hasKey(secStr, "osm_node_negative_restriction")) { - for (const auto& kvs : - p.getStrArr(sec.first, "osm_node_negative_restriction", ' ')) { - auto fRule = getFRule(kvs); - curCfg.osmBuildOpts.restrNegRestr[fRule.kv.first].insert( - osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); - } - } - - if (p.hasKey(secStr, "osm_filter_no_restriction")) { - for (const auto& kvs : - p.getStrArr(sec.first, "osm_filter_no_restriction", ' ')) { - auto fRule = getFRule(kvs); - curCfg.osmBuildOpts.noRestrFilter[fRule.kv.first].insert( - osm::AttrFlagPair(fRule.kv.second, getFlags(fRule.flags))); - } - } - - if (p.hasKey(secStr, "osm_station_name_attrs")) { - for (const std::string& r : - p.getStrArr(sec.first, "osm_station_name_attrs", ' ')) { - curCfg.osmBuildOpts.statAttrRules.nameRule.push_back( - getDeepAttrRule(r)); - } - } - - if (p.hasKey(secStr, "osm_track_number_tags")) { - for (const std::string& r : - p.getStrArr(sec.first, "osm_track_number_tags", ' ')) { - curCfg.osmBuildOpts.statAttrRules.platformRule.push_back( - getDeepAttrRule(r)); - } - } - - if (p.hasKey(secStr, "osm_edge_track_number_tags")) { - for (const std::string& r : - 
p.getStrArr(sec.first, "osm_edge_track_number_tags", ' ')) { - curCfg.osmBuildOpts.edgePlatformRules.push_back(getDeepAttrRule(r)); - } - } - - if (p.hasKey(secStr, "osm_station_group_attrs")) { - auto arr = p.getStrArr(secStr, "osm_station_group_attrs", ' '); - - for (const auto& ruleStr : arr) { - auto deep = getDeepAttrRule(ruleStr); - // TODO(patrick): getKv is misused here as a a=b parser - auto attrD = getKv(deep.attr); - deep.attr = attrD.first; - double dist = atof(attrD.second.c_str()); - curCfg.osmBuildOpts.statGroupNAttrRules.push_back({deep, dist}); - } - } - - if (p.hasKey(secStr, "osm_line_relation_tags")) { - auto arr = p.getStrArr(secStr, "osm_line_relation_tags", ' '); - - for (const auto& ruleStr : arr) { - auto rule = getKv(ruleStr); - auto tags = util::split(rule.second, ','); - if (rule.first == "from_name") - curCfg.osmBuildOpts.relLinerules.fromNameRule = tags; - else if (rule.first == "to_name") - curCfg.osmBuildOpts.relLinerules.toNameRule = tags; - else if (rule.first == "line_name") - curCfg.osmBuildOpts.relLinerules.sNameRule = tags; - } - } - - if (p.hasKey(secStr, "osm_max_snap_distance")) { - curCfg.osmBuildOpts.maxSnapDistances = - p.getDoubleArr(secStr, "osm_max_snap_distance", ','); - } else { - curCfg.osmBuildOpts.maxSnapDistances.push_back(50); - } - - if (p.hasKey(secStr, "osm_max_snap_fallback_distance")) { - curCfg.osmBuildOpts.maxSnapFallbackHeurDistance = - p.getDouble(secStr, "osm_max_snap_fallback_distance"); - } else { - curCfg.osmBuildOpts.maxSnapFallbackHeurDistance = - *std::max_element(curCfg.osmBuildOpts.maxSnapDistances.begin(), - curCfg.osmBuildOpts.maxSnapDistances.end()) * - 2; - } - - if (p.hasKey(secStr, "osm_max_group_search_distance")) { - curCfg.osmBuildOpts.maxGroupSearchDistance = - p.getDouble(secStr, "osm_max_group_search_distance"); - } else { - curCfg.osmBuildOpts.maxGroupSearchDistance = - *std::max_element(curCfg.osmBuildOpts.maxSnapDistances.begin(), - curCfg.osmBuildOpts.maxSnapDistances.end()) * - 
4; - } - - if (p.hasKey(secStr, "osm_max_osm_station_distance")) { - curCfg.osmBuildOpts.maxOsmStationDistance = - p.getDouble(secStr, "osm_max_osm_station_distance"); - } else { - curCfg.osmBuildOpts.maxOsmStationDistance = 5; - } - - if (p.hasKey(secStr, "osm_max_node_block_distance")) { - curCfg.osmBuildOpts.maxBlockDistance = - p.getDouble(secStr, "osm_max_node_block_distance"); - } else { - curCfg.osmBuildOpts.maxBlockDistance = - *std::max_element(curCfg.osmBuildOpts.maxSnapDistances.begin(), - curCfg.osmBuildOpts.maxSnapDistances.end()) / - 8; - } - - for (uint8_t i = 0; i < 8; i++) { - std::string name = - std::string("routing_lvl") + std::to_string(i) + "_fac"; - if (p.hasKey(secStr, name)) { - double v = p.getDouble(sec.first, name); - curCfg.routingOpts.levelPunish[i] = v; - } else { - curCfg.routingOpts.levelPunish[i] = 1; - } - } - - if (p.hasKey(secStr, "routing_full_turn_punish")) { - curCfg.routingOpts.fullTurnPunishFac = - p.getDouble(secStr, "routing_full_turn_punish"); - } - if (p.hasKey(secStr, "routing_full_turn_angle")) { - double ang = p.getDouble(secStr, "routing_full_turn_angle"); - curCfg.routingOpts.fullTurnAngle = ang; - } else { - curCfg.routingOpts.fullTurnAngle = 5; - } - if (p.hasKey(secStr, "routing_snap_full_turn_angle")) { - double ang = p.getDouble(secStr, "routing_snap_full_turn_angle"); - curCfg.osmBuildOpts.maxAngleSnapReach = ang; - } else { - curCfg.osmBuildOpts.maxAngleSnapReach = - curCfg.routingOpts.fullTurnAngle; - } - if (p.hasKey(secStr, "routing_pass_thru_station_punish")) { - curCfg.routingOpts.passThruStationsPunish = - p.getDouble(secStr, "routing_pass_thru_station_punish"); - } - if (p.hasKey(secStr, "routing_one_way_meter_punish_fac")) { - curCfg.routingOpts.oneWayPunishFac = - p.getDouble(secStr, "routing_one_way_meter_punish_fac"); - } - if (p.hasKey(secStr, "routing_one_way_edge_punish")) { - curCfg.routingOpts.oneWayEdgePunish = - p.getDouble(secStr, "routing_one_way_edge_punish"); - } - if (p.hasKey(secStr, 
"routing_line_unmatched_punish_fac")) { - curCfg.routingOpts.lineUnmatchedPunishFact = - p.getDouble(secStr, "routing_line_unmatched_punish_fac"); - } - if (p.hasKey(secStr, "routing_platform_unmatched_punish")) { - curCfg.routingOpts.platformUnmatchedPen = - p.getDouble(secStr, "routing_platform_unmatched_punish"); - } - if (p.hasKey(secStr, "routing_non_osm_station_punish")) { - curCfg.routingOpts.nonOsmPen = - p.getDouble(secStr, "routing_non_osm_station_punish"); - } else { - curCfg.routingOpts.nonOsmPen = 0; - } - if (p.hasKey(secStr, "routing_station_distance_punish_fac")) { - curCfg.routingOpts.stationDistPenFactor = - p.getDouble(secStr, "routing_station_distance_punish_fac"); - } else { - curCfg.routingOpts.stationDistPenFactor = 1; - } - - if (p.hasKey(secStr, "station_normalize_chain")) { - try { - auto arr = p.getStrArr(secStr, "station_normalize_chain", ';'); - curCfg.osmBuildOpts.statNormzer = - trgraph::Normalizer(getNormRules(arr)); - } catch (const std::exception& e) { - throw ParseExc(p.getVal(secStr, "station_normalize_chain").line, - p.getVal(secStr, "station_normalize_chain").pos, - "", - std::string("", s); - } - } - - if (p.hasKey(secStr, "track_normalize_chain")) { - try { - auto arr = p.getStrArr(secStr, "track_normalize_chain", ';'); - curCfg.osmBuildOpts.trackNormzer = - trgraph::Normalizer(getNormRules(arr)); - } catch (const std::exception& e) { - throw ParseExc(p.getVal(secStr, "track_normalize_chain").line, - p.getVal(secStr, "station_normalize_chain").pos, - "", - std::string("", s); - } - } - - if (p.hasKey(secStr, "line_normalize_chain")) { - try { - auto arr = p.getStrArr(secStr, "line_normalize_chain", ';'); - curCfg.osmBuildOpts.lineNormzer = - trgraph::Normalizer(getNormRules(arr)); - } catch (const std::exception& e) { - throw ParseExc(p.getVal(secStr, "station_normalize_chain").line, - p.getVal(secStr, "station_normalize_chain").pos, - "", - std::string("", s); - } - } - - bool found = false; - - for (auto& cfg : _cfgs) { - 
if (cfg == curCfg) { - for (auto mot : Route::getTypesFromString(secStr)) { - cfg.mots.insert(mot); - } - found = true; - break; - } - } - - if (!found) { - curCfg.mots = Route::getTypesFromString(secStr); - _cfgs.push_back(curCfg); - } + if (!found) { + cfg.mots = ad::cppgtfs::gtfs::flat::Route::getTypesFromString(secStr); + _cfgs.push_back(cfg); } } } diff --git a/src/pfaedle/config/MotConfigReader.h b/src/pfaedle/config/MotConfigReader.h index 912a148..86e3ffb 100644 --- a/src/pfaedle/config/MotConfigReader.h +++ b/src/pfaedle/config/MotConfigReader.h @@ -5,12 +5,13 @@ #ifndef PFAEDLE_CONFIG_MOTCONFIGREADER_H_ #define PFAEDLE_CONFIG_MOTCONFIGREADER_H_ -#include -#include #include #include +#include +#include #include "ad/cppgtfs/gtfs/Route.h" #include "configparser/ConfigFileParser.h" +#include "pfaedle/_config.h" #include "pfaedle/config/MotConfig.h" #include "pfaedle/osm/OsmBuilder.h" @@ -22,7 +23,7 @@ using ad::cppgtfs::gtfs::Route; class MotConfigReader { public: MotConfigReader(); - void parse(const std::vector& paths); + void parse(const std::vector& paths, const std::string& literal); const std::vector& getConfigs() const; diff --git a/src/pfaedle/config/PfaedleConfig.h b/src/pfaedle/config/PfaedleConfig.h index ba9a2a0..211ada3 100644 --- a/src/pfaedle/config/PfaedleConfig.h +++ b/src/pfaedle/config/PfaedleConfig.h @@ -5,11 +5,13 @@ #ifndef PFAEDLE_CONFIG_PFAEDLECONFIG_H_ #define PFAEDLE_CONFIG_PFAEDLECONFIG_H_ +#include #include #include #include -#include + #include "ad/cppgtfs/gtfs/Route.h" +#include "util/geo/Geo.h" namespace pfaedle { namespace config { @@ -18,35 +20,55 @@ using ad::cppgtfs::gtfs::Route; struct Config { Config() - : dbgOutputPath("geo"), + : dbgOutputPath("."), solveMethod("global"), - evalPath("."), outputPath("gtfs-out"), dropShapes(false), useHMM(false), writeGraph(false), - writeCombGraph(false), - evaluate(false), buildTransitGraph(false), - gridSize(2000) {} + useCaching(false), + writeOverpass(false), + writeOsmfilter(false), 
+ inPlace(false), + writeColors(false), + noFastHops(false), + noAStar(false), + noTrie(false), + noHopCache(false), + writeStats(false), + parseAdditionalGTFSFields(false), + gridSize(2000 / util::geo::M_PER_DEG), + gaussianNoise(0), + verbosity(0) {} std::string dbgOutputPath; std::string solveMethod; - std::string evalPath; std::string shapeTripId; std::string outputPath; std::string writeOsm; std::string osmPath; - std::string evalDfBins; + std::string motCfgParam; std::vector feedPaths; std::vector configPaths; std::set mots; bool dropShapes; bool useHMM; bool writeGraph; - bool writeCombGraph; - bool evaluate; bool buildTransitGraph; + bool useCaching; + bool writeOverpass; + bool writeOsmfilter; + bool inPlace; + bool writeColors; + bool noFastHops; + bool noAStar; + bool noTrie; + bool noHopCache; + bool writeStats; + bool parseAdditionalGTFSFields; double gridSize; + double gaussianNoise; + uint8_t verbosity; std::string toString() { std::stringstream ss; @@ -58,8 +80,19 @@ struct Config { << "drop-shapes: " << dropShapes << "\n" << "use-hmm: " << useHMM << "\n" << "write-graph: " << writeGraph << "\n" - << "write-cgraph: " << writeCombGraph << "\n" << "grid-size: " << gridSize << "\n" + << "use-cache: " << useCaching << "\n" + << "write-overpass: " << writeOverpass << "\n" + << "write-osmfilter: " << writeOsmfilter << "\n" + << "inplace: " << inPlace << "\n" + << "write-colors: " << writeColors << "\n" + << "no-fast-hops: " << noFastHops << "\n" + << "no-a-star: " << noAStar << "\n" + << "no-trie: " << noTrie << "\n" + << "no-hop-cache: " << noHopCache << "\n" + << "verbosity: " << verbosity << "\n" + << "parse-additional-gtfs-fields: " << parseAdditionalGTFSFields << "\n" + << "write-stats: " << writeStats << "\n" << "feed-paths: "; for (const auto& p : feedPaths) { diff --git a/src/pfaedle/eval/Collector.cpp b/src/pfaedle/eval/Collector.cpp deleted file mode 100644 index 334641f..0000000 --- a/src/pfaedle/eval/Collector.cpp +++ /dev/null @@ -1,417 +0,0 
@@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#include -#include -#include -#include -#include -#include "ad/cppgtfs/gtfs/Feed.h" -#include "pfaedle/eval/Collector.h" -#include "pfaedle/eval/Result.h" -#include "util/geo/Geo.h" -#include "util/geo/PolyLine.h" -#include "util/geo/output/GeoJsonOutput.h" -#include "util/log/Log.h" - -using util::geo::FLine; -using util::geo::PolyLine; -using util::geo::FPoint; -using ad::cppgtfs::gtfs::Trip; -using ad::cppgtfs::gtfs::Shape; -using pfaedle::eval::Collector; -using pfaedle::eval::Result; -using util::geo::output::GeoJsonOutput; - -// _____________________________________________________________________________ -double Collector::add(const Trip* t, const Shape* oldS, const Shape* newS, - const std::vector& newTripDists) { - if (!oldS) { - _noOrigShp++; - return 0; - } - - for (auto st : t->getStopTimes()) { - if (st.getShapeDistanceTravelled() < 0) { - // we cannot safely compare trips without shape dist travelled - // info - _noOrigShp++; - return 0; - } - } - - double fd = 0; - size_t unmatchedSegments; - double unmatchedSegmentsLength; - - std::vector oldDists; - FLine oldL = getWebMercLine( - oldS, t->getStopTimes().begin()->getShapeDistanceTravelled(), - (--t->getStopTimes().end())->getShapeDistanceTravelled(), &oldDists); - - std::vector newDists; - FLine newL = getWebMercLine(newS, -1, -1, &newDists); - - std::ofstream fstr(_evalOutPath + "/trip-" + t->getId() + ".json"); - GeoJsonOutput gjout(fstr); - - auto oldSegs = segmentize(t, oldL, oldDists, 0); - auto newSegs = segmentize(t, newL, newDists, &newTripDists); - - // cut both result at the beginning and end to clear evaluation from - // loops at the end - PolyLine oldStart = oldSegs[0]; - PolyLine newStart = newSegs[0]; - auto oldStartNew = - oldStart.getSegment(oldStart.projectOn(newSegs[0][0]).totalPos, 1); - auto newStartNew = - 
newStart.getSegment(newStart.projectOn(oldSegs[0][0]).totalPos, 1); - if (fabs(oldStartNew.getLength() - oldStart.getLength()) / - oldStart.getLength() < - 0.5 && - fabs(newStartNew.getLength() - newStart.getLength()) / - newStart.getLength() < - 0.5) { - oldSegs[0] = oldStartNew.getLine(); - newSegs[0] = newStartNew.getLine(); - } - - PolyLine oldEnd = oldSegs[oldSegs.size() - 1]; - PolyLine newEnd = newSegs[oldSegs.size() - 1]; - auto oldEndNew = - oldEnd.getSegment(0, oldEnd.projectOn(newSegs.back().back()).totalPos); - auto newEndNew = - newEnd.getSegment(0, newEnd.projectOn(oldSegs.back().back()).totalPos); - if (fabs(oldEndNew.getLength() - oldEnd.getLength()) / oldEnd.getLength() < - 0.5 && - fabs(newEndNew.getLength() - newEnd.getLength()) / newEnd.getLength() < - 0.5) { - oldSegs[oldSegs.size() - 1] = oldEndNew.getLine(); - newSegs[newSegs.size() - 1] = newEndNew.getLine(); - } - - // check for suspicious (most likely erroneous) lines in the - // ground truth data which have a long straight-line segment - - for (auto oldL : oldSegs) { - for (size_t i = 1; i < oldL.size(); i++) { - if (util::geo::webMercMeterDist(oldL[i - 1], oldL[i]) > 500) { - // return 0; - } - } - } - - // new lines build from cleaned-up shapes - FLine oldLCut; - FLine newLCut; - - for (auto oldL : oldSegs) { - gjout.print(oldL, {{"ver", "old"}}); - oldLCut.insert(oldLCut.end(), oldL.begin(), oldL.end()); - } - for (auto newL : newSegs) { - gjout.print(newL, {{"ver", "new"}}); - newLCut.insert(newLCut.end(), newL.begin(), newL.end()); - } - - gjout.flush(); - fstr.close(); - - double fac = cos(2 * atan(exp((oldSegs.front().front().getY() + - oldSegs.back().back().getY()) / - 6378137.0)) - - 1.5707965); - - if (_dCache.count(oldS) && _dCache.find(oldS)->second.count(newS)) { - fd = _dCache[oldS][newS]; - } else { - fd = util::geo::accFrechetDistC(oldLCut, newLCut, 5 / fac) * fac; - _dCache[oldS][newS] = fd; - } - - if (_dACache.count(oldS) && _dACache.find(oldS)->second.count(newS)) { - 
unmatchedSegments = _dACache[oldS][newS].first; - unmatchedSegmentsLength = _dACache[oldS][newS].second; - } else { - auto dA = getDa(oldSegs, newSegs); - _dACache[oldS][newS] = dA; - unmatchedSegments = dA.first; - unmatchedSegmentsLength = dA.second; - } - - double totL = 0; - for (auto l : oldSegs) totL += util::geo::len(l) * fac; - - // filter out shapes with a lenght of under 5 meters - they are most likely - // artifacts - if (totL < 5) { - _noOrigShp++; - return 0; - } - - _fdSum += fd / totL; - _unmatchedSegSum += unmatchedSegments; - _unmatchedSegLengthSum += unmatchedSegmentsLength; - _results.insert(Result(t, fd / totL)); - _resultsAN.insert(Result(t, static_cast(unmatchedSegments) / - static_cast(oldSegs.size()))); - _resultsAL.insert(Result(t, unmatchedSegmentsLength / totL)); - - LOG(DEBUG) << "This result (" << t->getId() - << "): A_N/N = " << unmatchedSegments << "/" << oldSegs.size() - << " = " - << static_cast(unmatchedSegments) / - static_cast(oldSegs.size()) - << " A_L/L = " << unmatchedSegmentsLength << "/" << totL << " = " - << unmatchedSegmentsLength / totL << " d_f = " << fd; - - - return fd; -} - -// _____________________________________________________________________________ -std::vector Collector::segmentize( - const Trip* t, const FLine& shape, const std::vector& dists, - const std::vector* newTripDists) { - std::vector ret; - - if (t->getStopTimes().size() < 2) return ret; - - util::geo::PolyLine pl(shape); - std::vector > cuts; - - size_t i = 0; - for (auto st : t->getStopTimes()) { - if (newTripDists) { - cuts.push_back(std::pair( - util::geo::latLngToWebMerc(st.getStop()->getLat(), - st.getStop()->getLng()), - (*newTripDists)[i])); - } else { - cuts.push_back(std::pair( - util::geo::latLngToWebMerc(st.getStop()->getLat(), - st.getStop()->getLng()), - st.getShapeDistanceTravelled())); - } - i++; - } - - // get first half of geometry, and search for start point there! 
- size_t before = std::upper_bound(dists.begin(), dists.end(), cuts[1].second) - - dists.begin(); - util::geo::PolyLine l( - FLine(shape.begin(), shape.begin() + before + 1)); - auto lastLp = l.projectOn(cuts.front().first); - - for (size_t i = 1; i < cuts.size(); i++) { - size_t before = shape.size(); - if (i < cuts.size() - 1 && cuts[i + 1].second > -0.5) { - before = - std::upper_bound(dists.begin(), dists.end(), cuts[i + 1].second) - - dists.begin(); - } - - util::geo::PolyLine beforePl( - FLine(shape.begin(), shape.begin() + before)); - - auto curLp = beforePl.projectOnAfter(cuts[i].first, lastLp.lastIndex); - - ret.push_back(pl.getSegment(lastLp, curLp).getLine()); - lastLp = curLp; - } - - // std::raise(SIGABRT); - return ret; -} - -// _____________________________________________________________________________ -FLine Collector::getWebMercLine(const Shape* s, double from, double t) { - return getWebMercLine(s, from, t, 0); -} - -// _____________________________________________________________________________ -FLine Collector::getWebMercLine(const Shape* s, double from, double to, - std::vector* dists) { - FLine ret; - - auto i = s->getPoints().begin(); - - for (; i != s->getPoints().end(); i++) { - auto p = *i; - - if ((from < 0 || (p.travelDist - from) > -0.01)) { - if (to >= 0 && (p.travelDist - to) > 0.01) break; - - FPoint mercP = util::geo::latLngToWebMerc(p.lat, p.lng); - - ret.push_back(mercP); - if (dists) dists->push_back(p.travelDist); - } - } - - return ret; -} - -// _____________________________________________________________________________ -const std::set& Collector::getResults() const { return _results; } - -// _____________________________________________________________________________ -double Collector::getAvgDist() const { return _fdSum / _results.size(); } - -// _____________________________________________________________________________ -void Collector::printHisto(std::ostream* os, const std::set& result, - const std::vector& bins) 
const { - size_t W = 60; - - auto it = result.begin(); - std::vector > res; - std::vector examples; - size_t maxC = 0; - - for (size_t i = 0; i < bins.size(); i++) { - size_t c = 0; - const Trip* trip = 0; - - while (it != result.end() && it->getDist() <= (bins[i] + 0.001)) { - if (!trip) trip = it->getTrip(); - c++; - it++; - } - - if (c > maxC) maxC = c; - - examples.push_back(trip); - res.push_back(std::pair(bins[i], c)); - } - - size_t j = 0; - for (auto r : res) { - std::string range = util::toString(r.first); - (*os) << " < " << std::setfill(' ') << std::setw(10) << range << ": "; - size_t i = 0; - - for (; i < W * (static_cast(r.second) / static_cast(maxC)); - i++) { - (*os) << "|"; - } - - if (r.second) - (*os) << " (" << r.second << ", e.g. #" << examples[j]->getId() << ")"; - (*os) << std::endl; - j++; - } -} - -// _____________________________________________________________________________ -std::vector Collector::getBins(double mind, double maxd, size_t steps) { - double bin = (maxd - mind) / steps; - double curE = mind + bin; - - std::vector ret; - while (curE <= maxd) { - ret.push_back(curE); - curE += bin; - } - return ret; -} - -// _____________________________________________________________________________ -void Collector::printCsv(std::ostream* os, const std::set& result, - const std::vector& bins) const { - auto it = result.begin(); - std::vector > res; - - for (size_t i = 0; i < bins.size(); i++) { - size_t c = 0; - const Trip* trip = 0; - - while (it != result.end() && it->getDist() <= (bins[i] + 0.001)) { - if (!trip) trip = it->getTrip(); - c++; - it++; - } - - res.push_back(std::pair(bins[i], c)); - } - - (*os) << "range, count\n"; - for (auto r : res) { - (*os) << r.first << "," << r.second << "\n"; - } -} - -// _____________________________________________________________________________ -void Collector::printStats(std::ostream* os) const { - size_t buckets = 10; - (*os) << "\n ===== Evalution results =====\n\n"; - - (*os) << 
std::setfill(' ') << std::setw(30) - << " # of trips new shapes were matched for: " << _results.size() - << "\n"; - (*os) << std::setw(30) << " # of trips without input shapes: " << _noOrigShp - << "\n"; - - if (_results.size()) { - (*os) << std::setw(30) << " highest distance to input shapes: " - << (--_results.end())->getDist() << " (on trip #" - << (--_results.end())->getTrip()->getId() << ")\n"; - (*os) << std::setw(30) << " lowest distance to input shapes: " - << (_results.begin())->getDist() << " (on trip #" - << (_results.begin())->getTrip()->getId() << ")\n"; - (*os) << std::setw(30) << " avg total frechet distance: " << getAvgDist() - << "\n"; - - std::vector dfBins = getBins( - (_results.begin())->getDist(), (--_results.end())->getDist(), buckets); - - if (_dfBins.size()) dfBins = _dfBins; - - (*os) << "\n -- Histogram of d_f for this run -- " << std::endl; - printHisto(os, _results, dfBins); - - std::ofstream fstr1(_evalOutPath + "/eval-frechet.csv"); - printCsv(&fstr1, _results, dfBins); - - (*os) << "\n\n\n -- Histogram of A_N/N for this run -- " << std::endl; - printHisto(os, _resultsAN, - getBins((_resultsAN.begin())->getDist(), - (--_resultsAN.end())->getDist(), buckets)); - std::ofstream fstr2(_evalOutPath + "/eval-AN.csv"); - printCsv(&fstr2, _resultsAN, getBins(0, 1, 20)); - - (*os) << "\n\n\n -- Histogram of A_L/L for this run -- " << std::endl; - printHisto(os, _resultsAL, - getBins((_resultsAL.begin())->getDist(), - (--_resultsAL.end())->getDist(), buckets)); - std::ofstream fstr3(_evalOutPath + "/eval-AL.csv"); - printCsv(&fstr3, _resultsAL, getBins(0, 1, 20)); - } - - (*os) << "\n ===== End of evaluation results =====\n"; - (*os) << std::endl; -} - -// _____________________________________________________________________________ -std::pair Collector::getDa(const std::vector& a, - const std::vector& b) { - assert(a.size() == b.size()); - std::pair ret{0, 0}; - - // euclidean distance on web mercator is in meters on equator, - // and 
proportional to cos(lat) in both y directions - double fac = - cos(2 * atan(exp((a.front().front().getY() + a.back().back().getY()) / - 6378137.0)) - - 1.5707965); - - for (size_t i = 0; i < a.size(); i++) { - double fd = util::geo::frechetDist(a[i], b[i], 3 / fac) * fac; - if (fd >= 20) { - ret.first++; - ret.second += util::geo::len(a[i]) * fac; - } - } - - return ret; -} diff --git a/src/pfaedle/eval/Collector.h b/src/pfaedle/eval/Collector.h deleted file mode 100644 index 9fe7def..0000000 --- a/src/pfaedle/eval/Collector.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#ifndef PFAEDLE_EVAL_COLLECTOR_H_ -#define PFAEDLE_EVAL_COLLECTOR_H_ - -#include -#include -#include -#include -#include -#include -#include "ad/cppgtfs/gtfs/Feed.h" -#include "pfaedle/eval/Result.h" -#include "util/geo/Geo.h" - -using ad::cppgtfs::gtfs::Trip; -using ad::cppgtfs::gtfs::Shape; -using util::geo::FLine; - -namespace pfaedle { -namespace eval { - -/* - * Collects routing results for evaluation - */ -class Collector { - public: - Collector(const std::string& evalOutPath, const std::vector& dfBins) - : _noOrigShp(0), - _noMatchShp(0), - _fdSum(0), - _unmatchedSegSum(0), - _unmatchedSegLengthSum(0), - _evalOutPath(evalOutPath), - _dfBins(dfBins) {} - - // Add a shape found by our tool newS for a trip t with newly calculated - // station dist values with the old shape oldS - double add(const Trip* t, const Shape* oldS, const Shape* newS, - const std::vector& newDists); - - // Return the set of all Result objects - const std::set& getResults() const; - - // Print general stats to os - void printStats(std::ostream* os) const; - - // Print histogramgs for the results to os - void printHisto(std::ostream* os, const std::set& result, - const std::vector& bins) const; - - // Print a CSV for the results to os - void printCsv(std::ostream* os, const std::set& result, - const std::vector& bins) 
const; - - // Return the averaged average frechet distance - double getAvgDist() const; - - static FLine getWebMercLine(const Shape* s, double from, double to); - static FLine getWebMercLine(const Shape* s, double from, double to, - std::vector* dists); - - private: - std::set _results; - std::set _resultsAN; - std::set _resultsAL; - std::map > _dCache; - std::map > > - _dACache; - size_t _noOrigShp; - size_t _noMatchShp; - - double _fdSum; - size_t _unmatchedSegSum; - double _unmatchedSegLengthSum; - - std::string _evalOutPath; - - std::vector _dfBins; - - static std::pair getDa(const std::vector& a, - const std::vector& b); - - static std::vector segmentize(const Trip* t, const FLine& shape, - const std::vector& dists, - const std::vector* newTripDists); - - static std::vector getBins(double mind, double maxd, size_t steps); -}; - -} // namespace eval -} // namespace pfaedle - -#endif // PFAEDLE_EVAL_COLLECTOR_H_ diff --git a/src/pfaedle/gtfs/Feed.h b/src/pfaedle/gtfs/Feed.h new file mode 100644 index 0000000..b6a04b9 --- /dev/null +++ b/src/pfaedle/gtfs/Feed.h @@ -0,0 +1,40 @@ +// Copyright 2016, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifndef PFAEDLE_GTFS_FEED_H_ +#define PFAEDLE_GTFS_FEED_H_ + +#include + +#include "Service.h" +#include "ShapeContainer.h" +#include "StopTime.h" +#include "ad/cppgtfs/gtfs/ContContainer.h" +#include "ad/cppgtfs/gtfs/Feed.h" +#include "ad/cppgtfs/gtfs/NullContainer.h" +#include "ad/cppgtfs/gtfs/Stop.h" +#include "ad/cppgtfs/gtfs/StopTime.h" +#include "ad/cppgtfs/gtfs/Trip.h" + +namespace pfaedle { +namespace gtfs { + +typedef ad::cppgtfs::gtfs::FeedB< + ad::cppgtfs::gtfs::Agency, ad::cppgtfs::gtfs::Route, + ad::cppgtfs::gtfs::Stop, Service, StopTime, Shape, ad::cppgtfs::gtfs::Fare, + ad::cppgtfs::gtfs::Level, ad::cppgtfs::gtfs::Pathway, + ad::cppgtfs::gtfs::Container, ad::cppgtfs::gtfs::Container, + ad::cppgtfs::gtfs::NullContainer, ad::cppgtfs::gtfs::ContContainer, + ad::cppgtfs::gtfs::ContContainer, ShapeContainer, + ad::cppgtfs::gtfs::Container, ad::cppgtfs::gtfs::Container, + ad::cppgtfs::gtfs::Container> + Feed; +typedef ad::cppgtfs::gtfs::TripB, Service, + ad::cppgtfs::gtfs::Route, Shape> + Trip; + +} // namespace gtfs +} // namespace pfaedle + +#endif // PFAEDLE_GTFS_FEED_H_ diff --git a/src/pfaedle/gtfs/Service.h b/src/pfaedle/gtfs/Service.h new file mode 100644 index 0000000..245ce31 --- /dev/null +++ b/src/pfaedle/gtfs/Service.h @@ -0,0 +1,43 @@ +// Copyright 2016, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifndef PFAEDLE_GTFS_SERVICE_H_ +#define PFAEDLE_GTFS_SERVICE_H_ + +#include +#include "ad/cppgtfs/gtfs/Service.h" +#include "util/Misc.h" + +namespace pfaedle { +namespace gtfs { + +class Service { + public: + typedef std::string Ref; + static std::string getId(Ref r) { return r; } + + explicit Service(const string& id) : _id(id) {} + Service(const string& id, uint8_t serviceDays, + ad::cppgtfs::gtfs::ServiceDate start, + ad::cppgtfs::gtfs::ServiceDate end) + : _id(id) { + UNUSED(serviceDays); + UNUSED(start); + UNUSED(end); + } + + const std::string& getId() const { return _id; } + void addException(const ad::cppgtfs::gtfs::ServiceDate& d, + ad::cppgtfs::gtfs::Service::EXCEPTION_TYPE t) { + UNUSED(d); + UNUSED(t); + } + + private: + std::string _id; +}; +} // namespace gtfs +} // namespace pfaedle + +#endif // PFAEDLE_GTFS_SERVICE_H_ diff --git a/src/pfaedle/gtfs/ShapeContainer.h b/src/pfaedle/gtfs/ShapeContainer.h new file mode 100644 index 0000000..be8a9b5 --- /dev/null +++ b/src/pfaedle/gtfs/ShapeContainer.h @@ -0,0 +1,72 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifndef PFAEDLE_GTFS_SHAPECONTAINER_H_ +#define PFAEDLE_GTFS_SHAPECONTAINER_H_ + +#include +#include +#include +#include +#include +#include +#include "ad/cppgtfs/gtfs/Shape.h" +#include "ad/cppgtfs/gtfs/flat/Shape.h" +#include "pfaedle/Def.h" +#include "util/Misc.h" + +namespace pfaedle { +namespace gtfs { + +struct Shape { + explicit Shape(const std::string& id) : id(id) {} + typedef std::string Ref; + static std::string getId(Ref r) { return r; } + + template + bool addPoint(T p) { + UNUSED(p); + return true; + } + + const std::string& getId() const { return id; } + + std::string id; +}; + +template +class ShapeContainer { + public: + ShapeContainer(); + ~ShapeContainer(); + T* add(const T& obj); + bool remove(const std::string& id); + const T* get(const std::string& id) const; + T* get(const std::string& id); + const std::string getRef(const std::string& id) const; + std::string getRef(const std::string& id); + size_t size() const; + void finalize() {} + bool has(const std::string& id) const; + + std::string add(const ad::cppgtfs::gtfs::Shape& s); + void open(); + bool nextStoragePt(ad::cppgtfs::gtfs::flat::ShapePoint* ret); + + private: + std::set _ids; + std::fstream _storage; + size_t _ptr; + size_t _max; + std::string _curId; + std::stringstream _writeBuffer; + std::fpos _lastBuff; +}; + +#include "ShapeContainer.tpp" + +} // namespace gtfs +} // namespace pfaedle + +#endif // PFAEDLE_GTFS_SHAPECONTAINER_H_ diff --git a/src/pfaedle/gtfs/ShapeContainer.tpp b/src/pfaedle/gtfs/ShapeContainer.tpp new file mode 100644 index 0000000..7d3d305 --- /dev/null +++ b/src/pfaedle/gtfs/ShapeContainer.tpp @@ -0,0 +1,158 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#include +#include + +// ____________________________________________________________________________ +template +ShapeContainer::ShapeContainer() : _lastBuff(0) { + std::string f = util::getTmpFName("", ".pfaedle-tmp", ""); + _storage.open(f, std::fstream::in | std::fstream::out | std::fstream::trunc); + + // immediately unlink + unlink(f.c_str()); + + if (!_storage.good()) { + std::cerr << "Could not open temporary file " << f << std::endl; + exit(1); + } +} + +// ____________________________________________________________________________ +template +ShapeContainer::~ShapeContainer() { + _storage << _writeBuffer.rdbuf(); + _storage.flush(); + _storage.close(); +} + +// ____________________________________________________________________________ +template +T* ShapeContainer::add(const T& ent) { + _ids.insert(ent.getId()); + return reinterpret_cast(1); +} + +// ____________________________________________________________________________ +template +bool ShapeContainer::remove(const std::string& id) { + _ids.erase(id); + return true; +} + +// ____________________________________________________________________________ +template +T* ShapeContainer::get(const std::string& id) { + UNUSED(id); + return reinterpret_cast(0); +} + +// ____________________________________________________________________________ +template +const T* ShapeContainer::get(const std::string& id) const { + UNUSED(id); + return reinterpret_cast(0); +} + +// ____________________________________________________________________________ +template +bool ShapeContainer::has(const std::string& id) const { + return _ids.count(id); +} + +// ____________________________________________________________________________ +template +size_t ShapeContainer::size() const { + return _ids.size(); +} + +// ____________________________________________________________________________ +template +std::string ShapeContainer::add(const ad::cppgtfs::gtfs::Shape& s) { + if (has(s.getId())) return 
s.getId(); + size_t size = s.getPoints().size(); + _ids.insert(s.getId()); + + _writeBuffer << s.getId(); + _writeBuffer.put(' '); + _writeBuffer.write(reinterpret_cast(&size), sizeof(size)); + + for (const auto& p : s.getPoints()) { + _writeBuffer.write(reinterpret_cast(&p.lat), sizeof(p.lat)); + _writeBuffer.write(reinterpret_cast(&p.lng), sizeof(p.lng)); + _writeBuffer.write(reinterpret_cast(&p.travelDist), + sizeof(p.travelDist)); + } + + if (_writeBuffer.tellp() - _lastBuff > 1000 * 5000) { + _lastBuff = _writeBuffer.tellp(); + _storage << _writeBuffer.rdbuf(); + _writeBuffer.clear(); + } + + return s.getId(); +} + +// ____________________________________________________________________________ +template +void ShapeContainer::open() { + _storage << _writeBuffer.rdbuf(); + _storage.flush(); + _writeBuffer.clear(); + + _ptr = 0; + _max = 0; + _storage.clear(); + _storage.seekg(0, std::ios::beg); +} + +// ____________________________________________________________________________ +template +bool ShapeContainer::nextStoragePt( + ad::cppgtfs::gtfs::flat::ShapePoint* ret) { + while (_storage.good() && !_storage.fail()) { + if (!_ptr) { + _storage >> _curId; + _storage.ignore(); + + _storage.read(reinterpret_cast(&_max), sizeof(_max)); + } + + if (!_storage.good() || _storage.fail()) return false; + + _storage.read(reinterpret_cast(&ret->lat), sizeof(ret->lat)); + _storage.read(reinterpret_cast(&ret->lng), sizeof(ret->lng)); + _storage.read(reinterpret_cast(&ret->travelDist), + sizeof(ret->travelDist)); + + ret->seq = _ptr + 1; + ret->id = _curId; + + if (_ptr + 1 == _max) + _ptr = 0; + else + _ptr++; + + if (!_storage.good() || _storage.fail()) return false; + + if (has(ret->id)) return true; + } + + return false; +} + +// ____________________________________________________________________________ +template +const std::string ShapeContainer::getRef(const std::string& id) const { + if (!has(id)) return ""; + return id; +} + +// 
____________________________________________________________________________ +template +std::string ShapeContainer::getRef(const std::string& id) { + if (!has(id)) return ""; + return id; +} diff --git a/src/pfaedle/gtfs/StopTime.h b/src/pfaedle/gtfs/StopTime.h new file mode 100644 index 0000000..db03334 --- /dev/null +++ b/src/pfaedle/gtfs/StopTime.h @@ -0,0 +1,74 @@ +// Copyright 2016, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#ifndef PFAEDLE_GTFS_STOPTIME_H_ +#define PFAEDLE_GTFS_STOPTIME_H_ + +#include +#include +#include +#include +#include "ad/cppgtfs/gtfs/Stop.h" +#include "ad/cppgtfs/gtfs/StopTime.h" +#include "util/Misc.h" + +using std::exception; +using std::string; + +namespace pfaedle { +namespace gtfs { + +template +class StopTime { + public: + StopTime(const ad::cppgtfs::gtfs::Time& at, const ad::cppgtfs::gtfs::Time& dt, + typename StopT::Ref s, uint32_t seq, const std::string& hs, + ad::cppgtfs::gtfs::flat::StopTime::PU_DO_TYPE put, + ad::cppgtfs::gtfs::flat::StopTime::PU_DO_TYPE dot, float distTrav, + bool isTp, uint8_t continuousDropOff, + uint8_t continuousPickup) + : _s(s), _sequence(seq), _dist(distTrav), _at(at), _dt(dt), _isTp(isTp) { + UNUSED(hs); + UNUSED(put); + UNUSED(dot); + UNUSED(distTrav); + UNUSED(continuousDropOff); + UNUSED(continuousPickup); + } + + const typename StopT::Ref getStop() const { return _s; } + typename StopT::Ref getStop() { return _s; } + void setShapeDistanceTravelled(double d) { _dist = d; } + + ad::cppgtfs::gtfs::Time getArrivalTime() const { + return _at; + } + ad::cppgtfs::gtfs::Time getDepartureTime() const { + return _dt; + } + + float getShapeDistanceTravelled() const { return _dist; } + + uint16_t getSeq() const { return _sequence; } + bool isTp() const { return _isTp; } + + private: + typename StopT::Ref _s; + uint32_t _sequence; + float _dist; + ad::cppgtfs::gtfs::Time _at, _dt; + bool _isTp; +}; + +template +struct StopTimeCompare { + bool 
operator()(const StopTimeT& lh, const StopTimeT& rh) const { + return lh.getSeq() < rh.getSeq(); + } +}; + +} // namespace gtfs +} // namespace pfaedle + +#endif // PFAEDLE_GTFS_STOPTIME_H_ diff --git a/src/pfaedle/gtfs/Writer.cpp b/src/pfaedle/gtfs/Writer.cpp new file mode 100644 index 0000000..c11dbdf --- /dev/null +++ b/src/pfaedle/gtfs/Writer.cpp @@ -0,0 +1,723 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#include + +#ifdef LIBZIP_FOUND +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include "ad/cppgtfs/Parser.h" +#include "ad/cppgtfs/Writer.h" +#include "ad/cppgtfs/gtfs/flat/Agency.h" +#include "ad/util/CsvWriter.h" +#include "pfaedle/gtfs/Writer.h" + +using ad::cppgtfs::Parser; +using ad::util::CsvWriter; +#ifdef LIBZIP_FOUND +using ad::util::ZipCsvParser; +#endif +using pfaedle::gtfs::Writer; +using util::getTmpFName; + +// ____________________________________________________________________________ +void Writer::write(gtfs::Feed* sourceFeed, const std::string& path) const { + bool toZip = + (path.size() > 3 && 0 == path.compare(path.size() - 4, 4, ".zip")); + + std::ofstream fs; + std::string gtfsPath(path); + std::string curFile; + std::string curFileTg; + + std::string tmpZip; + std::string zipFileName; + + if (gtfsPath.size() == 0) gtfsPath = "."; + +#ifdef LIBZIP_FOUND + zip* za = 0; + + if (toZip) { + const size_t slashIdx = path.rfind('/'); + if (slashIdx != std::string::npos) { + zipFileName = path.substr(slashIdx + 1, -1); + gtfsPath = path.substr(0, slashIdx); + } else { + zipFileName = path; + gtfsPath = "."; + } + + tmpZip = getTmpFName(gtfsPath, ".pfaedle-tmp", zipFileName); + + int zipErr = 0; + za = zip_open(tmpZip.c_str(), ZIP_CREATE | ZIP_TRUNCATE, &zipErr); + + if (zipErr != 0) { + char errBuf[100]; + zip_error_to_str(errBuf, sizeof(errBuf), zipErr, errno); + cannotWrite(tmpZip, gtfsPath + "/" + zipFileName); + std::stringstream 
ss; + ss << "(temporary file for " << (gtfsPath + "/" + zipFileName) + << ") Could not open ZIP file, reason was: " << errBuf; + throw ad::cppgtfs::WriterException(ss.str(), tmpZip); + } +#else + if (toZip) { + throw ad::cppgtfs::WriterException( + "Could not output ZIP file, pfaedle was compiled without libzip", path); +#endif + } else { + mkdir(path.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); + } + + try { + Parser ip(sourceFeed->getPath()); + + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "agency.txt"); + curFileTg = gtfsPath + "/agency.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeAgency(sourceFeed, &fs); + fs.close(); + + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "agency.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "stops.txt"); + curFileTg = gtfsPath + "/stops.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeStops(sourceFeed, &fs); + fs.close(); + + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "stops.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "routes.txt"); + curFileTg = gtfsPath + "/routes.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeRoutes(sourceFeed, &fs); + fs.close(); + + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "routes.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + + auto csvp = ip.getCsvParser("calendar.txt"); + if (csvp->isGood()) { + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "calendar.txt"); + curFileTg = gtfsPath + "/calendar.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeCalendar(sourceFeed, &fs); + 
fs.close(); + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "calendar.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + } + + csvp = ip.getCsvParser("calendar_dates.txt"); + if (csvp->isGood()) { + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "calendar_dates.txt"); + curFileTg = gtfsPath + "/calendar_dates.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeCalendarDates(sourceFeed, &fs); + fs.close(); + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "calendar_dates.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + } + + csvp = ip.getCsvParser("transfers.txt"); + if (csvp->isGood()) { + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "transfers.txt"); + curFileTg = gtfsPath + "/transfers.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeTransfers(sourceFeed, &fs); + fs.close(); + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "transfers.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + } + + csvp = ip.getCsvParser("fare_attributes.txt"); + if (csvp->isGood()) { + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "fare_attributes.txt"); + curFileTg = gtfsPath + "/fare_attributes.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeFares(sourceFeed, &fs); + fs.close(); + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "fare_attributes.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + } + + csvp = ip.getCsvParser("fare_rules.txt"); + if (csvp->isGood()) { + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "fare_rules.txt"); + curFileTg = gtfsPath + "/fare_rules.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + 
writeFareRules(sourceFeed, &fs); + fs.close(); + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "fare_rules.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + } + + csvp = ip.getCsvParser("pathways.txt"); + if (csvp->isGood()) { + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "pathways.txt"); + curFileTg = gtfsPath + "/pathways.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writePathways(sourceFeed, &fs); + fs.close(); + + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "pathways.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + } + + csvp = ip.getCsvParser("levels.txt"); + if (csvp->isGood()) { + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "levels.txt"); + curFileTg = gtfsPath + "/levels.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeLevels(sourceFeed, &fs); + fs.close(); + + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "levels.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + } + + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "shapes.txt"); + curFileTg = gtfsPath + "/shapes.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeShapes(sourceFeed, &fs); + fs.close(); + + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "shapes.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "trips.txt"); + curFileTg = gtfsPath + "/trips.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + bool hasFreqs = writeTrips(sourceFeed, &fs); + fs.close(); + + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "trips.txt"); +#endif + } else { + if 
(std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + + csvp = ip.getCsvParser("frequencies.txt"); + if (hasFreqs && csvp->isGood()) { + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "frequencies.txt"); + curFileTg = gtfsPath + "/frequencies.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeFrequencies(sourceFeed, &fs); + fs.close(); + + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "frequencies.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + } + + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "stop_times.txt"); + curFileTg = gtfsPath + "/stop_times.txt"; + fs.open(curFile.c_str()); + + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeStopTimes(sourceFeed, &fs); + fs.close(); + + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "stop_times.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + + if (!sourceFeed->getPublisherUrl().empty() && + !sourceFeed->getPublisherName().empty()) { + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "feed_info.txt"); + curFileTg = gtfsPath + "/feed_info.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeFeedInfo(sourceFeed, &fs); + fs.close(); + + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "feed_info.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + } + + curFile = getTmpFName(gtfsPath, ".pfaedle-tmp", "attributions.txt"); + curFileTg = gtfsPath + "/attributions.txt"; + fs.open(curFile.c_str()); + if (!fs.good()) cannotWrite(curFile, curFileTg); + writeAttribution(sourceFeed, &fs); + fs.close(); + + if (toZip) { +#ifdef LIBZIP_FOUND + moveIntoZip(za, curFile, "attributions.txt"); +#endif + } else { + if (std::rename(curFile.c_str(), curFileTg.c_str())) + cannotWrite(curFileTg); + } + 
} catch (...) { +#ifdef LIBZIP_FOUND + zip_discard(za); +#endif + throw; + } + + if (toZip) { +#ifdef LIBZIP_FOUND + std::string targetZipPath = gtfsPath + "/" + zipFileName; + if (!za) cannotWrite(targetZipPath); + zip_close(za); + if (std::rename(tmpZip.c_str(), targetZipPath.c_str())) + cannotWrite(targetZipPath); +#endif + } +} + +// ____________________________________________________________________________ +void Writer::writeAttribution(gtfs::Feed*, std::ostream* os) const { + auto csvw = ad::cppgtfs::Writer::getAttributionCsvw(os); + + csvw->flushLine(); + csvw->writeString("OpenStreetMap contributors"); + csvw->writeString("https://www.openstreetmap.org/copyright"); + csvw->writeInt(1); + + csvw->flushLine(); +} + +// ____________________________________________________________________________ +void Writer::writeFeedInfo(gtfs::Feed* f, std::ostream* os) const { + auto csvw = ad::cppgtfs::Writer::getFeedInfoCsvw(os); + csvw->flushLine(); + csvw->writeString(f->getPublisherName()); + csvw->writeString(f->getPublisherUrl()); + csvw->writeString(f->getLang()); + if (!f->getStartDate().empty()) + csvw->writeInt(f->getStartDate().getYYYYMMDD()); + else + csvw->skip(); + if (!f->getEndDate().empty()) + csvw->writeInt(f->getEndDate().getYYYYMMDD()); + else + csvw->skip(); + csvw->writeString(f->getVersion()); + csvw->writeString(f->getContactEmail()); + csvw->writeString(f->getContactUrl()); + csvw->writeString(f->getDefaultLang()); + csvw->flushLine(); +} + +// ____________________________________________________________________________ +void Writer::writePathways(gtfs::Feed* sourceFeed, std::ostream* os) const { + Parser p(sourceFeed->getPath()); + auto csvp = p.getCsvParser("pathways.txt"); + ad::cppgtfs::Writer w; + + auto csvw = ad::cppgtfs::Writer::getPathwayCsvw(os); + csvw->flushLine(); + + ad::cppgtfs::gtfs::flat::Pathway fa; + auto flds = Parser::getPathwayFlds(csvp.get()); + + while (p.nextPathway(csvp.get(), &fa, flds)) { + w.writePathway(fa, 
csvw.get()); + } +} + +// ____________________________________________________________________________ +void Writer::writeLevels(gtfs::Feed* sourceFeed, std::ostream* os) const { + Parser p(sourceFeed->getPath()); + auto csvp = p.getCsvParser("levels.txt"); + ad::cppgtfs::Writer w; + + auto csvw = ad::cppgtfs::Writer::getLevelCsvw(os); + csvw->flushLine(); + + ad::cppgtfs::gtfs::flat::Level fa; + auto flds = Parser::getLevelFlds(csvp.get()); + + while (p.nextLevel(csvp.get(), &fa, flds)) { + w.writeLevel(fa, csvw.get()); + } +} + +// ____________________________________________________________________________ +void Writer::writeAgency(gtfs::Feed* sourceFeed, std::ostream* os) const { + Parser p(sourceFeed->getPath()); + auto csvp = p.getCsvParser("agency.txt"); + + ad::cppgtfs::Writer w; + + auto csvw = + ad::cppgtfs::Writer::getAgencyCsvw(os, sourceFeed->getAgencyAddFlds()); + csvw->flushLine(); + + ad::cppgtfs::gtfs::flat::Agency fa; + auto flds = Parser::getAgencyFlds(csvp.get()); + + while (p.nextAgency(csvp.get(), &fa, flds)) { + w.writeAgency(fa, csvw.get(), sourceFeed->getAgencyAddFlds()); + } +} + +// ____________________________________________________________________________ +void Writer::writeStops(gtfs::Feed* sourceFeed, std::ostream* os) const { + Parser p(sourceFeed->getPath()); + auto csvp = p.getCsvParser("stops.txt"); + ad::cppgtfs::Writer w; + + auto csvw = + ad::cppgtfs::Writer::getStopsCsvw(os, sourceFeed->getStopAddFlds()); + csvw->flushLine(); + + ad::cppgtfs::gtfs::flat::Stop s; + auto flds = Parser::getStopFlds(csvp.get()); + + while (p.nextStop(csvp.get(), &s, flds)) { + w.writeStop(s, csvw.get(), sourceFeed->getStopAddFlds()); + } +} + +// ____________________________________________________________________________ +void Writer::writeRoutes(gtfs::Feed* sourceFeed, std::ostream* os) const { + ad::cppgtfs::Writer w; + + auto csvw = + ad::cppgtfs::Writer::getRoutesCsvw(os, sourceFeed->getRouteAddFlds()); + csvw->flushLine(); + + for (auto r : 
sourceFeed->getRoutes()) { + w.writeRoute(r.second->getFlat(), csvw.get(), + sourceFeed->getRouteAddFlds()); + } +} + +// ____________________________________________________________________________ +void Writer::writeCalendar(gtfs::Feed* sourceFeed, std::ostream* os) const { + Parser p(sourceFeed->getPath()); + auto csvp = p.getCsvParser("calendar.txt"); + ad::cppgtfs::Writer w; + + auto csvw = ad::cppgtfs::Writer::getCalendarCsvw(os); + csvw->flushLine(); + + ad::cppgtfs::gtfs::flat::Calendar c; + auto flds = Parser::getCalendarFlds(csvp.get()); + + while (p.nextCalendar(csvp.get(), &c, flds)) { + w.writeCalendar(c, csvw.get()); + } +} + +// ____________________________________________________________________________ +void Writer::writeCalendarDates(gtfs::Feed* sourceFeed, + std::ostream* os) const { + Parser p(sourceFeed->getPath()); + auto csvp = p.getCsvParser("calendar_dates.txt"); + ad::cppgtfs::Writer w; + + auto csvw = ad::cppgtfs::Writer::getCalendarDatesCsvw(os); + csvw->flushLine(); + + ad::cppgtfs::gtfs::flat::CalendarDate c; + auto flds = Parser::getCalendarDateFlds(csvp.get()); + + while (p.nextCalendarDate(csvp.get(), &c, flds)) { + w.writeCalendarDate(c, csvw.get()); + } +} + +// ____________________________________________________________________________ +void Writer::writeFrequencies(gtfs::Feed* sourceFeed, std::ostream* os) const { + Parser p(sourceFeed->getPath()); + auto csvp = p.getCsvParser("frequencies.txt"); + ad::cppgtfs::Writer w; + + auto csvw = ad::cppgtfs::Writer::getFrequencyCsvw(os); + csvw->flushLine(); + + ad::cppgtfs::gtfs::flat::Frequency f; + auto flds = Parser::getFrequencyFlds(csvp.get()); + + while (p.nextFrequency(csvp.get(), &f, flds)) { + w.writeFrequency(f, csvw.get()); + } +} + +// ____________________________________________________________________________ +void Writer::writeTransfers(gtfs::Feed* sourceFeed, std::ostream* os) const { + Parser p(sourceFeed->getPath()); + auto csvp = p.getCsvParser("transfers.txt"); + 
ad::cppgtfs::Writer w; + + auto csvw = ad::cppgtfs::Writer::getTransfersCsvw(os); + csvw->flushLine(); + + ad::cppgtfs::gtfs::flat::Transfer t; + auto flds = Parser::getTransfersFlds(csvp.get()); + + while (p.nextTransfer(csvp.get(), &t, flds)) { + w.writeTransfer(t, csvw.get()); + } +} + +// ____________________________________________________________________________ +void Writer::writeFares(gtfs::Feed* sourceFeed, std::ostream* os) const { + Parser p(sourceFeed->getPath()); + auto csvp = p.getCsvParser("fare_attributes.txt"); + ad::cppgtfs::Writer w; + + auto csvw = ad::cppgtfs::Writer::getFaresCsvw(os); + csvw->flushLine(); + + ad::cppgtfs::gtfs::flat::Fare f; + auto flds = Parser::getFareFlds(csvp.get()); + + while (p.nextFare(csvp.get(), &f, flds)) { + w.writeFare(f, csvw.get()); + } +} + +// ____________________________________________________________________________ +void Writer::writeFareRules(gtfs::Feed* sourceFeed, std::ostream* os) const { + Parser p(sourceFeed->getPath()); + auto csvp = p.getCsvParser("fare_rules.txt"); + ad::cppgtfs::Writer w; + + auto csvw = ad::cppgtfs::Writer::getFareRulesCsvw(os); + csvw->flushLine(); + + ad::cppgtfs::gtfs::flat::FareRule f; + auto flds = Parser::getFareRuleFlds(csvp.get()); + + while (p.nextFareRule(csvp.get(), &f, flds)) { + w.writeFareRule(f, csvw.get()); + } +} + +// ____________________________________________________________________________ +void Writer::writeShapes(gtfs::Feed* sourceFeed, std::ostream* os) const { + auto csvw = ad::cppgtfs::Writer::getShapesCsvw(os); + csvw->flushLine(); + ad::cppgtfs::gtfs::flat::ShapePoint sp; + ad::cppgtfs::Writer w; + + Parser p(sourceFeed->getPath()); + auto csvp = p.getCsvParser("shapes.txt"); + + if (csvp->isGood()) { + auto flds = Parser::getShapeFlds(csvp.get()); + + std::string curShapeId; + std::string curSkipShapeId; + + while (p.nextShapePoint(csvp.get(), &sp, flds)) { + if (sp.id == curSkipShapeId) continue; + if (sp.id != curShapeId) { + if 
(sourceFeed->getShapes().has(sp.id)) { + curShapeId = sp.id; + } else { + curSkipShapeId = sp.id; + continue; + } + } + + w.writeShapePoint(sp, csvw.get()); + } + } + + sourceFeed->getShapes().open(); + while (sourceFeed->getShapes().nextStoragePt(&sp)) { + w.writeShapePoint(sp, csvw.get()); + } +} + +// ____________________________________________________________________________ +bool Writer::writeTrips(gtfs::Feed* sourceFeed, std::ostream* os) const { + ad::cppgtfs::Writer w; + bool hasFreqs = false; + + auto csvw = + ad::cppgtfs::Writer::getTripsCsvw(os, sourceFeed->getTripAddFlds()); + csvw->flushLine(); + + for (auto t : sourceFeed->getTrips()) { + if (t.getFrequencies().size()) hasFreqs = true; + w.writeTrip(t.getFlat(), csvw.get(), sourceFeed->getTripAddFlds()); + } + + return hasFreqs; +} + +// ____________________________________________________________________________ +void Writer::writeStopTimes(gtfs::Feed* sourceFeed, std::ostream* os) const { + Parser p(sourceFeed->getPath()); + auto csvp = p.getCsvParser("stop_times.txt"); + ad::cppgtfs::Writer w; + + auto csvw = ad::cppgtfs::Writer::getStopTimesCsvw(os); + csvw->flushLine(); + + ad::cppgtfs::gtfs::flat::StopTime st; + auto flds = Parser::getStopTimeFlds(csvp.get()); + + std::string curTripId; + Trip* cur = 0; + + while (p.nextStopTime(csvp.get(), &st, flds)) { + // we may have changed to distance field + if (curTripId != st.trip) { + cur = sourceFeed->getTrips().get(st.trip); + curTripId = st.trip; + } + for (const auto& stN : cur->getStopTimes()) { + if (stN.getSeq() == st.sequence) + st.shapeDistTravelled = stN.getShapeDistanceTravelled(); + } + + w.writeStopTime(st, csvw.get()); + } +} + +// ___________________________________________________________________________ +void Writer::cannotWrite(const std::string& file) { + std::stringstream ss; + ss << "Could not write to file"; + throw ad::cppgtfs::WriterException(ss.str(), file); +} + +// 
___________________________________________________________________________ +void Writer::cannotWrite(const std::string& file, const std::string& file2) { + std::stringstream ss; + ss << "(temporary file for " << file2 << ") Could not write to file"; + throw ad::cppgtfs::WriterException(ss.str(), file); +} + +// ___________________________________________________________________________ +#ifdef LIBZIP_FOUND +void Writer::moveIntoZip(zip* za, const std::string& sourcePath, + const std::string& targetPath) { + zip_source_t* s; + FILE* fp = fopen(sourcePath.c_str(), "r"); + if (fp == 0) { + std::stringstream ss; + ss << "(temporary file for " << targetPath << ") Could not open file"; + throw ad::cppgtfs::WriterException(ss.str(), sourcePath); + } + + // immediately unlink + unlink(sourcePath.c_str()); + + if ((s = zip_source_filep(za, fp, 0, -1)) == 0 || + zip_file_add(za, targetPath.c_str(), s, ZIP_FL_ENC_UTF_8) < 0) { + zip_source_free(s); + cannotWrite(targetPath); + } +} +#endif diff --git a/src/pfaedle/gtfs/Writer.h b/src/pfaedle/gtfs/Writer.h new file mode 100644 index 0000000..e824edc --- /dev/null +++ b/src/pfaedle/gtfs/Writer.h @@ -0,0 +1,59 @@ +// Copyright 2016, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifndef PFAEDLE_GTFS_WRITER_H_ +#define PFAEDLE_GTFS_WRITER_H_ + +#include +#include +#ifdef LIBZIP_FOUND +#include +#endif + +#include "Feed.h" +#include "ad/cppgtfs/Parser.h" +#include "ad/cppgtfs/Writer.h" + +namespace pfaedle { +namespace gtfs { + +class Writer { + public: + Writer() {} + + void write(Feed* sourceFeed, const std::string& path) const; + + private: + void writeFeedInfo(Feed* f, std::ostream* os) const; + void writeAgency(Feed* f, std::ostream* os) const; + void writeStops(Feed* f, std::ostream* os) const; + void writeRoutes(Feed* f, std::ostream* os) const; + void writeCalendar(Feed* f, std::ostream* os) const; + void writeCalendarDates(Feed* f, std::ostream* os) const; + void writeFrequencies(Feed* f, std::ostream* os) const; + void writeTransfers(Feed* f, std::ostream* os) const; + void writeFares(Feed* f, std::ostream* os) const; + void writeFareRules(Feed* f, std::ostream* os) const; + void writeShapes(Feed* f, std::ostream* os) const; + bool writeTrips(Feed* f, std::ostream* os) const; + void writeStopTimes(Feed* f, std::ostream* os) const; + void writeLevels(Feed* f, std::ostream* os) const; + void writePathways(Feed* f, std::ostream* os) const; + void writeAttribution(Feed* f, std::ostream* os) const; + + static void cannotWrite(const std::string& file, const std::string& file2); + static void cannotWrite(const std::string& file); + +#ifdef LIBZIP_FOUND + static void moveIntoZip(zip* za, const std::string& sourcePath, + const std::string& targetPath); +#endif + + mutable std::ifstream _ifs; +}; + +} // namespace gtfs +} // namespace pfaedle + +#endif // PFAEDLE_GTFS_WRITER_H_ diff --git a/src/pfaedle/netgraph/EdgePL.h b/src/pfaedle/netgraph/EdgePL.h index 8950e7d..c6803ea 100644 --- a/src/pfaedle/netgraph/EdgePL.h +++ b/src/pfaedle/netgraph/EdgePL.h @@ -7,12 +7,14 @@ #include #include -#include +#include #include "ad/cppgtfs/gtfs/Feed.h" +#include "pfaedle/gtfs/Feed.h" +#include "util/String.h" #include 
"util/geo/GeoGraph.h" using util::geograph::GeoEdgePL; -using ad::cppgtfs::gtfs::Trip; +using pfaedle::gtfs::Trip; namespace pfaedle { namespace netgraph { @@ -21,19 +23,32 @@ namespace netgraph { * A payload class for edges on a network graph - that is a graph * that exactly represents a physical public transit network */ -class EdgePL : public GeoEdgePL { +class EdgePL { public: EdgePL() {} - EdgePL(const util::geo::FLine& l, const std::set& trips) - : _l(l), _trips(trips) {} - const util::geo::FLine* getGeom() const { return &_l; } - void getAttrs(std::map* obj) const { - (*obj)["numtrips"] = std::to_string(_trips.size()); + EdgePL(const LINE& l, const std::vector& trips) + : _l(l), _trips(trips) { + for (const auto t : _trips) { + _routeShortNames.insert(t->getRoute()->getShortName()); + _tripShortNames.insert(t->getShortname()); + } + } + const LINE* getGeom() const { return &_l; } + util::json::Dict getAttrs() const { + util::json::Dict obj; + obj["num_trips"] = static_cast(_trips.size()); + obj["route_short_names"] = + util::json::Array(_routeShortNames.begin(), _routeShortNames.end()); + obj["trip_short_names"] = + util::json::Array(_tripShortNames.begin(), _tripShortNames.end()); + return obj; } private: - util::geo::FLine _l; - std::set _trips; + LINE _l; + std::vector _trips; + std::set _routeShortNames; + std::set _tripShortNames; }; } // namespace netgraph } // namespace pfaedle diff --git a/src/pfaedle/netgraph/Graph.h b/src/pfaedle/netgraph/Graph.h index 2d8f3f6..6c6b109 100644 --- a/src/pfaedle/netgraph/Graph.h +++ b/src/pfaedle/netgraph/Graph.h @@ -11,8 +11,8 @@ using util::geo::Point; using util::geo::Line; -using util::geo::FPoint; -using util::geo::FLine; + + namespace pfaedle { namespace netgraph { diff --git a/src/pfaedle/netgraph/NodePL.h b/src/pfaedle/netgraph/NodePL.h index 8588536..fd33a1f 100644 --- a/src/pfaedle/netgraph/NodePL.h +++ b/src/pfaedle/netgraph/NodePL.h @@ -19,18 +19,16 @@ namespace netgraph { * A payload class for edges on a 
network graph - that is a graph * that exactly represents a physical public transit network */ -class NodePL : public GeoNodePL { +class NodePL { public: NodePL() {} - NodePL(const util::geo::FPoint& geom) { _geom = geom; } // NOLINT + NodePL(const POINT& geom) { _geom = geom; } // NOLINT - const util::geo::FPoint* getGeom() const { return &_geom; } - void getAttrs(std::map* attrs) const { - UNUSED(attrs); - } + const POINT* getGeom() const { return &_geom; } + util::json::Dict getAttrs() const { return util::json::Dict(); } private: - util::geo::FPoint _geom; + POINT _geom; }; } // namespace netgraph } // namespace pfaedle diff --git a/src/pfaedle/osm/BBoxIdx.cpp b/src/pfaedle/osm/BBoxIdx.cpp index e5d7c08..c841d47 100644 --- a/src/pfaedle/osm/BBoxIdx.cpp +++ b/src/pfaedle/osm/BBoxIdx.cpp @@ -7,10 +7,10 @@ using pfaedle::osm::BBoxIdx; // _____________________________________________________________________________ -BBoxIdx::BBoxIdx(float padding) : _padding(padding), _size(0) {} +BBoxIdx::BBoxIdx(double padding) : _padding(padding), _size(0) {} // _____________________________________________________________________________ -void BBoxIdx::add(Box box) { +void BBoxIdx::add(Box box) { // division by 83.000m is only correct here around a latitude deg of 25, // but should be a good heuristic. 
1 deg is around 63km at latitude deg of 44, // and 110 at deg=0, since we usually dont do map matching in the arctic, @@ -24,21 +24,46 @@ void BBoxIdx::add(Box box) { size_t BBoxIdx::size() const { return _size; } // _____________________________________________________________________________ -bool BBoxIdx::contains(const Point& p) const { +bool BBoxIdx::contains(const Point& p) const { return treeHas(p, _root); } // _____________________________________________________________________________ -util::geo::Box BBoxIdx::getFullWebMercBox() const { - return util::geo::FBox( - util::geo::latLngToWebMerc(_root.box.getLowerLeft().getY(), - _root.box.getLowerLeft().getX()), - util::geo::latLngToWebMerc(_root.box.getUpperRight().getY(), - _root.box.getUpperRight().getX())); +BOX BBoxIdx::getFullWebMercBox() const { + return BOX( + util::geo::latLngToWebMerc(_root.box.getLowerLeft().getY(), + _root.box.getLowerLeft().getX()), + util::geo::latLngToWebMerc(_root.box.getUpperRight().getY(), + _root.box.getUpperRight().getX())); } // _____________________________________________________________________________ -bool BBoxIdx::treeHas(const Point& p, const BBoxIdxNd& nd) const { +BOX BBoxIdx::getFullBox() const { return _root.box; } + +// _____________________________________________________________________________ +std::vector> BBoxIdx::getLeafs() const { + std::vector> ret; + getLeafsRec(_root, &ret); + return ret; +} + +// _____________________________________________________________________________ +void BBoxIdx::getLeafsRec(const BBoxIdxNd& nd, + std::vector>* ret) const { + if (!nd.childs.size()) { + ret->push_back(nd.box); + return; + } + + for (const auto& child : nd.childs) { + getLeafsRec(child, ret); + } + + return; +} + +// _____________________________________________________________________________ +bool BBoxIdx::treeHas(const Point& p, const BBoxIdxNd& nd) const { if (!nd.childs.size()) return util::geo::contains(p, nd.box); for (const auto& child : nd.childs) { 
if (util::geo::contains(p, child.box)) return treeHas(p, child); @@ -48,7 +73,7 @@ bool BBoxIdx::treeHas(const Point& p, const BBoxIdxNd& nd) const { } // _____________________________________________________________________________ -void BBoxIdx::addToTree(const Box& box, BBoxIdxNd* nd, size_t lvl) { +void BBoxIdx::addToTree(const Box& box, BBoxIdxNd* nd, size_t lvl) { double bestCommonArea = 0; ssize_t bestChild = -1; diff --git a/src/pfaedle/osm/BBoxIdx.h b/src/pfaedle/osm/BBoxIdx.h index a63122a..25be2ae 100644 --- a/src/pfaedle/osm/BBoxIdx.h +++ b/src/pfaedle/osm/BBoxIdx.h @@ -6,6 +6,7 @@ #define PFAEDLE_OSM_BBOXIDX_H_ #include +#include "pfaedle/Def.h" #include "util/geo/Geo.h" namespace pfaedle { @@ -15,9 +16,9 @@ using util::geo::Box; using util::geo::Point; struct BBoxIdxNd { - BBoxIdxNd() : box(util::geo::minbox()) {} - explicit BBoxIdxNd(const Box& box) : box(box) {} - Box box; + BBoxIdxNd() : box(util::geo::minbox()) {} + explicit BBoxIdxNd(const Box& box) : box(box) {} + Box box; std::vector childs; }; @@ -26,28 +27,37 @@ struct BBoxIdxNd { */ class BBoxIdx { public: - explicit BBoxIdx(float padding); + explicit BBoxIdx(double padding); // Add a bounding box to this index - void add(Box box); + void add(Box box); // Check if a point is contained in this index - bool contains(const Point& box) const; + bool contains(const Point& box) const; // Return the full total bounding box of this index - util::geo::Box getFullWebMercBox() const; + BOX getFullWebMercBox() const; + + // Return the full total bounding box of this index + BOX getFullBox() const; // Return the size of this index size_t size() const; + // return the leaf bounding boxes of this idx + std::vector> getLeafs() const; + private: double _padding; size_t _size; BBoxIdxNd _root; - void addToTree(const Box& box, BBoxIdxNd* nd, size_t lvl); - bool treeHas(const Point& p, const BBoxIdxNd& nd) const; + void addToTree(const Box& box, BBoxIdxNd* nd, size_t lvl); + bool treeHas(const Point& p, const 
BBoxIdxNd& nd) const; + + void getLeafsRec(const BBoxIdxNd& nd, + std::vector>* ret) const; static const size_t MAX_LVL = 5; static constexpr double MIN_COM_AREA = 0.0; diff --git a/src/pfaedle/osm/Osm.h b/src/pfaedle/osm/Osm.h index 2fd5bfa..a57f9ea 100644 --- a/src/pfaedle/osm/Osm.h +++ b/src/pfaedle/osm/Osm.h @@ -5,10 +5,12 @@ #ifndef PFAEDLE_OSM_OSM_H_ #define PFAEDLE_OSM_OSM_H_ +#include + #include #include -#include #include +#include #include namespace pfaedle { diff --git a/src/pfaedle/osm/OsmBuilder.cpp b/src/pfaedle/osm/OsmBuilder.cpp index 455bb1d..ce30315 100644 --- a/src/pfaedle/osm/OsmBuilder.cpp +++ b/src/pfaedle/osm/OsmBuilder.cpp @@ -3,6 +3,7 @@ // Authors: Patrick Brosi #include + #include #include #include @@ -12,47 +13,56 @@ #include #include #include + +#include "pfaedle/Def.h" +#include "pfaedle/_config.h" #include "pfaedle/osm/BBoxIdx.h" #include "pfaedle/osm/Osm.h" #include "pfaedle/osm/OsmBuilder.h" #include "pfaedle/osm/OsmFilter.h" #include "pfaedle/osm/Restrictor.h" -#include "pfaedle/trgraph/StatGroup.h" #include "util/Misc.h" #include "util/Nullable.h" #include "util/log/Log.h" -#include "xml/File.h" +#include "pfxml/pfxml.h" -using util::geo::webMercMeterDist; -using util::geo::Box; -using util::Nullable; -using pfaedle::trgraph::Normalizer; +using ad::cppgtfs::gtfs::Stop; +using pfaedle::osm::BlockSearch; +using pfaedle::osm::EdgeGrid; +using pfaedle::osm::EqSearch; +using pfaedle::osm::NodeGrid; +using pfaedle::osm::OsmBuilder; +using pfaedle::osm::OsmNode; +using pfaedle::osm::OsmRel; +using pfaedle::osm::OsmWay; +using pfaedle::trgraph::Component; +using pfaedle::trgraph::Edge; +using pfaedle::trgraph::EdgePL; using pfaedle::trgraph::Graph; using pfaedle::trgraph::Node; using pfaedle::trgraph::NodePL; -using pfaedle::trgraph::Edge; -using pfaedle::trgraph::EdgePL; -using pfaedle::trgraph::TransitEdgeLine; +using pfaedle::trgraph::Normalizer; using pfaedle::trgraph::StatInfo; -using pfaedle::trgraph::StatGroup; -using 
pfaedle::trgraph::Component; -using pfaedle::osm::OsmBuilder; -using pfaedle::osm::OsmWay; -using pfaedle::osm::OsmRel; -using pfaedle::osm::OsmNode; -using pfaedle::osm::EdgeGrid; -using pfaedle::osm::NodeGrid; -using ad::cppgtfs::gtfs::Stop; +using pfaedle::trgraph::TransitEdgeLine; +using util::Nullable; +using util::geo::Box; +using util::geo::M_PER_DEG; + +// _____________________________________________________________________________ +bool EqSearch::operator()(const Node* cand, const StatInfo* si) const { + return cand->pl().getSI() && cand->pl().getSI()->simi(si) > minSimi; +} // _____________________________________________________________________________ OsmBuilder::OsmBuilder() {} // _____________________________________________________________________________ void OsmBuilder::read(const std::string& path, const OsmReadOpts& opts, - Graph* g, const BBoxIdx& bbox, size_t gridSize, - router::FeedStops* fs, Restrictor* res) { + Graph* g, const BBoxIdx& bbox, double gridSize, + Restrictor* res) { if (!bbox.size()) return; - if (!fs->size()) return; + + LOG(INFO) << "Reading OSM file " << path << " ... 
"; NodeSet orphanStations; EdgTracks eTracks; @@ -71,7 +81,7 @@ void OsmBuilder::read(const std::string& path, const OsmReadOpts& opts, OsmFilter filter(opts); - xml::File xml(path); + pfxml::file xml(path); // we do four passes of the file here to be as memory creedy as possible: // - the first pass collects all node IDs which are @@ -87,137 +97,220 @@ void OsmBuilder::read(const std::string& path, const OsmReadOpts& opts, // * match the filter criteria // * have been used in a way in pass 3 - LOG(VDEBUG) << "Reading bounding box nodes..."; + LOG(DEBUG) << "Reading bounding box nodes..."; skipUntil(&xml, "node"); - xml::ParserState nodeBeg = xml.state(); - xml::ParserState edgesBeg = + pfxml::parser_state nodeBeg = xml.state(); + pfxml::parser_state edgesBeg = readBBoxNds(&xml, &bboxNodes, &noHupNodes, filter, bbox); - LOG(VDEBUG) << "Reading relations..."; + LOG(DEBUG) << "Reading relations..."; skipUntil(&xml, "relation"); readRels(&xml, &intmRels, &nodeRels, &wayRels, filter, attrKeys[2], &rawRests); - LOG(VDEBUG) << "Reading edges..."; - xml.setState(edgesBeg); + LOG(DEBUG) << "Reading edges..."; + xml.set_state(edgesBeg); readEdges(&xml, g, intmRels, wayRels, filter, bboxNodes, &nodes, &multNodes, noHupNodes, attrKeys[1], rawRests, res, intmRels.flat, &eTracks, opts); - LOG(VDEBUG) << "Reading kept nodes..."; - xml.setState(nodeBeg); + LOG(DEBUG) << "Reading kept nodes..."; + xml.set_state(nodeBeg); readNodes(&xml, g, intmRels, nodeRels, filter, bboxNodes, &nodes, &multNodes, &orphanStations, attrKeys[0], intmRels.flat, opts); } - LOG(VDEBUG) << "OSM ID set lookups: " << osm::OsmIdSet::LOOKUPS - << ", file lookups: " << osm::OsmIdSet::FLOOKUPS; + LOG(DEBUG) << "OSM ID set lookups: " << osm::OsmIdSet::LOOKUPS + << ", file lookups: " << osm::OsmIdSet::FLOOKUPS; - LOG(VDEBUG) << "Applying edge track numbers..."; + LOG(DEBUG) << "Applying edge track numbers..."; writeEdgeTracks(eTracks); eTracks.clear(); { - LOG(VDEBUG) << "Fixing gaps..."; - NodeGrid ng = 
buildNodeIdx(g, gridSize, bbox.getFullWebMercBox(), false); + LOG(DEBUG) << "Fixing gaps..."; + NodeGrid ng = buildNodeIdx(g, gridSize, bbox.getFullBox(), false); + LOG(DEBUG) << "Grid size of " << ng.getXWidth() << "x" << ng.getYHeight(); fixGaps(g, &ng); } - LOG(VDEBUG) << "Writing edge geoms..."; - writeGeoms(g); + LOG(DEBUG) << "Snapping stations..."; + snapStats(opts, g, bbox, gridSize, res, orphanStations); - { - NodeGrid sng = buildNodeIdx(g, gridSize, bbox.getFullWebMercBox(), true); - EdgeGrid eg = buildEdgeIdx(g, gridSize, bbox.getFullWebMercBox()); + LOG(DEBUG) << "Collapsing edges..."; + collapseEdges(g); - LOG(DEBUG) << "Grid size of " << sng.getXWidth() << "x" << sng.getYHeight(); + LOG(DEBUG) << "Writing edge geoms..."; + writeGeoms(g, opts); - for (double d : opts.maxSnapDistances) { - for (auto s : orphanStations) { - FPoint geom = *s->pl().getGeom(); - NodePL pl = s->pl(); - pl.getSI()->setIsFromOsm(false); - const auto& r = snapStation(g, &pl, &eg, &sng, opts, res, false, d); - groupStats(r); - for (auto n : r) { - // if the snapped station is very near to the original OSM - // station, set is-from-osm to true - if (webMercMeterDist(geom, *n->pl().getGeom()) < - opts.maxOsmStationDistance) { - if (n->pl().getSI()) n->pl().getSI()->setIsFromOsm(true); - } - } - } + LOG(DEBUG) << "Deleting orphan nodes..."; + deleteOrphNds(g, opts); + + LOG(DEBUG) << "Writing graph components..."; + // the restrictor is needed here to prevent connections in the graph + // which are not possible in reality + uint32_t comps = writeComps(g, opts); + + LOG(DEBUG) << "Simplifying geometries..."; + simplifyGeoms(g); + + LOG(DEBUG) << "Writing other-direction edges..."; + writeODirEdgs(g, res); + + LOG(DEBUG) << "Write wrong-direction costs..."; + writeOneWayPens(g, opts); + + if (opts.noLinesPunishFact != 1.0) { + LOG(DEBUG) << "Write no-line pens..."; + writeNoLinePens(g, opts); + } + + LOG(DEBUG) << "Write dummy node self-edges..."; + writeSelfEdgs(g); + + size_t 
numEdges = 0; + + for (auto* n : g->getNds()) { + numEdges += n->getAdjListOut().size(); + } + + LOG(DEBUG) << "Graph has " << g->getNds().size() << " nodes, " << numEdges + << " edges and " << comps + << " connected component(s) with more than 1 node"; + LOG(DEBUG) << _lines.size() << " transit lines have been read."; +} + +// _____________________________________________________________________________ +void OsmBuilder::osmfilterRuleWrite(std::ostream* out, + const std::vector& opts, + const BBoxIdx& latLngBox) const { + UNUSED(latLngBox); + OsmIdSet bboxNodes, noHupNodes; + MultAttrMap emptyF; + + RelLst rels; + OsmIdList ways; + RelMap nodeRels, wayRels; + + NIdMap nodes; + + OsmFilter filter; + + AttrKeySet attrKeys[3] = {}; + + for (const OsmReadOpts& o : opts) { + filter = filter.merge(OsmFilter(o.keepFilter, o.dropFilter)); + getKeptAttrKeys(o, attrKeys); + } + + *out << "--keep=\n"; + + for (auto r : filter.getKeepRules()) { + for (auto val : r.second) { + *out << r.first << "="; + if (val.first != "*") *out << val.first; + *out << "\n"; } + } - for (size_t i = 0; i < opts.maxSnapDistances.size(); i++) { - double d = opts.maxSnapDistances[i]; - for (auto& s : *fs) { - auto pl = plFromGtfs(s.first, opts); + *out << "\n"; - StatGroup* group = - groupStats(snapStation(g, &pl, &eg, &sng, opts, res, - i == opts.maxSnapDistances.size() - 1, d)); + *out << "--keep-tags=\n"; + *out << "all\n"; - if (group) { - group->addStop(s.first); - (*fs)[s.first] = *group->getNodes().begin(); - } else if (i == - opts.maxSnapDistances.size() - 1) { // only fail on last - // add a group with only this stop in it - StatGroup* dummyGroup = new StatGroup(); - Node* dummyNode = g->addNd(pl); + for (const auto& keys : attrKeys) { + for (auto val : keys) { + *out << val << "=\n"; + } + } +} - dummyNode->pl().getSI()->setGroup(dummyGroup); - dummyGroup->addNode(dummyNode); - dummyGroup->addStop(s.first); - (*fs)[s.first] = dummyNode; - LOG(WARN) << "Could not snap station " - << "(" 
<< pl.getSI()->getName() << ")" - << " (" << s.first->getLat() << "," << s.first->getLng() - << ")"; +// _____________________________________________________________________________ +void OsmBuilder::overpassQryWrite(std::ostream* out, + const std::vector& opts, + const BBoxIdx& latLngBox) const { + OsmIdSet bboxNodes, noHupNodes; + MultAttrMap emptyF; + + RelLst rels; + OsmIdList ways; + RelMap nodeRels, wayRels; + + // TODO(patrick): not needed here! + Restrictions rests; + + NIdMap nodes; + + // always empty + NIdMultMap multNodes; + util::xml::XmlWriter wr(out, true, 4); + + *out << "\n"; + wr.openComment(); + wr.writeText(" - written by pfaedle -"); + wr.closeTag(); + wr.openTag("osm-script"); + + OsmFilter filter; + + for (const OsmReadOpts& o : opts) { + filter = filter.merge(OsmFilter(o.keepFilter, o.dropFilter)); + } + + wr.openTag("union"); + size_t c = 0; + for (auto box : latLngBox.getLeafs()) { + if (box.getLowerLeft().getX() > box.getUpperRight().getX()) continue; + c++; + wr.openComment(); + wr.writeText(std::string("Bounding box #") + std::to_string(c) + " (" + + std::to_string(box.getLowerLeft().getY()) + ", " + + std::to_string(box.getLowerLeft().getX()) + ", " + + std::to_string(box.getUpperRight().getY()) + ", " + + std::to_string(box.getUpperRight().getX()) + ")"); + wr.closeTag(); + for (auto t : std::vector{"way", "node", "relation"}) { + for (auto r : filter.getKeepRules()) { + for (auto val : r.second) { + if (t == "way" && (val.second & OsmFilter::WAY)) continue; + if (t == "relation" && (val.second & OsmFilter::REL)) continue; + if (t == "node" && (val.second & OsmFilter::NODE)) continue; + + wr.openTag("query", {{"type", t}}); + if (val.first == "*") + wr.openTag("has-kv", {{"k", r.first}}); + else + wr.openTag("has-kv", {{"k", r.first}, {"v", val.first}}); + wr.closeTag(); + wr.openTag("bbox-query", + {{"s", std::to_string(box.getLowerLeft().getY())}, + {"w", std::to_string(box.getLowerLeft().getX())}, + {"n", 
std::to_string(box.getUpperRight().getY())}, + {"e", std::to_string(box.getUpperRight().getX())}}); + wr.closeTag(); + wr.closeTag(); } } } } - LOG(VDEBUG) << "Deleting orphan nodes..."; - deleteOrphNds(g); + wr.closeTag(); - LOG(VDEBUG) << "Deleting orphan edges..."; - deleteOrphEdgs(g); + wr.openTag("union"); + wr.openTag("item"); + wr.closeTag(); + wr.openTag("recurse", {{"type", "down"}}); + wr.closeTag(); + wr.closeTag(); + wr.openTag("print"); - LOG(VDEBUG) << "Collapsing edges..."; - collapseEdges(g); - - LOG(VDEBUG) << "Deleting orphan nodes..."; - deleteOrphNds(g); - - LOG(VDEBUG) << "Deleting orphan edges..."; - deleteOrphEdgs(g); - - LOG(VDEBUG) << "Writing graph components..."; - // the restrictor is needed here to prevent connections in the graph - // which are not possible in reality - uint32_t comps = writeComps(g); - - LOG(VDEBUG) << "Simplifying geometries..."; - simplifyGeoms(g); - - LOG(VDEBUG) << "Writing other-direction edges..."; - writeODirEdgs(g, res); - - LOG(VDEBUG) << "Write dummy node self-edges..."; - writeSelfEdgs(g); - - LOG(INFO) << "Graph has " << g->getNds()->size() << " nodes and " << comps - << " connected component(s)"; + wr.closeTags(); } // _____________________________________________________________________________ void OsmBuilder::filterWrite(const std::string& in, const std::string& out, const std::vector& opts, - const BBoxIdx& latLngBox) { + const BBoxIdx& box) { OsmIdSet bboxNodes, noHupNodes; MultAttrMap emptyF; @@ -233,16 +326,67 @@ void OsmBuilder::filterWrite(const std::string& in, const std::string& out, // always empty NIdMultMap multNodes; - xml::File xml(in); - std::ofstream outstr; - outstr.open(out); + pfxml::file xml(in); - util::xml::XmlWriter wr(&outstr, true, 4); + BBoxIdx latLngBox = box; - outstr << "\n"; - wr.openTag("osm"); + if (latLngBox.size() == 0) { + skipUntil(&xml, "bounds"); - // TODO(patrick): write bounding box tag + const pfxml::tag& cur = xml.get(); + + if (strcmp(cur.name, "bounds") != 0) 
{ + throw pfxml::parse_exc( + std::string("Could not find required tag"), in, 0, 0, 0); + } + + if (!cur.attr("minlat")) { + throw pfxml::parse_exc( + std::string( + "Could not find required attribute \"minlat\" for tag"), + in, 0, 0, 0); + } + if (!cur.attr("minlon")) { + throw pfxml::parse_exc( + std::string( + "Could not find required attribute \"minlon\" for tag"), + in, 0, 0, 0); + } + if (!cur.attr("maxlat")) { + throw pfxml::parse_exc( + std::string( + "Could not find required attribute \"maxlat\" for tag"), + in, 0, 0, 0); + } + if (!cur.attr("maxlon")) { + throw pfxml::parse_exc( + std::string( + "Could not find required attribute \"maxlon\" for tag"), + in, 0, 0, 0); + } + + double minlat = atof(cur.attr("minlat")); + double minlon = atof(cur.attr("minlon")); + double maxlat = atof(cur.attr("maxlat")); + double maxlon = atof(cur.attr("maxlon")); + + latLngBox.add(Box({minlon, minlat}, {maxlon, maxlat})); + } + + util::xml::XmlWriter wr(out, false, 0); + + wr.put("\n"); + wr.openTag("osm", {{"version", "0.6"}, + {"generator", std::string("pfaedle/") + VERSION_FULL}}); + wr.openTag( + "bounds", + {{"minlat", std::to_string(latLngBox.getFullBox().getLowerLeft().getY())}, + {"minlon", std::to_string(latLngBox.getFullBox().getLowerLeft().getX())}, + {"maxlat", + std::to_string(latLngBox.getFullBox().getUpperRight().getY())}, + {"maxlon", + std::to_string(latLngBox.getFullBox().getUpperRight().getX())}}); + wr.closeTag(); OsmFilter filter; AttrKeySet attrKeys[3] = {}; @@ -253,18 +397,18 @@ void OsmBuilder::filterWrite(const std::string& in, const std::string& out, } skipUntil(&xml, "node"); - xml::ParserState nodeBeg = xml.state(); - xml::ParserState edgesBeg = + pfxml::parser_state nodeBeg = xml.state(); + pfxml::parser_state edgesBeg = readBBoxNds(&xml, &bboxNodes, &noHupNodes, filter, latLngBox); skipUntil(&xml, "relation"); readRels(&xml, &rels, &nodeRels, &wayRels, filter, attrKeys[2], &rests); - xml.setState(edgesBeg); + xml.set_state(edgesBeg); 
readEdges(&xml, wayRels, filter, bboxNodes, attrKeys[1], &ways, &nodes, rels.flat); - xml.setState(nodeBeg); + xml.set_state(nodeBeg); readWriteNds(&xml, &wr, nodeRels, filter, bboxNodes, &nodes, attrKeys[0], rels.flat); @@ -278,14 +422,14 @@ void OsmBuilder::filterWrite(const std::string& in, const std::string& out, } // _____________________________________________________________________________ -void OsmBuilder::readWriteRels(xml::File* i, util::xml::XmlWriter* o, +void OsmBuilder::readWriteRels(pfxml::file* i, util::xml::XmlWriter* o, OsmIdList* ways, NIdMap* nodes, const OsmFilter& filter, const AttrKeySet& keepAttrs) { OsmRel rel; while ((rel = nextRel(i, filter, keepAttrs)).id) { OsmIdList realNodes, realWays; - std::vector realNodeRoles, realWayRoles; + std::vector realNodeRoles, realWayRoles; for (size_t j = 0; j < rel.ways.size(); j++) { osmid wid = rel.ways[j]; @@ -329,7 +473,7 @@ void OsmBuilder::readWriteRels(xml::File* i, util::xml::XmlWriter* o, for (const auto& kv : rel.attrs) { std::map attrs = { - {"k", kv.first}, {"v", xml::File::decode(kv.second)}}; + {"k", kv.first}, {"v", pfxml::file::decode(kv.second)}}; o->openTag("tag", attrs); o->closeTag(); } @@ -340,7 +484,7 @@ void OsmBuilder::readWriteRels(xml::File* i, util::xml::XmlWriter* o, } // _____________________________________________________________________________ -void OsmBuilder::readWriteWays(xml::File* i, util::xml::XmlWriter* o, +void OsmBuilder::readWriteWays(pfxml::file* i, util::xml::XmlWriter* o, OsmIdList* ways, const AttrKeySet& keepAttrs) const { OsmWay w; @@ -356,7 +500,7 @@ void OsmBuilder::readWriteWays(xml::File* i, util::xml::XmlWriter* o, for (const auto& kv : w.attrs) { std::map attrs; attrs["k"] = kv.first; - attrs["v"] = xml::File::decode(kv.second); + attrs["v"] = pfxml::file::decode(kv.second); o->openTag("tag", attrs); o->closeTag(); } @@ -365,34 +509,38 @@ void OsmBuilder::readWriteWays(xml::File* i, util::xml::XmlWriter* o, } // 
_____________________________________________________________________________ -NodePL OsmBuilder::plFromGtfs(const Stop* s, - const OsmReadOpts& ops) const { - NodePL ret(util::geo::latLngToWebMerc(s->getLat(), s->getLng()), - StatInfo(ops.statNormzer(s->getName()), - ops.trackNormzer(s->getPlatformCode()), false)); +NodePL OsmBuilder::plFromGtfs(const Stop* s, const OsmReadOpts& ops) { + NodePL ret({s->getLat(), s->getLng()}, + StatInfo(ops.statNormzer.norm(s->getName()), + ops.trackNormzer.norm(s->getPlatformCode()))); + +#ifdef PFAEDLE_STATION_IDS + // debug feature, store station id from GTFS + ret.getSI()->setId(s->getId()); +#endif if (s->getParentStation()) { - ret.getSI()->addAltName(ops.statNormzer(s->getParentStation()->getName())); + ret.getSI()->addAltName( + ops.statNormzer.norm(s->getParentStation()->getName())); } return ret; } // _____________________________________________________________________________ -xml::ParserState OsmBuilder::readBBoxNds(xml::File* xml, OsmIdSet* nodes, - OsmIdSet* nohupNodes, - const OsmFilter& filter, - const BBoxIdx& bbox) const { +pfxml::parser_state OsmBuilder::readBBoxNds(pfxml::file* xml, OsmIdSet* nodes, + OsmIdSet* nohupNodes, + const OsmFilter& filter, + const BBoxIdx& bbox) const { bool inNodeBlock = false; uint64_t curId = 0; do { - const xml::Tag& cur = xml->get(); + const pfxml::tag& cur = xml->get(); if (inNodeBlock && xml->level() == 3 && curId && strcmp(cur.name, "tag") == 0) { - if (filter.nohup(cur.attrs.find("k")->second, - cur.attrs.find("v")->second)) { + if (filter.nohup(cur.attr("k"), cur.attr("v"))) { nohupNodes->add(curId); } } @@ -403,12 +551,15 @@ xml::ParserState OsmBuilder::readBBoxNds(xml::File* xml, OsmIdSet* nodes, if (inNodeBlock) { // block ended if (strcmp(cur.name, "node")) return xml->state(); - double y = util::atof(cur.attrs.find("lat")->second, 7); - double x = util::atof(cur.attrs.find("lon")->second, 7); + double y = util::atof(cur.attr("lat"), 7); + double x = 
util::atof(cur.attr("lon"), 7); - if (bbox.contains(Point(x, y))) { - curId = util::atoul(cur.attrs.find("id")->second); + curId = util::atoul(cur.attr("id")); + + if (bbox.contains(Point(x, y))) { nodes->add(curId); + } else { + nodes->nadd(curId); } } } while (xml->next()); @@ -417,25 +568,25 @@ xml::ParserState OsmBuilder::readBBoxNds(xml::File* xml, OsmIdSet* nodes, } // _____________________________________________________________________________ -OsmWay OsmBuilder::nextWayWithId(xml::File* xml, osmid wid, +OsmWay OsmBuilder::nextWayWithId(pfxml::file* xml, osmid wid, const AttrKeySet& keepAttrs) const { OsmWay w; do { - const xml::Tag& cur = xml->get(); + const pfxml::tag& cur = xml->get(); if (xml->level() == 2 || xml->level() == 0) { if (w.id || strcmp(cur.name, "way")) return w; - osmid id = util::atoul(cur.attrs.find("id")->second); + osmid id = util::atoul(cur.attr("id")); if (id == wid) w.id = id; } if (w.id && xml->level() == 3) { if (strcmp(cur.name, "nd") == 0) { - w.nodes.push_back(util::atoul(cur.attrs.find("ref")->second)); + w.nodes.push_back(util::atoul(cur.attr("ref"))); } else if (strcmp(cur.name, "tag") == 0) { - if (keepAttrs.count(cur.attrs.find("k")->second)) - w.attrs[cur.attrs.find("k")->second] = cur.attrs.find("v")->second; + if (keepAttrs.count(cur.attr("k"))) + w.attrs[cur.attr("k")] = cur.attr("v"); } } } while (xml->next()); @@ -446,7 +597,7 @@ OsmWay OsmBuilder::nextWayWithId(xml::File* xml, osmid wid, } // _____________________________________________________________________________ -void OsmBuilder::skipUntil(xml::File* xml, const std::string& s) const { +void OsmBuilder::skipUntil(pfxml::file* xml, const std::string& s) const { while (xml->next() && strcmp(xml->get().name, s.c_str())) { } } @@ -467,30 +618,30 @@ bool OsmBuilder::relKeep(osmid id, const RelMap& rels, } // _____________________________________________________________________________ -OsmWay OsmBuilder::nextWay(xml::File* xml, const RelMap& wayRels, +OsmWay 
OsmBuilder::nextWay(pfxml::file* xml, const RelMap& wayRels, const OsmFilter& filter, const OsmIdSet& bBoxNodes, const AttrKeySet& keepAttrs, const FlatRels& fl) const { OsmWay w; do { - const xml::Tag& cur = xml->get(); + const pfxml::tag& cur = xml->get(); if (xml->level() == 2 || xml->level() == 0) { if (keepWay(w, wayRels, filter, bBoxNodes, fl)) return w; if (strcmp(cur.name, "way")) return OsmWay(); - w.id = util::atoul(cur.attrs.find("id")->second); + w.id = util::atoul(cur.attr("id")); w.nodes.clear(); w.attrs.clear(); } if (w.id && xml->level() == 3) { if (strcmp(cur.name, "nd") == 0) { - osmid nid = util::atoul(cur.attrs.find("ref")->second); + osmid nid = util::atoul(cur.attr("ref")); w.nodes.push_back(nid); } else if (strcmp(cur.name, "tag") == 0) { - if (keepAttrs.count(cur.attrs.find("k")->second)) - w.attrs[cur.attrs.find("k")->second] = cur.attrs.find("v")->second; + if (keepAttrs.count(cur.attr("k"))) + w.attrs[cur.attr("k")] = cur.attr("v"); } } } while (xml->next()); @@ -517,7 +668,7 @@ bool OsmBuilder::keepWay(const OsmWay& w, const RelMap& wayRels, } // _____________________________________________________________________________ -void OsmBuilder::readEdges(xml::File* xml, const RelMap& wayRels, +void OsmBuilder::readEdges(pfxml::file* xml, const RelMap& wayRels, const OsmFilter& filter, const OsmIdSet& bBoxNodes, const AttrKeySet& keepAttrs, OsmIdList* ret, NIdMap* nodes, const FlatRels& flat) { @@ -531,7 +682,7 @@ void OsmBuilder::readEdges(xml::File* xml, const RelMap& wayRels, } // _____________________________________________________________________________ -void OsmBuilder::readEdges(xml::File* xml, Graph* g, const RelLst& rels, +void OsmBuilder::readEdges(pfxml::file* xml, Graph* g, const RelLst& rels, const RelMap& wayRels, const OsmFilter& filter, const OsmIdSet& bBoxNodes, NIdMap* nodes, NIdMultMap* multiNodes, const OsmIdSet& noHupNodes, @@ -563,6 +714,7 @@ void OsmBuilder::readEdges(xml::File* xml, Graph* g, const RelLst& rels, } 
else { n = (*nodes)[nid]; } + if (last) { auto e = g->addEdg(last, n, EdgePL()); if (!e) continue; @@ -613,7 +765,7 @@ void OsmBuilder::processRestr(osmid nid, osmid wid, } // _____________________________________________________________________________ -OsmNode OsmBuilder::nextNode(xml::File* xml, NIdMap* nodes, +OsmNode OsmBuilder::nextNode(pfxml::file* xml, NIdMap* nodes, NIdMultMap* multNodes, const RelMap& nodeRels, const OsmFilter& filter, const OsmIdSet& bBoxNodes, const AttrKeySet& keepAttrs, @@ -621,7 +773,7 @@ OsmNode OsmBuilder::nextNode(xml::File* xml, NIdMap* nodes, OsmNode n; do { - const xml::Tag& cur = xml->get(); + const pfxml::tag& cur = xml->get(); if (xml->level() == 2 || xml->level() == 0) { if (keepNode(n, *nodes, *multNodes, nodeRels, bBoxNodes, filter, fl)) return n; @@ -629,14 +781,14 @@ OsmNode OsmBuilder::nextNode(xml::File* xml, NIdMap* nodes, if (strcmp(cur.name, "node")) return OsmNode(); n.attrs.clear(); - n.lat = util::atof(cur.attrs.find("lat")->second, 7); - n.lng = util::atof(cur.attrs.find("lon")->second, 7); - n.id = util::atoul(cur.attrs.find("id")->second); + n.lat = util::atof(cur.attr("lat"), 7); + n.lng = util::atof(cur.attr("lon"), 7); + n.id = util::atoul(cur.attr("id")); } if (xml->level() == 3 && n.id && strcmp(cur.name, "tag") == 0) { - if (keepAttrs.count(cur.attrs.find("k")->second)) - n.attrs[cur.attrs.find("k")->second] = cur.attrs.find("v")->second; + if (keepAttrs.count(cur.attr("k"))) + n.attrs[cur.attr("k")] = cur.attr("v"); } } while (xml->next()); @@ -663,7 +815,7 @@ bool OsmBuilder::keepNode(const OsmNode& n, const NIdMap& nodes, } // _____________________________________________________________________________ -void OsmBuilder::readWriteNds(xml::File* i, util::xml::XmlWriter* o, +void OsmBuilder::readWriteNds(pfxml::file* i, util::xml::XmlWriter* o, const RelMap& nRels, const OsmFilter& filter, const OsmIdSet& bBoxNds, NIdMap* nds, const AttrKeySet& keepAttrs, @@ -677,7 +829,8 @@ void 
OsmBuilder::readWriteNds(xml::File* i, util::xml::XmlWriter* o, {"lat", std::to_string(nd.lat)}, {"lon", std::to_string(nd.lng)}}); for (const auto& kv : nd.attrs) { - o->openTag("tag", {{"k", kv.first}, {"v", xml::File::decode(kv.second)}}); + o->openTag("tag", + {{"k", kv.first}, {"v", pfxml::file::decode(kv.second)}}); o->closeTag(); } o->closeTag(); @@ -685,50 +838,48 @@ void OsmBuilder::readWriteNds(xml::File* i, util::xml::XmlWriter* o, } // _____________________________________________________________________________ -void OsmBuilder::readNodes(xml::File* xml, Graph* g, const RelLst& rels, +void OsmBuilder::readNodes(pfxml::file* xml, Graph* g, const RelLst& rels, const RelMap& nodeRels, const OsmFilter& filter, const OsmIdSet& bBoxNodes, NIdMap* nodes, NIdMultMap* multNodes, NodeSet* orphanStations, const AttrKeySet& keepAttrs, const FlatRels& fl, const OsmReadOpts& opts) const { - StAttrGroups attrGroups; - OsmNode nd; while ((nd = nextNode(xml, nodes, multNodes, nodeRels, filter, bBoxNodes, keepAttrs, fl)) .id) { Node* n = 0; - auto pos = util::geo::latLngToWebMerc(nd.lat, nd.lng); + POINT pos = {nd.lng, nd.lat}; if (nodes->count(nd.id)) { n = (*nodes)[nd.id]; n->pl().setGeom(pos); if (filter.station(nd.attrs)) { - auto si = getStatInfo(n, nd.id, pos, nd.attrs, &attrGroups, nodeRels, - rels, opts); + auto si = getStatInfo(nd.id, nd.attrs, nodeRels, rels, opts); if (!si.isNull()) n->pl().setSI(si); } else if (filter.blocker(nd.attrs)) { n->pl().setBlocker(); + } else if (filter.turnCycle(nd.attrs)) { + n->pl().setTurnCycle(); } } else if ((*multNodes).count(nd.id)) { for (auto* n : (*multNodes)[nd.id]) { n->pl().setGeom(pos); if (filter.station(nd.attrs)) { - auto si = getStatInfo(n, nd.id, pos, nd.attrs, &attrGroups, nodeRels, - rels, opts); + auto si = getStatInfo(nd.id, nd.attrs, nodeRels, rels, opts); if (!si.isNull()) n->pl().setSI(si); } else if (filter.blocker(nd.attrs)) { n->pl().setBlocker(); + } else if (filter.turnCycle(nd.attrs)) { + 
n->pl().setTurnCycle(); } } } else { // these are nodes without any connected edges if (filter.station(nd.attrs)) { auto tmp = g->addNd(NodePL(pos)); - auto si = getStatInfo(tmp, nd.id, pos, nd.attrs, &attrGroups, nodeRels, - rels, opts); + auto si = getStatInfo(nd.id, nd.attrs, nodeRels, rels, opts); if (!si.isNull()) tmp->pl().setSI(si); if (tmp->pl().getSI()) { - tmp->pl().getSI()->setIsFromOsm(false); orphanStations->insert(tmp); } } @@ -737,12 +888,12 @@ void OsmBuilder::readNodes(xml::File* xml, Graph* g, const RelLst& rels, } // _____________________________________________________________________________ -OsmRel OsmBuilder::nextRel(xml::File* xml, const OsmFilter& filter, +OsmRel OsmBuilder::nextRel(pfxml::file* xml, const OsmFilter& filter, const AttrKeySet& keepAttrs) const { OsmRel rel; do { - const xml::Tag& cur = xml->get(); + const pfxml::tag& cur = xml->get(); if (xml->level() == 2 || xml->level() == 0) { uint64_t keepFlags = 0; uint64_t dropFlags = 0; @@ -764,34 +915,34 @@ OsmRel OsmBuilder::nextRel(xml::File* xml, const OsmFilter& filter, rel.wayRoles.clear(); rel.keepFlags = 0; rel.dropFlags = 0; - rel.id = util::atoul(cur.attrs.find("id")->second); + rel.id = util::atoul(cur.attr("id")); } if (xml->level() == 3 && rel.id) { if (strcmp(cur.name, "member") == 0) { - if (strcmp(cur.attrs.find("type")->second, "node") == 0) { - osmid id = util::atoul(cur.attrs.find("ref")->second); + if (strcmp(cur.attr("type"), "node") == 0) { + osmid id = util::atoul(cur.attr("ref")); // TODO(patrick): no need to push IDs that have been filtered out by // the bounding box!!!! 
rel.nodes.push_back(id); - if (cur.attrs.count("role")) { - rel.nodeRoles.push_back(cur.attrs.find("role")->second); + if (cur.attr("role")) { + rel.nodeRoles.push_back(cur.attr("role")); } else { rel.nodeRoles.push_back(""); } } - if (strcmp(cur.attrs.find("type")->second, "way") == 0) { - osmid id = util::atoul(cur.attrs.find("ref")->second); + if (strcmp(cur.attr("type"), "way") == 0) { + osmid id = util::atoul(cur.attr("ref")); rel.ways.push_back(id); - if (cur.attrs.count("role")) { - rel.wayRoles.push_back(cur.attrs.find("role")->second); + if (cur.attr("role")) { + rel.wayRoles.push_back(cur.attr("role")); } else { rel.wayRoles.push_back(""); } } } else if (strcmp(cur.name, "tag") == 0) { - if (keepAttrs.count(cur.attrs.find("k")->second)) - rel.attrs[cur.attrs.find("k")->second] = cur.attrs.find("v")->second; + if (keepAttrs.count(cur.attr("k"))) + rel.attrs[cur.attr("k")] = cur.attr("v"); } } } while (xml->next()); @@ -811,7 +962,7 @@ OsmRel OsmBuilder::nextRel(xml::File* xml, const OsmFilter& filter, } // _____________________________________________________________________________ -void OsmBuilder::readRels(xml::File* xml, RelLst* rels, RelMap* nodeRels, +void OsmBuilder::readRels(pfxml::file* xml, RelLst* rels, RelMap* nodeRels, RelMap* wayRels, const OsmFilter& filter, const AttrKeySet& keepAttrs, Restrictions* rests) const { @@ -872,13 +1023,13 @@ void OsmBuilder::readRestr(const OsmRel& rel, Restrictions* rests, // _____________________________________________________________________________ std::string OsmBuilder::getAttrByFirstMatch(const DeepAttrLst& rule, osmid id, - const AttrMap& attrs, + const AttrMap& am, const RelMap& entRels, const RelLst& rels, - const Normalizer& norm) const { + const Normalizer& normzer) const { std::string ret; for (const auto& s : rule) { - ret = norm(xml::File::decode(getAttr(s, id, attrs, entRels, rels))); + ret = normzer.norm(pfxml::file::decode(getAttr(s, id, am, entRels, rels))); if (!ret.empty()) return ret; } @@ 
-887,12 +1038,12 @@ std::string OsmBuilder::getAttrByFirstMatch(const DeepAttrLst& rule, osmid id, // _____________________________________________________________________________ std::vector OsmBuilder::getAttrMatchRanked( - const DeepAttrLst& rule, osmid id, const AttrMap& attrs, - const RelMap& entRels, const RelLst& rels, const Normalizer& norm) const { + const DeepAttrLst& rule, osmid id, const AttrMap& am, const RelMap& entRels, + const RelLst& rels, const Normalizer& norm) const { std::vector ret; for (const auto& s : rule) { std::string tmp = - norm(xml::File::decode(getAttr(s, id, attrs, entRels, rels))); + norm.norm(pfxml::file::decode(getAttr(s, id, am, entRels, rels))); if (!tmp.empty()) ret.push_back(tmp); } @@ -901,11 +1052,11 @@ std::vector OsmBuilder::getAttrMatchRanked( // _____________________________________________________________________________ std::string OsmBuilder::getAttr(const DeepAttrRule& s, osmid id, - const AttrMap& attrs, const RelMap& entRels, + const AttrMap& am, const RelMap& entRels, const RelLst& rels) const { if (s.relRule.kv.first.empty()) { - if (attrs.find(s.attr) != attrs.end()) { - return attrs.find(s.attr)->second; + if (am.find(s.attr) != am.end()) { + return am.find(s.attr)->second; } } else { if (entRels.count(id)) { @@ -922,9 +1073,7 @@ std::string OsmBuilder::getAttr(const DeepAttrRule& s, osmid id, } // _____________________________________________________________________________ -Nullable OsmBuilder::getStatInfo(Node* node, osmid nid, - const FPoint& pos, const AttrMap& m, - StAttrGroups* groups, +Nullable OsmBuilder::getStatInfo(osmid nid, const AttrMap& m, const RelMap& nodeRels, const RelLst& rels, const OsmReadOpts& ops) const { @@ -938,109 +1087,76 @@ Nullable OsmBuilder::getStatInfo(Node* node, osmid nid, if (!names.size()) return Nullable(); - auto ret = StatInfo(names[0], platform, true); + auto ret = StatInfo(names[0], platform); + +#ifdef PFAEDLE_STATION_IDS + 
ret.setId(getAttrByFirstMatch(ops.statAttrRules.idRule, nid, m, nodeRels, + rels, ops.idNormzer)); +#endif for (size_t i = 1; i < names.size(); i++) ret.addAltName(names[i]); - bool groupFound = false; - - for (const auto& rule : ops.statGroupNAttrRules) { - if (groupFound) break; - std::string ruleVal = getAttr(rule.attr, nid, m, nodeRels, rels); - if (!ruleVal.empty()) { - // check if a matching group exists - for (auto* group : (*groups)[rule.attr.attr][ruleVal]) { - if (groupFound) break; - for (const auto* member : group->getNodes()) { - if (webMercMeterDist(*member->pl().getGeom(), pos) <= rule.maxDist) { - // ok, group is matching - groupFound = true; - if (node) group->addNode(node); - ret.setGroup(group); - break; - } - } - } - } - } - - if (!groupFound) { - for (const auto& rule : ops.statGroupNAttrRules) { - std::string ruleVal = getAttr(rule.attr, nid, m, nodeRels, rels); - if (!ruleVal.empty()) { - // add new group - StatGroup* g = new StatGroup(); - if (node) g->addNode(node); - ret.setGroup(g); - (*groups)[rule.attr.attr][ruleVal].push_back(g); - break; - } - } - } - return ret; } // _____________________________________________________________________________ -double OsmBuilder::dist(const Node* a, const Node* b) const { - return webMercMeterDist(*a->pl().getGeom(), *b->pl().getGeom()); +double OsmBuilder::dist(const Node* a, const Node* b) { + return util::geo::haversine(*(a->pl().getGeom()), *(b->pl().getGeom())); } // _____________________________________________________________________________ -double OsmBuilder::webMercDistFactor(const util::geo::FPoint& a) const { - // euclidean distance on web mercator is in meters on equator, - // and proportional to cos(lat) in both y directions - - double lat = 2 * atan(exp(a.getY() / 6378137.0)) - 1.5707965; - return cos(lat); -} - -// _____________________________________________________________________________ -double OsmBuilder::webMercDist(const Node* a, const Node* b) const { - return 
webMercMeterDist(*a->pl().getGeom(), *b->pl().getGeom()); -} - -// _____________________________________________________________________________ -void OsmBuilder::writeGeoms(Graph* g) const { - for (auto* n : *g->getNds()) { +void OsmBuilder::writeGeoms(Graph* g, const OsmReadOpts& opts) { + for (auto* n : g->getNds()) { for (auto* e : n->getAdjListOut()) { - e->pl().addPoint(*e->getFrom()->pl().getGeom()); - e->pl().setLength(dist(e->getFrom(), e->getTo())); - e->pl().addPoint(*e->getTo()->pl().getGeom()); + if (!e->pl().getGeom()) { + e->pl().addPoint(*e->getFrom()->pl().getGeom()); + e->pl().addPoint(*e->getTo()->pl().getGeom()); + } + + e->pl().setCost(costToInt(e->pl().getLength() / + opts.levelDefSpeed[e->pl().lvl()])); } } } // _____________________________________________________________________________ -void OsmBuilder::fixGaps(Graph* g, NodeGrid* ng) const { - for (auto* n : *g->getNds()) { +void OsmBuilder::fixGaps(Graph* g, NodeGrid* ng) { + double METER = 1; + for (auto* n : g->getNds()) { if (n->getInDeg() + n->getOutDeg() == 1) { - // get all nodes in a 1 meter distance + // get all nodes in distance std::set ret; - ng->get(util::geo::pad(util::geo::getBoundingBox(*n->pl().getGeom()), 1), + double distor = util::geo::latLngDistFactor(*n->pl().getGeom()); + ng->get(util::geo::pad(util::geo::getBoundingBox(*n->pl().getGeom()), + (METER / M_PER_DEG) / distor), &ret); for (auto* nb : ret) { if (nb != n && (nb->getInDeg() + nb->getOutDeg()) == 1 && - webMercDist(nb, n) <= 1.0 && !nb->pl().getSI() && - !n->pl().getSI()) { - Node* otherN; - if (nb->getOutDeg()) - otherN = (*nb->getAdjListOut().begin())->getOtherNd(nb); - else - otherN = (*nb->getAdjListIn().begin())->getOtherNd(nb); - FLine l; - l.push_back(*otherN->pl().getGeom()); - l.push_back(*n->pl().getGeom()); + dist(nb, n) <= METER) { + // special case: both nodes are non-stations, move + // the end point nb to n and delete nb + if (!nb->pl().getSI() && !n->pl().getSI()) { + Node* otherN; + if 
(nb->getOutDeg()) + otherN = (*nb->getAdjListOut().begin())->getOtherNd(nb); + else + otherN = (*nb->getAdjListIn().begin())->getOtherNd(nb); - Edge* e; - if (nb->getOutDeg()) - e = g->addEdg(otherN, n, (*nb->getAdjListOut().begin())->pl()); - else - e = g->addEdg(otherN, n, (*nb->getAdjListIn().begin())->pl()); - if (e) { - *e->pl().getGeom() = l; - g->delNd(nb); - ng->remove(nb); + Edge* e; + if (nb->getOutDeg()) + e = g->addEdg(otherN, n, (*nb->getAdjListOut().begin())->pl()); + else + e = g->addEdg(otherN, n, (*nb->getAdjListIn().begin())->pl()); + if (e) { + g->delNd(nb); + ng->remove(nb); + } + } else { + // if one of the nodes is a station, just add an edge between them + if (nb->getOutDeg()) + g->addEdg(n, nb, (*nb->getAdjListOut().begin())->pl()); + else + g->addEdg(n, nb, (*nb->getAdjListIn().begin())->pl()); } } } @@ -1049,26 +1165,27 @@ void OsmBuilder::fixGaps(Graph* g, NodeGrid* ng) const { } // _____________________________________________________________________________ -EdgeGrid OsmBuilder::buildEdgeIdx(Graph* g, size_t size, - const Box& webMercBox) const { - EdgeGrid ret(size, size, webMercBox, false); - for (auto* n : *g->getNds()) { +EdgeGrid OsmBuilder::buildEdgeIdx(Graph* g, double size, const BOX& box) { + EdgeGrid ret(size, size, box, false); + for (auto* n : g->getNds()) { for (auto* e : n->getAdjListOut()) { - assert(e->pl().getGeom()); - ret.add(*e->pl().getGeom(), e); + auto llGeom = + LINE{*e->getFrom()->pl().getGeom(), *e->getTo()->pl().getGeom()}; + ret.add(llGeom, e); } } return ret; } // _____________________________________________________________________________ -NodeGrid OsmBuilder::buildNodeIdx(Graph* g, size_t size, - const Box& webMercBox, - bool which) const { - NodeGrid ret(size, size, webMercBox, false); - for (auto* n : *g->getNds()) { +NodeGrid OsmBuilder::buildNodeIdx(Graph* g, double size, const BOX& box, + bool which) { + NodeGrid ret(size, size, box, false); + for (auto* n : g->getNds()) { + // only orphan nodes if 
(!which && n->getInDeg() + n->getOutDeg() == 1) ret.add(*n->pl().getGeom(), n); + // only station nodes else if (which && n->pl().getSI()) ret.add(*n->pl().getGeom(), n); } @@ -1076,13 +1193,12 @@ NodeGrid OsmBuilder::buildNodeIdx(Graph* g, size_t size, } // _____________________________________________________________________________ -Node* OsmBuilder::depthSearch(const Edge* e, const StatInfo* si, - const util::geo::FPoint& p, double maxD, - int maxFullTurns, double minAngle, - const SearchFunc& sfunc) const { +Node* OsmBuilder::depthSearch(const Edge* e, const StatInfo* si, const POINT& p, + double maxD, int maxFullTurns, double minAngle, + const SearchFunc& sfunc) { // shortcuts - double dFrom = webMercMeterDist(*e->getFrom()->pl().getGeom(), p); - double dTo = webMercMeterDist(*e->getTo()->pl().getGeom(), p); + double dFrom = haversine(*e->getFrom()->pl().getGeom(), p); + double dTo = haversine(*e->getTo()->pl().getGeom(), p); if (dFrom > maxD && dTo > maxD) return 0; if (dFrom <= maxD && sfunc(e->getFrom(), si)) return e->getFrom(); @@ -1115,24 +1231,25 @@ Node* OsmBuilder::depthSearch(const Edge* e, const StatInfo* si, int fullTurn = 0; - if (cur.fromEdge && - cur.node->getInDeg() + cur.node->getOutDeg() > - 2) { // only intersection angles - const FPoint& toP = *cand->pl().getGeom(); - const FPoint& fromP = + if (cur.fromEdge && cur.node->getInDeg() + cur.node->getOutDeg() > + 2) { // only intersection angles + const POINT& toP = *cand->pl().getGeom(); + const POINT& fromP = *cur.fromEdge->getOtherNd(cur.node)->pl().getGeom(); - const FPoint& nodeP = *cur.node->pl().getGeom(); + const POINT& nodeP = *cur.node->pl().getGeom(); if (util::geo::innerProd(nodeP, fromP, toP) < minAngle) fullTurn = 1; } + double eLen = dist(edg->getFrom(), edg->getTo()); + if ((maxFullTurns < 0 || cur.fullTurns + fullTurn <= maxFullTurns) && - cur.dist + edg->pl().getLength() < maxD && !closed.count(cand)) { + cur.dist + eLen < maxD && !closed.count(cand)) { if (sfunc(cand, si)) { 
return cand; } else { - pq.push(NodeCand{cur.dist + edg->pl().getLength(), cand, edg, - cur.fullTurns + fullTurn}); + pq.push( + NodeCand{cur.dist + eLen, cand, edg, cur.fullTurns + fullTurn}); } } } @@ -1142,110 +1259,46 @@ Node* OsmBuilder::depthSearch(const Edge* e, const StatInfo* si, } // _____________________________________________________________________________ -bool OsmBuilder::isBlocked(const Edge* e, const StatInfo* si, - const util::geo::FPoint& p, double maxD, - int maxFullTurns, double minAngle) const { +bool OsmBuilder::isBlocked(const Edge* e, const StatInfo* si, const POINT& p, + double maxD, int maxFullTurns, double minAngle) { return depthSearch(e, si, p, maxD, maxFullTurns, minAngle, BlockSearch()); } // _____________________________________________________________________________ -Node* OsmBuilder::eqStatReach(const Edge* e, const StatInfo* si, - const util::geo::FPoint& p, double maxD, - int maxFullTurns, double minAngle) const { +Node* OsmBuilder::eqStatReach(const Edge* e, const StatInfo* si, const POINT& p, + double maxD, int maxFullTurns, double minAngle) { return depthSearch(e, si, p, maxD, maxFullTurns, minAngle, EqSearch()); } // _____________________________________________________________________________ -void OsmBuilder::getEdgCands(const FPoint& geom, EdgeCandPQ* ret, EdgeGrid* eg, - double d) const { - double distor = webMercDistFactor(geom); +void OsmBuilder::getEdgCands(const POINT& geom, EdgeCandPQ* ret, EdgeGrid* eg, + double d) { + double distor = util::geo::latLngDistFactor(geom); std::set neighs; - Box box = util::geo::pad(util::geo::getBoundingBox(geom), d / distor); + BOX box = + util::geo::pad(util::geo::getBoundingBox(geom), (d / M_PER_DEG) / distor); eg->get(box, &neighs); for (auto* e : neighs) { double dist = util::geo::distToSegment(*e->getFrom()->pl().getGeom(), *e->getTo()->pl().getGeom(), geom); - if (dist * distor <= d) { + if (dist * distor * M_PER_DEG <= d) { ret->push(EdgeCand(-dist, e)); } } } // 
_____________________________________________________________________________ -std::set OsmBuilder::getMatchingNds(const NodePL& s, NodeGrid* ng, - double d) const { - std::set ret; - double distor = webMercDistFactor(*s.getGeom()); - std::set neighs; - Box box = - util::geo::pad(util::geo::getBoundingBox(*s.getGeom()), d / distor); - ng->get(box, &neighs); - - for (auto* n : neighs) { - if (n->pl().getSI() && n->pl().getSI()->simi(s.getSI()) > 0.5) { - double dist = webMercMeterDist(*n->pl().getGeom(), *s.getGeom()); - if (dist < d) ret.insert(n); - } - } - - return ret; -} - -// _____________________________________________________________________________ -Node* OsmBuilder::getMatchingNd(const NodePL& s, NodeGrid* ng, double d) const { - double distor = webMercDistFactor(*s.getGeom()); - std::set neighs; - Box box = - util::geo::pad(util::geo::getBoundingBox(*s.getGeom()), d / distor); - ng->get(box, &neighs); - - Node* ret = 0; - double bestD = std::numeric_limits::max(); - - for (auto* n : neighs) { - if (n->pl().getSI() && n->pl().getSI()->simi(s.getSI()) > 0.5) { - double dist = webMercMeterDist(*n->pl().getGeom(), *s.getGeom()); - if (dist < d && dist < bestD) { - bestD = dist; - ret = n; - } - } - } - - return ret; -} - -// _____________________________________________________________________________ -std::set OsmBuilder::snapStation(Graph* g, NodePL* s, EdgeGrid* eg, - NodeGrid* sng, const OsmReadOpts& opts, - Restrictor* restor, bool surrHeur, - double d) const { +void OsmBuilder::snapStation(Graph* g, NodePL* s, EdgeGrid* eg, NodeGrid* sng, + const OsmReadOpts& opts, Restrictor* restor, + double d) { assert(s->getSI()); - std::set ret; EdgeCandPQ pq; getEdgCands(*s->getGeom(), &pq, eg, d); - if (pq.empty() && surrHeur) { - // no station found in the first round, try again with the nearest - // surrounding - // station with matching name - const Node* best = getMatchingNd(*s, sng, opts.maxSnapFallbackHeurDistance); - if (best) 
getEdgCands(*best->pl().getGeom(), &pq, eg, d); - } - - // TODO(patrick): problem here: the rules below may be circumvented if a node - // is - // placed - // near the end of an edge. Then, a full turn punish prevents a detection of - // nearby nodes!!! - // This is a mere performance problem and will not affect routing very much, - // as - // long as station passing is not heavily punished. - while (!pq.empty()) { auto* e = pq.top().second; pq.pop(); @@ -1262,85 +1315,44 @@ std::set OsmBuilder::snapStation(Graph* g, NodePL* s, EdgeGrid* eg, continue; } - // if the projected position is near (< 2 meters) the end point of this - // way, - // and the endpoint is not already a station, place the station there. + // if the projected position is near (< 0.5 meters) the end point of this + // way and the endpoint is not already a station, place the station there. if (!e->getFrom()->pl().getSI() && - webMercMeterDist(geom, *e->getFrom()->pl().getGeom()) < 2) { + haversine(geom, *e->getFrom()->pl().getGeom()) < .5) { e->getFrom()->pl().setSI(*s->getSI()); - if (s->getSI()->getGroup()) - s->getSI()->getGroup()->addNode(e->getFrom()); - ret.insert(e->getFrom()); } else if (!e->getTo()->pl().getSI() && - webMercMeterDist(geom, *e->getTo()->pl().getGeom()) < 2) { + haversine(geom, *e->getTo()->pl().getGeom()) < .5) { e->getTo()->pl().setSI(*s->getSI()); - if (s->getSI()->getGroup()) s->getSI()->getGroup()->addNode(e->getTo()); - ret.insert(e->getTo()); } else { s->setGeom(geom); Node* n = g->addNd(*s); - if (n->pl().getSI()->getGroup()) - n->pl().getSI()->getGroup()->addNode(n); sng->add(geom, n); auto ne = g->addEdg(e->getFrom(), n, e->pl()); - ne->pl().setLength(webMercDist(n, e->getFrom())); - FLine l; - l.push_back(*e->getFrom()->pl().getGeom()); - l.push_back(*n->pl().getGeom()); - *ne->pl().getGeom() = l; - eg->add(l, ne); + ne->pl().setCost(costToInt(dist(e->getFrom(), n) / + opts.levelDefSpeed[ne->pl().lvl()])); + eg->add({*e->getFrom()->pl().getGeom(), 
*n->pl().getGeom()}, ne); auto nf = g->addEdg(n, e->getTo(), e->pl()); - nf->pl().setLength(webMercDist(n, e->getTo())); - FLine ll; - ll.push_back(*n->pl().getGeom()); - ll.push_back(*e->getTo()->pl().getGeom()); - *nf->pl().getGeom() = ll; - eg->add(l, nf); + nf->pl().setCost(costToInt(dist(n, e->getTo()) / + opts.levelDefSpeed[nf->pl().lvl()])); + eg->add({*n->pl().getGeom(), *e->getTo()->pl().getGeom()}, nf); // replace edge in restrictor restor->replaceEdge(e, ne, nf); g->delEdg(e->getFrom(), e->getTo()); eg->remove(e); - ret.insert(n); } } else { - ret.insert(eq); + // if the snapped station is very near to the original OSM station + // write additional info from this snap station to the equivalent stat + if (haversine(*s->getGeom(), *eq->pl().getGeom()) < 5) { + if (eq->pl().getSI()->getTrack().empty()) + eq->pl().getSI()->setTrack(s->getSI()->getTrack()); + } } } - - // get surrounding nodes - // TODO(patrick): own distance configuration for this! - const auto& sur = getMatchingNds(*s, sng, opts.maxGroupSearchDistance); - ret.insert(sur.begin(), sur.end()); - - return ret; -} - -// _____________________________________________________________________________ -StatGroup* OsmBuilder::groupStats(const NodeSet& s) const { - if (!s.size()) return 0; - // reference group - StatGroup* ret = new StatGroup(); - bool used = false; - - for (auto* n : s) { - if (!n->pl().getSI()) continue; - used = true; - if (n->pl().getSI()->getGroup()) { - // this node is already in a group - merge this group with this one - ret->merge(n->pl().getSI()->getGroup()); - } else { - ret->addNode(n); - n->pl().getSI()->setGroup(ret); - } - } - - if (!used) delete ret; - - return ret; } // _____________________________________________________________________________ @@ -1355,12 +1367,14 @@ std::vector OsmBuilder::getLines( elp = _relLines[relId]; } else { TransitEdgeLine el; + el.color = ad::cppgtfs::gtfs::NO_COLOR; bool found = false; for (const auto& r : ops.relLinerules.sNameRule) { for 
(const auto& relAttr : rels.rels[relId]) { if (relAttr.first == r) { - el.shortName = ops.lineNormzer(xml::File::decode(relAttr.second)); + el.shortName = + ops.lineNormzer.norm(pfxml::file::decode(relAttr.second)); if (!el.shortName.empty()) found = true; } } @@ -1371,7 +1385,8 @@ std::vector OsmBuilder::getLines( for (const auto& r : ops.relLinerules.fromNameRule) { for (const auto& relAttr : rels.rels[relId]) { if (relAttr.first == r) { - el.fromStr = ops.statNormzer(xml::File::decode(relAttr.second)); + el.fromStr = + ops.statNormzer.norm(pfxml::file::decode(relAttr.second)); if (!el.fromStr.empty()) found = true; } } @@ -1382,13 +1397,31 @@ std::vector OsmBuilder::getLines( for (const auto& r : ops.relLinerules.toNameRule) { for (const auto& relAttr : rels.rels[relId]) { if (relAttr.first == r) { - el.toStr = ops.statNormzer(xml::File::decode(relAttr.second)); + el.toStr = + ops.statNormzer.norm(pfxml::file::decode(relAttr.second)); if (!el.toStr.empty()) found = true; } } if (found) break; } + found = false; + for (const auto& r : ops.relLinerules.colorRule) { + for (const auto& relAttr : rels.rels[relId]) { + if (relAttr.first == r) { + auto dec = pfxml::file::decode(relAttr.second); + auto color = parseHexColor(dec); + if (color == ad::cppgtfs::gtfs::NO_COLOR) + color = parseHexColor(std::string("#") + dec); + if (color != ad::cppgtfs::gtfs::NO_COLOR) { + found = true; + el.color = color; + } + } + } + if (found) break; + } + if (!el.shortName.size() && !el.fromStr.size() && !el.toStr.size()) continue; @@ -1410,15 +1443,6 @@ std::vector OsmBuilder::getLines( // _____________________________________________________________________________ void OsmBuilder::getKeptAttrKeys(const OsmReadOpts& opts, AttrKeySet sets[3]) const { - for (const auto& i : opts.statGroupNAttrRules) { - if (i.attr.relRule.kv.first.empty()) { - sets[0].insert(i.attr.attr); - } else { - sets[2].insert(i.attr.relRule.kv.first); - sets[2].insert(i.attr.attr); - } - } - for (const auto& i : 
opts.keepFilter) { for (size_t j = 0; j < 3; j++) sets[j].insert(i.first); } @@ -1452,6 +1476,10 @@ void OsmBuilder::getKeptAttrKeys(const OsmReadOpts& opts, sets[0].insert(i.first); } + for (const auto& i : opts.turnCycleFilter) { + sets[0].insert(i.first); + } + for (uint8_t j = 0; j < 7; j++) { for (const auto& kv : *(opts.levelFilters + j)) { sets[1].insert(kv.first); @@ -1479,6 +1507,8 @@ void OsmBuilder::getKeptAttrKeys(const OsmReadOpts& opts, opts.relLinerules.fromNameRule.end()); sets[2].insert(opts.relLinerules.sNameRule.begin(), opts.relLinerules.sNameRule.end()); + sets[2].insert(opts.relLinerules.colorRule.begin(), + opts.relLinerules.colorRule.end()); for (const auto& i : opts.statAttrRules.nameRule) { if (i.relRule.kv.first.empty()) { @@ -1506,55 +1536,47 @@ void OsmBuilder::getKeptAttrKeys(const OsmReadOpts& opts, sets[2].insert(i.attr); } } -} -// _____________________________________________________________________________ -void OsmBuilder::deleteOrphEdgs(Graph* g) const { - size_t ROUNDS = 3; - for (size_t c = 0; c < ROUNDS; c++) { - for (auto i = g->getNds()->begin(); i != g->getNds()->end();) { - if ((*i)->getInDeg() + (*i)->getOutDeg() != 1 || (*i)->pl().getSI()) { - ++i; - continue; - } - i = g->delNd(*i); - continue; - i++; - } - } -} - -// _____________________________________________________________________________ -void OsmBuilder::deleteOrphNds(Graph* g) const { - for (auto i = g->getNds()->begin(); i != g->getNds()->end();) { - if ((*i)->getInDeg() + (*i)->getOutDeg() == 0 && - !((*i)->pl().getSI() && (*i)->pl().getSI()->getGroup() && - (*i)->pl().getSI()->getGroup()->getStops().size())) { - i = g->delNd(i); - // TODO(patrick): maybe delete from node grid? 
+ for (const auto& i : opts.statAttrRules.idRule) { + if (i.relRule.kv.first.empty()) { + sets[0].insert(i.attr); } else { - i++; + sets[2].insert(i.relRule.kv.first); + sets[2].insert(i.attr); } } } // _____________________________________________________________________________ -bool OsmBuilder::edgesSim(const Edge* a, const Edge* b) const { +void OsmBuilder::deleteOrphNds(Graph* g, const OsmReadOpts& opts) { + UNUSED(opts); + for (auto i = g->getNds().begin(); i != g->getNds().end();) { + if ((*i)->getInDeg() + (*i)->getOutDeg() != 0 || (*i)->pl().getSI()) { + ++i; + continue; + } + + i = g->delNd(*i); + } +} + +// _____________________________________________________________________________ +bool OsmBuilder::edgesSim(const Edge* a, const Edge* b) { if (static_cast(a->pl().oneWay()) ^ static_cast(b->pl().oneWay())) return false; if (a->pl().lvl() != b->pl().lvl()) return false; if (a->pl().getLines().size() != b->pl().getLines().size()) return false; - if (a->pl().getLines() != b->pl().getLines()) return false; if (a->pl().oneWay() && b->pl().oneWay()) { if (a->getFrom() != b->getTo() && a->getTo() != b->getFrom()) return false; } if (a->pl().isRestricted() || b->pl().isRestricted()) return false; + if (a->pl().getLines() != b->pl().getLines()) return false; return true; } // _____________________________________________________________________________ -const EdgePL& OsmBuilder::mergeEdgePL(Edge* a, Edge* b) const { +const EdgePL& OsmBuilder::mergeEdgePL(Edge* a, Edge* b) { const Node* n = 0; if (a->getFrom() == b->getFrom()) n = a->getFrom(); @@ -1563,39 +1585,60 @@ const EdgePL& OsmBuilder::mergeEdgePL(Edge* a, Edge* b) const { else n = a->getTo(); + if (a->pl().getGeom() == 0) { + a->pl().addPoint(*a->getFrom()->pl().getGeom()); + a->pl().addPoint(*a->getTo()->pl().getGeom()); + } + if (a->getTo() == n && b->getTo() == n) { // --> n <-- - a->pl().getGeom()->insert(a->pl().getGeom()->end(), - b->pl().getGeom()->rbegin(), - b->pl().getGeom()->rend()); + if 
(b->pl().getGeom()) { + a->pl().getGeom()->insert(a->pl().getGeom()->end(), + b->pl().getGeom()->rbegin(), + b->pl().getGeom()->rend()); + } else { + a->pl().getGeom()->push_back(*b->getFrom()->pl().getGeom()); + } } else if (a->getTo() == n && b->getFrom() == n) { // --> n --> - a->pl().getGeom()->insert(a->pl().getGeom()->end(), - b->pl().getGeom()->begin(), - b->pl().getGeom()->end()); + if (b->pl().getGeom()) { + a->pl().getGeom()->insert(a->pl().getGeom()->end(), + b->pl().getGeom()->begin(), + b->pl().getGeom()->end()); + } else { + a->pl().getGeom()->push_back(*b->getTo()->pl().getGeom()); + } } else if (a->getFrom() == n && b->getTo() == n) { // <-- n <-- std::reverse(a->pl().getGeom()->begin(), a->pl().getGeom()->end()); - a->pl().getGeom()->insert(a->pl().getGeom()->end(), - b->pl().getGeom()->rbegin(), - b->pl().getGeom()->rend()); + if (b->pl().getGeom()) { + a->pl().getGeom()->insert(a->pl().getGeom()->end(), + b->pl().getGeom()->rbegin(), + b->pl().getGeom()->rend()); + } else { + a->pl().getGeom()->push_back(*b->getFrom()->pl().getGeom()); + } } else { // <-- n --> std::reverse(a->pl().getGeom()->begin(), a->pl().getGeom()->end()); - a->pl().getGeom()->insert(a->pl().getGeom()->end(), - b->pl().getGeom()->begin(), - b->pl().getGeom()->end()); + if (b->pl().getGeom()) { + a->pl().getGeom()->insert(a->pl().getGeom()->end(), + b->pl().getGeom()->begin(), + b->pl().getGeom()->end()); + } else { + a->pl().getGeom()->push_back(*b->getTo()->pl().getGeom()); + } } - a->pl().setLength(a->pl().getLength() + b->pl().getLength()); - return a->pl(); } // _____________________________________________________________________________ -void OsmBuilder::collapseEdges(Graph* g) const { - for (auto* n : *g->getNds()) { - if (n->getOutDeg() + n->getInDeg() != 2 || n->pl().getSI()) continue; +void OsmBuilder::collapseEdges(Graph* g) { + for (auto n : g->getNds()) { + if (n->getOutDeg() + n->getInDeg() != 2 || n->pl().getSI() || + n->pl().isTurnCycle()) + continue; Edge* 
ea; Edge* eb; @@ -1611,7 +1654,7 @@ void OsmBuilder::collapseEdges(Graph* g) const { } // important, we don't have a multigraph! if the same edge - // will already exist, leave this node + // already exists, leave this node if (g->getEdg(ea->getOtherNd(n), eb->getOtherNd(n))) continue; if (g->getEdg(eb->getOtherNd(n), ea->getOtherNd(n))) continue; @@ -1621,6 +1664,7 @@ void OsmBuilder::collapseEdges(Graph* g) const { } else { g->addEdg(ea->getOtherNd(n), eb->getOtherNd(n), mergeEdgePL(ea, eb)); } + g->delEdg(ea->getFrom(), ea->getTo()); g->delEdg(eb->getFrom(), eb->getTo()); } @@ -1628,56 +1672,61 @@ void OsmBuilder::collapseEdges(Graph* g) const { } // _____________________________________________________________________________ -void OsmBuilder::simplifyGeoms(Graph* g) const { - for (auto* n : *g->getNds()) { +void OsmBuilder::simplifyGeoms(Graph* g) { + for (auto* n : g->getNds()) { for (auto* e : n->getAdjListOut()) { - (*e->pl().getGeom()) = util::geo::simplify(*e->pl().getGeom(), 0.5); + (*e->pl().getGeom()) = + util::geo::simplify(*e->pl().getGeom(), 0.5 / M_PER_DEG); } } } // _____________________________________________________________________________ -uint32_t OsmBuilder::writeComps(Graph* g) const { - Component* comp = new Component{7}; +uint32_t OsmBuilder::writeComps(Graph* g, const OsmReadOpts& opts) { + NodePL::comps.clear(); + NodePL::comps.emplace_back(Component{0}); uint32_t numC = 0; + uint64_t numNds = 0; - for (auto* n : *g->getNds()) { - if (!n->pl().getComp()) { + double fac = opts.maxSpeedCorFac; + + for (auto* n : g->getNds()) { + if (!n->pl().getCompId()) { std::stack> q; q.push(std::pair(n, 0)); while (!q.empty()) { std::pair cur = q.top(); q.pop(); - cur.first->pl().setComp(comp); + cur.first->pl().setComp(NodePL::comps.size()); + numNds++; for (auto* e : cur.first->getAdjListOut()) { - if (e->pl().lvl() < comp->minEdgeLvl) - comp->minEdgeLvl = e->pl().lvl(); - if (!e->getOtherNd(cur.first)->pl().getComp()) + double speed = 
opts.levelDefSpeed[e->pl().lvl()] / fac; + if (speed > NodePL::comps.back().maxSpeed) + NodePL::comps.back().maxSpeed = speed; + if (!e->getOtherNd(cur.first)->pl().getCompId()) q.push(std::pair(e->getOtherNd(cur.first), e)); } for (auto* e : cur.first->getAdjListIn()) { - if (e->pl().lvl() < comp->minEdgeLvl) - comp->minEdgeLvl = e->pl().lvl(); - if (!e->getOtherNd(cur.first)->pl().getComp()) + double speed = opts.levelDefSpeed[e->pl().lvl()] / fac; + if (speed > NodePL::comps.back().maxSpeed) + NodePL::comps.back().maxSpeed = speed; + if (!e->getOtherNd(cur.first)->pl().getCompId()) q.push(std::pair(e->getOtherNd(cur.first), e)); } } - numC++; - comp = new Component{7}; + if (numNds > 1) numC++; + NodePL::comps.emplace_back(Component{0}); + numNds = 0; } } - - // the last comp was not used - delete comp; - return numC; } // _____________________________________________________________________________ -void OsmBuilder::writeEdgeTracks(const EdgTracks& tracks) const { +void OsmBuilder::writeEdgeTracks(const EdgTracks& tracks) { for (const auto& tr : tracks) { if (tr.first->getTo()->pl().getSI() && tr.first->getTo()->pl().getSI()->getTrack().empty()) { @@ -1691,21 +1740,199 @@ void OsmBuilder::writeEdgeTracks(const EdgTracks& tracks) const { } // _____________________________________________________________________________ -void OsmBuilder::writeODirEdgs(Graph* g, Restrictor* restor) const { - for (auto* n : *g->getNds()) { +void OsmBuilder::writeODirEdgs(Graph* g, Restrictor* restor) { + for (auto* n : g->getNds()) { for (auto* e : n->getAdjListOut()) { if (g->getEdg(e->getTo(), e->getFrom())) continue; auto newE = g->addEdg(e->getTo(), e->getFrom(), e->pl().revCopy()); + assert(newE->pl().getGeom()); if (e->pl().isRestricted()) restor->duplicateEdge(e, newE); } } } // _____________________________________________________________________________ -void OsmBuilder::writeSelfEdgs(Graph* g) const { - for (auto* n : *g->getNds()) { - if (n->pl().getSI() && 
n->getAdjListOut().size() == 0) { - g->addEdg(n, n); +void OsmBuilder::writeSelfEdgs(Graph* g) { + // if a station only has degree 1, there is no way to arrive at this station + // without doing a full turn (because the outgoing candidate edge is always + // the incoming edge). This is a problem at end-stations. We solve this by + // adding self-edges with infinite costs - this still allows usage as + // arrivals, does not punish bends (because the node degree is still only 2) + // and prevents the usage of the edge to circumvent turn penalties + for (auto* n : g->getNds()) { + if (n->pl().getSI() && n->getAdjListOut().size() == 1) { + auto e = g->addEdg(n, n); + e->pl().setCost(std::numeric_limits::max()); + e->pl().addPoint(*e->getFrom()->pl().getGeom()); + e->pl().addPoint(*e->getTo()->pl().getGeom()); } } } + +// _____________________________________________________________________________ +void OsmBuilder::writeNoLinePens(Graph* g, const OsmReadOpts& opts) { + for (auto* n : g->getNds()) { + for (auto* e : n->getAdjListOut()) { + if (e->pl().getLines().size() == 0) { + double c = e->pl().getCost(); + c = c / 10.0; // convert into seconds + e->pl().setCost(costToInt(c * opts.noLinesPunishFact)); + } + } + } +} + +// _____________________________________________________________________________ +void OsmBuilder::writeOneWayPens(Graph* g, const OsmReadOpts& opts) { + for (auto* n : g->getNds()) { + for (auto* e : n->getAdjListOut()) { + if (e->pl().oneWay() == 2) { + double c = e->pl().getCost(); + c = c / 10.0; // convert into seconds + e->pl().setCost( + costToInt(c * opts.oneWaySpeedPen + opts.oneWayEntryCost)); + } + } + } +} + +// _____________________________________________________________________________ +bool OsmBuilder::keepFullTurn(const trgraph::Node* n, double ang) { + if (n->getInDeg() + n->getOutDeg() != 1) return false; + + const trgraph::Edge* e = 0; + if (n->getOutDeg()) + e = n->getAdjListOut().front(); + else + e = n->getAdjListIn().front(); + 
+ auto other = e->getOtherNd(n); + + if (other->getInDeg() + other->getOutDeg() == 3) { + const trgraph::Edge* a = 0; + const trgraph::Edge* b = 0; + for (auto f : other->getAdjListIn()) { + if (f != e && !a) + a = f; + else if (f != e && !b) + b = f; + } + + for (auto f : other->getAdjListOut()) { + if (f != e && !a) + a = f; + else if (f != e && !b) + b = f; + } + + POINT ap, bp; + + if (!a || !b) return false; + + if (a->pl().getGeom() && b->pl().getGeom()) { + ap = a->pl().backHop(); + bp = b->pl().backHop(); + if (a->getTo() != other) ap = a->pl().frontHop(); + if (b->getTo() != other) bp = b->pl().frontHop(); + } else { + assert(!a->pl().getGeom()); + assert(!b->pl().getGeom()); + ap = *a->getTo()->pl().getGeom(); + bp = *b->getTo()->pl().getGeom(); + if (a->getTo() != other) ap = *a->getFrom()->pl().getGeom(); + if (b->getTo() != other) bp = *b->getFrom()->pl().getGeom(); + } + + return util::geo::innerProd(*other->pl().getGeom(), ap, bp) > ang; + } + + return false; +} + +// _____________________________________________________________________________ +void OsmBuilder::snapStats(const OsmReadOpts& opts, Graph* g, + const BBoxIdx& bbox, double gridSize, + Restrictor* res, const NodeSet& orphanStations) { + NodeGrid sng = buildNodeIdx(g, gridSize, bbox.getFullBox(), true); + EdgeGrid eg = buildEdgeIdx(g, gridSize, bbox.getFullBox()); + + LOG(DEBUG) << "Grid size of " << sng.getXWidth() << "x" << sng.getYHeight(); + + for (double d : opts.maxOsmStationDistances) { + for (auto s : orphanStations) { + NodePL pl = s->pl(); + snapStation(g, &pl, &eg, &sng, opts, res, d); + } + } +} + +// _____________________________________________________________________________ +uint32_t OsmBuilder::costToInt(double c) { + // always round upwards, otherwise when combined with the heuristic which + // is always rounded downwards the PQ monotonicity is not ensured anymore - + // with a downward rounding, the rounding errors may sum up so high that the + // path will get cheaper 
than the heuristic cost + uint32_t val = std::ceil(c * 10); + if (std::ceil(c * 10) > std::numeric_limits::max()) { + LOG(DEBUG) << "Cost " << c + << " does not fit in unsigned 32 bit integer, defaulting to " + << std::numeric_limits::max() << "."; + return std::numeric_limits::max(); + } + return val; +} + +// _____________________________________________________________________________ +uint32_t OsmBuilder::parseHexColor(std::string s) const { + // TODO(patrick): not very nice + size_t proced = 0; + std::transform(s.begin(), s.end(), s.begin(), ::toupper); + std::string ret = " "; + if (s.size() == 7 && s[0] == '#') { + for (size_t i = 1; i < 7; i++) { + if (isdigit(s[i])) + ret[i - 1] = s[i]; + else if (isalpha(s[i]) && (s[i] > 64 && s[i] < 71)) + ret[i - 1] = s[i]; + else + return ad::cppgtfs::gtfs::NO_COLOR; + } + + return std::stoul("0x" + ret, &proced, 16); + } + + if (s.size() == 4 && s[0] == '#') { + for (size_t i = 1; i < 4; i++) { + if (isdigit(s[i])) { + ret[(i - 1) * 2] = s[i]; + ret[(i - 1) * 2 + 1] = s[i]; + } else if (isalpha(s[i]) && (s[i] > 64 && s[i] < 71)) { + ret[(i - 1) * 2] = s[i]; + ret[(i - 1) * 2 + 1] = s[i]; + } else { + return ad::cppgtfs::gtfs::NO_COLOR; + } + } + return std::stoul("0x" + ret, &proced, 16); + } + + if (s == "BLACK") return 0x00000000; + if (s == "SILVER") return 0x00C0C0C0; + if (s == "GRAY") return 0x00808080; + if (s == "WHITE") return 0x00FFFFFF; + if (s == "MAROON") return 0x00800000; + if (s == "RED") return 0x00FF0000; + if (s == "PURPLE") return 0x00800080; + if (s == "FUCHSIA") return 0x00FF00FF; + if (s == "GREEN") return 0x00008000; + if (s == "LIME") return 0x0000FF00; + if (s == "OLIVE") return 0x00808000; + if (s == "YELLOW") return 0x00FFFF00; + if (s == "NAVY") return 0x00000080; + if (s == "BLUE") return 0x000000FF; + if (s == "TEAL") return 0x00008080; + if (s == "AQUA") return 0x0000FFFF; + + if (ret.empty()) return ad::cppgtfs::gtfs::NO_COLOR; + return std::stoul("0x" + ret, &proced, 16); +} diff 
--git a/src/pfaedle/osm/OsmBuilder.h b/src/pfaedle/osm/OsmBuilder.h index 5764448..e92f483 100644 --- a/src/pfaedle/osm/OsmBuilder.h +++ b/src/pfaedle/osm/OsmBuilder.h @@ -4,15 +4,15 @@ #ifndef PFAEDLE_OSM_OSMBUILDER_H_ #define PFAEDLE_OSM_OSMBUILDER_H_ - +#include #include #include -#include #include -#include #include #include +#include #include "ad/cppgtfs/gtfs/Feed.h" +#include "pfaedle/Def.h" #include "pfaedle/osm/BBoxIdx.h" #include "pfaedle/osm/OsmFilter.h" #include "pfaedle/osm/OsmIdSet.h" @@ -23,26 +23,26 @@ #include "pfaedle/trgraph/Normalizer.h" #include "pfaedle/trgraph/StatInfo.h" #include "util/Nullable.h" +#include "util/geo/Geo.h" #include "util/xml/XmlWriter.h" -#include "xml/File.h" +#include "pfxml/pfxml.h" namespace pfaedle { namespace osm { +using ad::cppgtfs::gtfs::Stop; +using pfaedle::router::NodeSet; +using pfaedle::trgraph::Component; +using pfaedle::trgraph::Edge; using pfaedle::trgraph::EdgeGrid; -using pfaedle::trgraph::NodeGrid; -using pfaedle::trgraph::Normalizer; +using pfaedle::trgraph::EdgePL; using pfaedle::trgraph::Graph; using pfaedle::trgraph::Node; +using pfaedle::trgraph::NodeGrid; using pfaedle::trgraph::NodePL; -using pfaedle::trgraph::Edge; -using pfaedle::trgraph::EdgePL; -using pfaedle::trgraph::TransitEdgeLine; +using pfaedle::trgraph::Normalizer; using pfaedle::trgraph::StatInfo; -using pfaedle::trgraph::StatGroup; -using pfaedle::trgraph::Component; -using pfaedle::router::NodeSet; -using ad::cppgtfs::gtfs::Stop; +using pfaedle::trgraph::TransitEdgeLine; using util::Nullable; struct NodeCand { @@ -57,10 +57,9 @@ struct SearchFunc { }; struct EqSearch : public SearchFunc { + EqSearch() {} double minSimi = 0.9; - bool operator()(const Node* cand, const StatInfo* si) const { - return cand->pl().getSI() && cand->pl().getSI()->simi(si) > minSimi; - } + bool operator()(const Node* cand, const StatInfo* si) const; }; struct BlockSearch : public SearchFunc { @@ -86,9 +85,18 @@ class OsmBuilder { // Read the OSM file at path, 
and write a graph to g. Only elements // inside the bounding box will be read void read(const std::string& path, const OsmReadOpts& opts, Graph* g, - const BBoxIdx& box, size_t gridSize, router::FeedStops* fs, - Restrictor* res); + const BBoxIdx& box, double gridSize, Restrictor* res); + // Based on the list of options, output an overpass XML query for getting + // the data needed for routing + void overpassQryWrite(std::ostream* out, const std::vector& opts, + const BBoxIdx& latLngBox) const; + + // Based on the list of options, output an osmfilter configuration file + // to filter the data needed for routing + void osmfilterRuleWrite(std::ostream* out, + const std::vector& opts, + const BBoxIdx& latLngBox) const; // Based on the list of options, read an OSM file from in and output an // OSM file to out which contains exactly the entities that are needed @@ -97,37 +105,37 @@ class OsmBuilder { const std::vector& opts, const BBoxIdx& box); private: - xml::ParserState readBBoxNds(xml::File* xml, OsmIdSet* nodes, - OsmIdSet* noHupNodes, const OsmFilter& filter, - const BBoxIdx& bbox) const; + pfxml::parser_state readBBoxNds(pfxml::file* xml, OsmIdSet* nodes, + OsmIdSet* noHupNodes, const OsmFilter& filter, + const BBoxIdx& bbox) const; - void readRels(xml::File* f, RelLst* rels, RelMap* nodeRels, RelMap* wayRels, + void readRels(pfxml::file* f, RelLst* rels, RelMap* nodeRels, RelMap* wayRels, const OsmFilter& filter, const AttrKeySet& keepAttrs, Restrictions* rests) const; void readRestr(const OsmRel& rel, Restrictions* rests, const OsmFilter& filter) const; - void readNodes(xml::File* f, Graph* g, const RelLst& rels, + void readNodes(pfxml::file* f, Graph* g, const RelLst& rels, const RelMap& nodeRels, const OsmFilter& filter, const OsmIdSet& bBoxNodes, NIdMap* nodes, NIdMultMap* multNodes, NodeSet* orphanStations, const AttrKeySet& keepAttrs, const FlatRels& flatRels, const OsmReadOpts& opts) const; - void readWriteNds(xml::File* i, util::xml::XmlWriter* o, + void 
readWriteNds(pfxml::file* i, util::xml::XmlWriter* o, const RelMap& nodeRels, const OsmFilter& filter, const OsmIdSet& bBoxNodes, NIdMap* nodes, const AttrKeySet& keepAttrs, const FlatRels& f) const; - void readWriteWays(xml::File* i, util::xml::XmlWriter* o, OsmIdList* ways, + void readWriteWays(pfxml::file* i, util::xml::XmlWriter* o, OsmIdList* ways, const AttrKeySet& keepAttrs) const; - void readWriteRels(xml::File* i, util::xml::XmlWriter* o, OsmIdList* ways, + void readWriteRels(pfxml::file* i, util::xml::XmlWriter* o, OsmIdList* ways, NIdMap* nodes, const OsmFilter& filter, const AttrKeySet& keepAttrs); - void readEdges(xml::File* xml, Graph* g, const RelLst& rels, + void readEdges(pfxml::file* xml, Graph* g, const RelLst& rels, const RelMap& wayRels, const OsmFilter& filter, const OsmIdSet& bBoxNodes, NIdMap* nodes, NIdMultMap* multNodes, const OsmIdSet& noHupNodes, @@ -135,21 +143,22 @@ class OsmBuilder { Restrictor* restor, const FlatRels& flatRels, EdgTracks* etracks, const OsmReadOpts& opts); - void readEdges(xml::File* xml, const RelMap& wayRels, const OsmFilter& filter, - const OsmIdSet& bBoxNodes, const AttrKeySet& keepAttrs, - OsmIdList* ret, NIdMap* nodes, const FlatRels& flatRels); + void readEdges(pfxml::file* xml, const RelMap& wayRels, + const OsmFilter& filter, const OsmIdSet& bBoxNodes, + const AttrKeySet& keepAttrs, OsmIdList* ret, NIdMap* nodes, + const FlatRels& flatRels); - OsmWay nextWay(xml::File* xml, const RelMap& wayRels, const OsmFilter& filter, - const OsmIdSet& bBoxNodes, const AttrKeySet& keepAttrs, - const FlatRels& flatRels) const; + OsmWay nextWay(pfxml::file* xml, const RelMap& wayRels, + const OsmFilter& filter, const OsmIdSet& bBoxNodes, + const AttrKeySet& keepAttrs, const FlatRels& flatRels) const; bool keepWay(const OsmWay& w, const RelMap& wayRels, const OsmFilter& filter, const OsmIdSet& bBoxNodes, const FlatRels& fl) const; - OsmWay nextWayWithId(xml::File* xml, osmid wid, + OsmWay nextWayWithId(pfxml::file* xml, 
osmid wid, const AttrKeySet& keepAttrs) const; - OsmNode nextNode(xml::File* xml, NIdMap* nodes, NIdMultMap* multNodes, + OsmNode nextNode(pfxml::file* xml, NIdMap* nodes, NIdMultMap* multNodes, const RelMap& nodeRels, const OsmFilter& filter, const OsmIdSet& bBoxNodes, const AttrKeySet& keepAttrs, const FlatRels& flatRels) const; @@ -159,72 +168,67 @@ class OsmBuilder { const OsmIdSet& bBoxNodes, const OsmFilter& filter, const FlatRels& fl) const; - OsmRel nextRel(xml::File* xml, const OsmFilter& filter, + OsmRel nextRel(pfxml::file* xml, const OsmFilter& filter, const AttrKeySet& keepAttrs) const; - Nullable getStatInfo(Node* node, osmid nid, const FPoint& pos, - const AttrMap& m, StAttrGroups* groups, + protected: + Nullable getStatInfo(osmid nid, const AttrMap& m, const RelMap& nodeRels, const RelLst& rels, const OsmReadOpts& ops) const; - void writeGeoms(Graph* g) const; - void deleteOrphNds(Graph* g) const; - void deleteOrphEdgs(Graph* g) const; - double dist(const Node* a, const Node* b) const; - double webMercDist(const Node* a, const Node* b) const; - double webMercDistFactor(const FPoint& a) const; + static void snapStats(const OsmReadOpts& opts, Graph* g, const BBoxIdx& bbox, + double gridSize, Restrictor* res, + const NodeSet& orphanStations); + static void writeGeoms(Graph* g, const OsmReadOpts& opts); + static void deleteOrphNds(Graph* g, const OsmReadOpts& opts); + static double dist(const Node* a, const Node* b); - NodeGrid buildNodeIdx(Graph* g, size_t size, - const util::geo::Box& webMercBox, - bool which) const; + static NodeGrid buildNodeIdx(Graph* g, double size, const BOX& box, + bool which); - EdgeGrid buildEdgeIdx(Graph* g, size_t size, - const util::geo::Box& webMercBox) const; + static EdgeGrid buildEdgeIdx(Graph* g, double size, const BOX& box); - void fixGaps(Graph* g, NodeGrid* ng) const; - void collapseEdges(Graph* g) const; - void writeODirEdgs(Graph* g, Restrictor* restor) const; - void writeSelfEdgs(Graph* g) const; - void 
writeEdgeTracks(const EdgTracks& tracks) const; - void simplifyGeoms(Graph* g) const; - uint32_t writeComps(Graph* g) const; - bool edgesSim(const Edge* a, const Edge* b) const; - const EdgePL& mergeEdgePL(Edge* a, Edge* b) const; - void getEdgCands(const FPoint& s, EdgeCandPQ* ret, EdgeGrid* eg, - double d) const; + static void fixGaps(Graph* g, NodeGrid* ng); + static void collapseEdges(Graph* g); + static void writeODirEdgs(Graph* g, Restrictor* restor); + static void writeSelfEdgs(Graph* g); + static void writeOneWayPens(Graph* g, const OsmReadOpts& opts); + static void writeNoLinePens(Graph* g, const OsmReadOpts& opts); + static void writeEdgeTracks(const EdgTracks& tracks); + static void simplifyGeoms(Graph* g); + static uint32_t writeComps(Graph* g, const OsmReadOpts& opts); + static bool edgesSim(const Edge* a, const Edge* b); + static const EdgePL& mergeEdgePL(Edge* a, Edge* b); + static void getEdgCands(const POINT& s, EdgeCandPQ* ret, EdgeGrid* eg, + double d); - std::set getMatchingNds(const NodePL& s, NodeGrid* ng, double d) const; - - Node* getMatchingNd(const NodePL& s, NodeGrid* ng, double d) const; - - NodeSet snapStation(Graph* g, NodePL* s, EdgeGrid* eg, NodeGrid* sng, - const OsmReadOpts& opts, Restrictor* restor, bool surHeur, - double maxD) const; + static void snapStation(Graph* g, NodePL* s, EdgeGrid* eg, NodeGrid* sng, + const OsmReadOpts& opts, Restrictor* restor, + double maxD); // Checks if from the edge e, a station similar to si can be reach with less // than maxD distance and less or equal to "maxFullTurns" full turns. If // such a station exists, it is returned. If not, 0 is returned. 
- Node* eqStatReach(const Edge* e, const StatInfo* si, const FPoint& p, - double maxD, int maxFullTurns, double maxAng) const; + static Node* eqStatReach(const Edge* e, const StatInfo* si, const POINT& p, + double maxD, int maxFullTurns, double maxAng); - Node* depthSearch(const Edge* e, const StatInfo* si, - const util::geo::FPoint& p, double maxD, int maxFullTurns, - double minAngle, const SearchFunc& sfunc) const; + static Node* depthSearch(const Edge* e, const StatInfo* si, const POINT& p, + double maxD, int maxFullTurns, double minAngle, + const SearchFunc& sfunc); - bool isBlocked(const Edge* e, const StatInfo* si, const FPoint& p, - double maxD, int maxFullTurns, double minAngle) const; + static bool isBlocked(const Edge* e, const StatInfo* si, const POINT& p, + double maxD, int maxFullTurns, double minAngle); + static bool keepFullTurn(const trgraph::Node* n, double ang); - StatGroup* groupStats(const NodeSet& s) const; + static NodePL plFromGtfs(const Stop* s, const OsmReadOpts& ops); std::vector getLines(const std::vector& edgeRels, const RelLst& rels, const OsmReadOpts& ops); - NodePL plFromGtfs(const Stop* s, const OsmReadOpts& ops) const; - void getKeptAttrKeys(const OsmReadOpts& opts, AttrKeySet sets[3]) const; - void skipUntil(xml::File* xml, const std::string& s) const; + void skipUntil(pfxml::file* xml, const std::string& s) const; void processRestr(osmid nid, osmid wid, const Restrictions& rawRests, Edge* e, Node* n, Restrictor* restor) const; @@ -233,6 +237,7 @@ class OsmBuilder { const AttrMap& attrs, const RelMap& entRels, const RelLst& rels, const Normalizer& norm) const; + std::vector getAttrMatchRanked(const DeepAttrLst& rule, osmid id, const AttrMap& attrs, const RelMap& entRels, @@ -244,6 +249,10 @@ class OsmBuilder { bool relKeep(osmid id, const RelMap& rels, const FlatRels& fl) const; + uint32_t parseHexColor(std::string) const; + + static uint32_t costToInt(double c); + std::map _lines; std::map _relLines; }; diff --git 
a/src/pfaedle/osm/OsmFilter.cpp b/src/pfaedle/osm/OsmFilter.cpp index bb2860e..d180041 100644 --- a/src/pfaedle/osm/OsmFilter.cpp +++ b/src/pfaedle/osm/OsmFilter.cpp @@ -26,6 +26,7 @@ OsmFilter::OsmFilter(const OsmReadOpts& o) _posRestr(o.restrPosRestr), _negRestr(o.restrNegRestr), _noRestr(o.noRestrFilter), + _turnCycle(o.turnCycleFilter), _levels(o.levelFilters) {} // _____________________________________________________________________________ @@ -72,6 +73,11 @@ uint64_t OsmFilter::blocker(const AttrMap& attrs) const { return contained(attrs, _blocker, NODE); } +// _____________________________________________________________________________ +uint64_t OsmFilter::turnCycle(const AttrMap& attrs) const { + return contained(attrs, _turnCycle, NODE); +} + // _____________________________________________________________________________ uint64_t OsmFilter::contained(const AttrMap& attrs, const MultAttrMap& map, Type t) { @@ -102,12 +108,12 @@ uint64_t OsmFilter::contained(const AttrMap& attrs, const Attr& attr) { // _____________________________________________________________________________ uint8_t OsmFilter::level(const AttrMap& attrs) const { // the best matching level is always returned - for (int16_t i = 0; i < 7; i++) { + for (int16_t i = 0; i < 8; i++) { for (const auto& kv : attrs) { const auto& lkv = (_levels + i)->find(kv.first); if (lkv != (_levels + i)->end()) { for (const auto& val : lkv->second) { - if (valMatches(kv.second, val.first)) return i + 1; + if (valMatches(kv.second, val.first)) return i; } } } @@ -169,7 +175,7 @@ std::vector OsmFilter::getAttrKeys() const { for (const auto& kv : _noRestr) { ret.push_back(kv.first); } - for (uint8_t i = 0; i < 7; i++) { + for (uint8_t i = 0; i < 8; i++) { for (const auto& kv : *(_levels + i)) { ret.push_back(kv.first); } @@ -191,27 +197,6 @@ OsmFilter OsmFilter::merge(const OsmFilter& other) const { keep[kv.first].insert(kv.second.begin(), kv.second.end()); } - // TODO(patrick): multi-level combination for 
filters. otherwise - // filter drop filters meant as a refinement for keep filters - // interfere with other keeps - - // const auto* d = &_drop; - - // for (size_t i = 0; i < 2; i++) { - // for (const auto& kv : *d) { - // if (keep.find(kv.first) != keep.end()) { - // for (const auto& val : kv.second) { - // if (keep[kv.first].find(val.first) == keep[kv.first].end()) { - // drop[kv.first].insert(val); - // } - // } - // } else { - // drop[kv.first].insert(kv.second.begin(), kv.second.end()); - // } - // } - // d = &other._drop; - // } - return OsmFilter(keep, drop); } @@ -258,3 +243,13 @@ uint64_t OsmFilter::posRestr(const AttrMap& attrs) const { if (contained(attrs, _noRestr, ALL)) return false; return (contained(attrs, _posRestr, ALL)); } + +// _____________________________________________________________________________ +const pfaedle::osm::MultAttrMap& OsmFilter::getKeepRules() const { + return _keep; +} + +// _____________________________________________________________________________ +const pfaedle::osm::MultAttrMap& OsmFilter::getDropRules() const { + return _drop; +} diff --git a/src/pfaedle/osm/OsmFilter.h b/src/pfaedle/osm/OsmFilter.h index 2fa9f82..757d03e 100644 --- a/src/pfaedle/osm/OsmFilter.h +++ b/src/pfaedle/osm/OsmFilter.h @@ -5,8 +5,11 @@ #ifndef PFAEDLE_OSM_OSMFILTER_H_ #define PFAEDLE_OSM_OSMFILTER_H_ +#include + #include #include + #include "pfaedle/osm/Osm.h" #include "pfaedle/osm/OsmReadOpts.h" @@ -27,12 +30,16 @@ class OsmFilter { uint64_t onewayrev(const AttrMap& attrs) const; uint64_t station(const AttrMap& attrs) const; uint64_t blocker(const AttrMap& attrs) const; + uint64_t turnCycle(const AttrMap& attrs) const; uint64_t negRestr(const AttrMap& attrs) const; uint64_t posRestr(const AttrMap& attrs) const; std::vector getAttrKeys() const; OsmFilter merge(const OsmFilter& other) const; + const MultAttrMap& getKeepRules() const; + const MultAttrMap& getDropRules() const; + std::string toString() const; static bool valMatches(const 
std::string& a, const std::string& b, bool m); @@ -43,7 +50,7 @@ class OsmFilter { private: MultAttrMap _keep, _drop, _nohup, _oneway, _onewayrev, _twoway, _station, - _blocker, _posRestr, _negRestr, _noRestr; + _blocker, _posRestr, _negRestr, _noRestr, _turnCycle; const MultAttrMap* _levels; }; } // namespace osm diff --git a/src/pfaedle/osm/OsmIdSet.cpp b/src/pfaedle/osm/OsmIdSet.cpp index 6566692..753c810 100644 --- a/src/pfaedle/osm/OsmIdSet.cpp +++ b/src/pfaedle/osm/OsmIdSet.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -13,7 +12,10 @@ #include #include #include +#include +#include "pfaedle/Def.h" #include "pfaedle/osm/OsmIdSet.h" +#include "util/3rdparty/MurmurHash3.h" using pfaedle::osm::OsmIdSet; @@ -27,35 +29,61 @@ OsmIdSet::OsmIdSet() _last(0), _smallest(-1), _biggest(0), + _hasInv(false), _obufpos(0), _curBlock(-1), _fsize(0) { _bitset = new std::bitset(); + _bitsetNotIn = new std::bitset(); _file = openTmpFile(); _buffer = new unsigned char[BUFFER_S]; - _outBuffer = new unsigned char[OBUFFER_S]; + _outBuffer = new unsigned char[BUFFER_S]; } // _____________________________________________________________________________ OsmIdSet::~OsmIdSet() { delete _bitset; + delete _bitsetNotIn; delete[] _buffer; if (!_closed) delete[] _outBuffer; } +// _____________________________________________________________________________ +void OsmIdSet::nadd(osmid id) { + if (_closed) throw std::exception(); + + _hasInv = true; + + uint32_t h1, h2; + MurmurHash3_x86_32(&id, 8, 469954432, &h1); + h2 = jenkins(id); + + for (int i = 0; i < 5; i++) { + uint32_t h = (h1 + i * h2) % BLOOMF_BITS; + (*_bitsetNotIn)[h] = 1; + } +} + // _____________________________________________________________________________ void OsmIdSet::add(osmid id) { if (_closed) throw std::exception(); + diskAdd(id); - // _set.insert(id); if (_last > id) _sorted = false; _last = id; if (id < _smallest) _smallest = id; if (id > _biggest) _biggest = id; - for (int i = 0; i < 
10; i++) (*_bitset)[hash(id, i)] = 1; + uint32_t h1, h2; + MurmurHash3_x86_32(&id, 8, 469954432, &h1); + h2 = jenkins(id); + + for (int i = 0; i < 5; i++) { + uint32_t h = (h1 + i * h2) % BLOOMF_BITS; + (*_bitset)[h] = 1; + } } // _____________________________________________________________________________ @@ -69,8 +97,8 @@ void OsmIdSet::diskAdd(osmid id) { _blockEnds.push_back(id); } - if (_obufpos >= OBUFFER_S) { - ssize_t w = cwrite(_file, _outBuffer, OBUFFER_S); + if (_obufpos >= BUFFER_S) { + ssize_t w = cwrite(_file, _outBuffer, BUFFER_S); _fsize += w; _obufpos = 0; } @@ -86,7 +114,8 @@ size_t OsmIdSet::getBlock(osmid id) const { bool OsmIdSet::diskHas(osmid id) const { assert(_sorted); - if (std::find(_blockEnds.begin(), _blockEnds.end(), id) != _blockEnds.end()) { + auto a = std::lower_bound(_blockEnds.begin(), _blockEnds.end(), id); + if (a != _blockEnds.end() && *a == id) { return true; } @@ -125,16 +154,26 @@ bool OsmIdSet::has(osmid id) const { LOOKUPS++; if (!_closed) close(); + // trivial cases if (id < _smallest || id > _biggest) { return false; } - for (int i = 0; i < 10; i++) { - if ((*_bitset)[hash(id, i)] == 0) return false; + uint32_t h1, h2; + MurmurHash3_x86_32(&id, 8, 469954432, &h1); + h2 = jenkins(id); + + for (int i = 0; i < 5; i++) { + uint32_t h = (h1 + i * h2) % BLOOMF_BITS; + if ((*_bitset)[h] == 0) { + return false; + } + if (_hasInv && (*_bitsetNotIn)[h] == 0) { + return true; + } } bool has = diskHas(id); - // assert(has == (bool)_set.count(id)); return has; } @@ -232,7 +271,7 @@ void OsmIdSet::sort() const { size_t OsmIdSet::cwrite(int f, const void* buf, size_t n) const { ssize_t w = write(f, buf, n); if (w < 0) { - throw std::runtime_error("OSMIDSET: could not write to tmp file.\n"); + throw std::runtime_error("Could not write to tmp file.\n"); } return w; @@ -242,7 +281,7 @@ size_t OsmIdSet::cwrite(int f, const void* buf, size_t n) const { size_t OsmIdSet::cread(int f, void* buf, size_t n) const { ssize_t w = read(f, buf, n); 
if (w < 0) { - throw std::runtime_error("OSMIDSET: could not read from tmp file.\n"); + throw std::runtime_error("Could not read from tmp file.\n"); } return w; @@ -250,8 +289,8 @@ size_t OsmIdSet::cread(int f, void* buf, size_t n) const { // _____________________________________________________________________________ uint32_t OsmIdSet::knuth(uint32_t in) const { - const uint32_t prime = 2654435769; - return (in * prime) >> 2; + const uint32_t a = 2654435769; + return (in * a) >> 2; } // _____________________________________________________________________________ @@ -265,14 +304,9 @@ uint32_t OsmIdSet::jenkins(uint32_t in) const { return in >> 2; } -// _____________________________________________________________________________ -uint32_t OsmIdSet::hash(uint32_t in, int i) const { - return (knuth(in) + jenkins(in) * i) % BLOOMF_BITS; -} - // _____________________________________________________________________________ int OsmIdSet::openTmpFile() const { - const std::string& fname = getFName(); + const std::string& fname = util::getTmpFName("", ".pfaedle-tmp", ""); int file = open(fname.c_str(), O_RDWR | O_CREAT, 0666); // immediately unlink @@ -283,20 +317,8 @@ int OsmIdSet::openTmpFile() const { exit(1); } +#ifdef __unix__ posix_fadvise(file, 0, 0, POSIX_FADV_SEQUENTIAL); +#endif return file; } - -// _____________________________________________________________________________ -std::string OsmIdSet::getFName() const { - std::string f = ".pfaedle-tmp"; - - while (access(f.c_str(), F_OK) != -1) { - std::stringstream ss; - ss << ".pfaedle-tmp-"; - ss << std::rand(); - f = ss.str().c_str(); - } - - return f; -} diff --git a/src/pfaedle/osm/OsmIdSet.h b/src/pfaedle/osm/OsmIdSet.h index f62d4b4..1a16b61 100644 --- a/src/pfaedle/osm/OsmIdSet.h +++ b/src/pfaedle/osm/OsmIdSet.h @@ -13,6 +13,10 @@ #include #include "pfaedle/osm/Osm.h" +#ifndef POSIX_FADV_SEQUENTIAL +#define POSIX_FADV_SEQUENTIAL 2 +#endif + namespace pfaedle { namespace osm { @@ -21,7 +25,7 @@ static 
const size_t BUFFER_S = 8 * 64 * 1024; static const size_t SORT_BUFFER_S = 8 * 64 * 1024; static const size_t OBUFFER_S = 8 * 1024 * 1024; -#define BLOOMF_BITS 400000000 +#define BLOOMF_BITS 214748357 /* * A disk-based set for OSM ids. Read-access for checking the presence is @@ -35,6 +39,9 @@ class OsmIdSet { // Add an OSM id void add(osmid id); + // Add an OSM id that is NOT contained + void nadd(osmid id); + // Check if an OSM id is contained bool has(osmid id) const; @@ -43,7 +50,7 @@ class OsmIdSet { static size_t FLOOKUPS; private: - std::set _set; + std::string _tmpPath; mutable bool _closed; mutable int _file; unsigned char* _buffer; @@ -53,6 +60,8 @@ class OsmIdSet { osmid _smallest; osmid _biggest; + bool _hasInv; + size_t _obufpos; mutable size_t _curBlock; mutable ssize_t _curBlockSize; @@ -60,13 +69,14 @@ class OsmIdSet { // bloom filter std::bitset* _bitset; + std::bitset* _bitsetNotIn; + mutable std::vector _blockEnds; mutable size_t _fsize; uint32_t knuth(uint32_t in) const; uint32_t jenkins(uint32_t in) const; - uint32_t hash(uint32_t in, int i) const; void diskAdd(osmid id); void close() const; void sort() const; diff --git a/src/pfaedle/osm/OsmReadOpts.h b/src/pfaedle/osm/OsmReadOpts.h index f4d3924..5678c51 100644 --- a/src/pfaedle/osm/OsmReadOpts.h +++ b/src/pfaedle/osm/OsmReadOpts.h @@ -5,14 +5,14 @@ #ifndef PFAEDLE_OSM_OSMREADOPTS_H_ #define PFAEDLE_OSM_OSMREADOPTS_H_ +#include #include -#include +#include #include #include -#include +#include #include #include -#include #include "pfaedle/osm/Osm.h" #include "pfaedle/trgraph/Graph.h" #include "pfaedle/trgraph/Normalizer.h" @@ -77,54 +77,42 @@ struct RelLineRules { AttrLst sNameRule; AttrLst fromNameRule; AttrLst toNameRule; + AttrLst colorRule; }; inline bool operator==(const RelLineRules& a, const RelLineRules& b) { return a.sNameRule == b.sNameRule && a.fromNameRule == b.fromNameRule && - a.toNameRule == b.toNameRule; + a.toNameRule == b.toNameRule && a.colorRule == b.colorRule; } struct 
StationAttrRules { DeepAttrLst nameRule; DeepAttrLst platformRule; + DeepAttrLst idRule; }; inline bool operator==(const StationAttrRules& a, const StationAttrRules& b) { - return a.nameRule == b.nameRule && a.platformRule == b.platformRule; + return a.nameRule == b.nameRule && a.platformRule == b.platformRule && a.idRule == b.idRule; /* idRule is a member of the rule set, so it has to take part in equality; otherwise rule sets differing only in idRule compare equal */ } -struct StatGroupNAttrRule { - DeepAttrRule attr; - double maxDist; -}; - -inline bool operator==(const StatGroupNAttrRule& a, - const StatGroupNAttrRule& b) { - return a.attr == b.attr && a.maxDist == b.maxDist; -} - -typedef std::unordered_map< - std::string, - std::unordered_map>> - StAttrGroups; - struct OsmReadOpts { OsmReadOpts() {} MultAttrMap noHupFilter; MultAttrMap keepFilter; - MultAttrMap levelFilters[7]; + MultAttrMap levelFilters[8]; MultAttrMap dropFilter; MultAttrMap oneWayFilter; MultAttrMap oneWayFilterRev; MultAttrMap twoWayFilter; MultAttrMap stationFilter; MultAttrMap stationBlockerFilter; - std::vector statGroupNAttrRules; + MultAttrMap turnCycleFilter; trgraph::Normalizer statNormzer; trgraph::Normalizer lineNormzer; trgraph::Normalizer trackNormzer; + trgraph::Normalizer idNormzer; RelLineRules relLinerules; StationAttrRules statAttrRules; @@ -134,15 +122,25 @@ struct OsmReadOpts { uint8_t maxSnapLevel; double maxAngleSnapReach; - std::vector maxSnapDistances; - double maxSnapFallbackHeurDistance; - double maxGroupSearchDistance; + double maxSnapDistance; + double maxStationCandDistance; double maxBlockDistance; - double maxOsmStationDistance; + double maxSpeed; + double maxSpeedCorFac; - // TODO(patrick): this is not implemented yet - double levelSnapPunishFac[7] = {0, 0, 0, 0, 0, 0, 0}; + std::vector maxOsmStationDistances; + + // given in km/h, but store in m/s + double levelDefSpeed[8] = {85 * 0.2777, 70 * 0.2777, 55 * 0.2777, 40 * 0.2777, + 30 * 0.2777, 20 * 0.2777, 10 * 0.2777, 5 * 0.2777}; + + double oneWaySpeedPen; + double oneWayEntryCost; + + double noLinesPunishFact; + + double fullTurnAngle; // restriction system MultAttrMap restrPosRestr; @@ -151,9 +149,10 @@ struct OsmReadOpts { }; inline
bool operator==(const OsmReadOpts& a, const OsmReadOpts& b) { - if (a.maxSnapDistances.size() != b.maxSnapDistances.size()) return false; - for (size_t i = 0; i < a.maxSnapDistances.size(); i++) { - if (fabs(a.maxSnapDistances[i] - b.maxSnapDistances[i]) >= 0.1) + if (a.maxOsmStationDistances.size() != b.maxOsmStationDistances.size()) + return false; + for (size_t i = 0; i < a.maxOsmStationDistances.size(); i++) { + if (fabs(a.maxOsmStationDistances[i] - b.maxOsmStationDistances[i]) >= 0.1) return false; } @@ -170,24 +169,29 @@ inline bool operator==(const OsmReadOpts& a, const OsmReadOpts& b) { a.twoWayFilter == b.twoWayFilter && a.stationFilter == b.stationFilter && a.stationBlockerFilter == b.stationBlockerFilter && - a.statGroupNAttrRules == b.statGroupNAttrRules && + a.turnCycleFilter == b.turnCycleFilter && /* idNormzer is compared like the other normalizers so that options differing only in it are not treated as equal */ a.idNormzer == b.idNormzer && a.statNormzer == b.statNormzer && a.lineNormzer == b.lineNormzer && a.trackNormzer == b.trackNormzer && a.relLinerules == b.relLinerules && a.statAttrRules == b.statAttrRules && a.maxSnapLevel == b.maxSnapLevel && fabs(a.maxAngleSnapReach - b.maxAngleSnapReach) < 0.1 && - fabs(a.maxOsmStationDistance - b.maxOsmStationDistance) < 0.1 && - fabs(a.maxSnapFallbackHeurDistance - b.maxSnapFallbackHeurDistance) < - 0.1 && - fabs(a.maxGroupSearchDistance - b.maxGroupSearchDistance) < 0.1 && + fabs(a.maxSnapDistance - b.maxSnapDistance) < 0.1 && + fabs(a.maxStationCandDistance - b.maxStationCandDistance) < 0.1 && fabs(a.maxBlockDistance - b.maxBlockDistance) < 0.1 && - fabs(a.levelSnapPunishFac[0] - b.levelSnapPunishFac[0]) < 0.1 && - fabs(a.levelSnapPunishFac[1] - b.levelSnapPunishFac[1]) < 0.1 && - fabs(a.levelSnapPunishFac[2] - b.levelSnapPunishFac[2]) < 0.1 && - fabs(a.levelSnapPunishFac[3] - b.levelSnapPunishFac[3]) < 0.1 && - fabs(a.levelSnapPunishFac[4] - b.levelSnapPunishFac[4]) < 0.1 && - fabs(a.levelSnapPunishFac[5] - b.levelSnapPunishFac[5]) < 0.1 && - fabs(a.levelSnapPunishFac[6] - b.levelSnapPunishFac[6]) < 0.1 && + fabs(a.levelDefSpeed[0] -
b.levelDefSpeed[0]) < 0.1 && + fabs(a.levelDefSpeed[1] - b.levelDefSpeed[1]) < 0.1 && + fabs(a.levelDefSpeed[2] - b.levelDefSpeed[2]) < 0.1 && + fabs(a.levelDefSpeed[3] - b.levelDefSpeed[3]) < 0.1 && + fabs(a.levelDefSpeed[4] - b.levelDefSpeed[4]) < 0.1 && + fabs(a.levelDefSpeed[5] - b.levelDefSpeed[5]) < 0.1 && + fabs(a.levelDefSpeed[6] - b.levelDefSpeed[6]) < 0.1 && + fabs(a.levelDefSpeed[7] - b.levelDefSpeed[7]) < 0.1 && + fabs(a.oneWaySpeedPen - b.oneWaySpeedPen) < 0.1 && + fabs(a.oneWayEntryCost - b.oneWayEntryCost) < 0.1 && + fabs(a.noLinesPunishFact - b.noLinesPunishFact) < 0.1 && + fabs(a.fullTurnAngle - b.fullTurnAngle) < 0.1 && + fabs(a.maxSpeedCorFac - b.maxSpeedCorFac) < 0.1 && + fabs(a.maxSpeed - b.maxSpeed) < 0.1 && a.restrPosRestr == b.restrPosRestr && a.restrNegRestr == b.restrNegRestr && a.noRestrFilter == b.noRestrFilter; diff --git a/src/pfaedle/router/Comp.h b/src/pfaedle/router/Comp.h index 875c7d4..cdadc21 100644 --- a/src/pfaedle/router/Comp.h +++ b/src/pfaedle/router/Comp.h @@ -16,7 +16,7 @@ namespace router { using util::editDist; // _____________________________________________________________________________ -inline double statSimi(const std::string& a, const std::string& b) { +inline bool statSimi(const std::string& a, const std::string& b) { if (a == b) return 1; if (a.empty() || b.empty()) return 0; @@ -55,16 +55,33 @@ inline double statSimi(const std::string& a, const std::string& b) { } // _____________________________________________________________________________ -inline double lineSimi(const std::string& a, const std::string& b) { +inline bool lineSimi(const std::string& a, const std::string& b) { if (a == b) return 1; if (a.empty() || b.empty()) return 0; - // if one of the lines is completely contained in the other, return 1 - if (a.find(b) != std::string::npos) { - return 1; - } else if (b.find(a) != std::string::npos) { - return 1; + if (a.size() > b.size() + 1) { + // check if a begins with b + if (a.compare(0, b.size() + 1, 
b + " ") == 0) { + return 1; + } + + // check if a ends with b + if (a.compare(a.size() - (b.size() + 1), b.size() + 1, " " + b) == 0) { + return 1; + } + } + + if (b.size() > a.size() + 1) { + // check if b begins with a + if (b.compare(0, a.size() + 1, a + " ") == 0) { + return 1; + } + + // check if b ends with a + if (b.compare(b.size() - (a.size() + 1), a.size() + 1, " " + a) == 0) { + return 1; + } } return 0; diff --git a/src/pfaedle/router/EdgePL.cpp b/src/pfaedle/router/EdgePL.cpp deleted file mode 100644 index 64c8f15..0000000 --- a/src/pfaedle/router/EdgePL.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#include "pfaedle/router/EdgePL.h" -#include "pfaedle/router/Router.h" -#include "util/String.h" - -using pfaedle::router::EdgePL; -using pfaedle::router::EdgeCost; -using pfaedle::router::EdgeList; -using pfaedle::trgraph::Node; - -// _____________________________________________________________________________ -EdgeList* EdgePL::getEdges() { return &_edges; } - -// _____________________________________________________________________________ -const EdgeList& EdgePL::getEdges() const { return _edges; } - -// _____________________________________________________________________________ -const FPoint& EdgePL::frontHop() const { - if (!_edges.size()) return *_end->pl().getGeom(); - return _edges.back()->pl().frontHop(); -} - -// _____________________________________________________________________________ -const FPoint& EdgePL::backHop() const { - if (!_edges.size()) return *_start->pl().getGeom(); - return _edges.front()->pl().backHop(); -} - -// _____________________________________________________________________________ -const Node* EdgePL::backNode() const { return _end; } - -// _____________________________________________________________________________ -const Node* EdgePL::frontNode() const { return _start; } - -// 
_____________________________________________________________________________ -const util::geo::FLine* EdgePL::getGeom() const { - if (!_edges.size()) return 0; - if (!_geom.size()) { - const trgraph::Node* l = _start; - for (auto i = _edges.rbegin(); i != _edges.rend(); i++) { - const auto e = *i; - if ((e->getFrom() == l) ^ e->pl().isRev()) { - _geom.insert(_geom.end(), e->pl().getGeom()->begin(), - e->pl().getGeom()->end()); - } else { - _geom.insert(_geom.end(), e->pl().getGeom()->rbegin(), - e->pl().getGeom()->rend()); - } - l = e->getOtherNd(l); - } - } - - return &_geom; -} - -// _____________________________________________________________________________ -void EdgePL::setStartNode(const trgraph::Node* s) { _start = s; } - -// _____________________________________________________________________________ -void EdgePL::setEndNode(const trgraph::Node* e) { _end = e; } - -// _____________________________________________________________________________ -void EdgePL::setStartEdge(const trgraph::Edge* s) { _startE = s; } - -// _____________________________________________________________________________ -void EdgePL::setEndEdge(const trgraph::Edge* e) { _endE = e; } - -// _____________________________________________________________________________ -const EdgeCost& EdgePL::getCost() const { return _cost; } - -// _____________________________________________________________________________ -void EdgePL::setCost(const router::EdgeCost& c) { _cost = c; } - - -// _____________________________________________________________________________ -void EdgePL::getAttrs(std::map* obj) const { - (*obj)["cost"] = std::to_string(_cost.getValue()); - (*obj)["from_edge"] = util::toString(_startE); - (*obj)["to_edge"] = util::toString(_endE); - (*obj)["cost_m_lvl1"] = std::to_string(_cost.meterDistLvl1); - (*obj)["cost_m_lvl0"] = std::to_string(_cost.meterDist); - (*obj)["cost_m_lvl1"] = std::to_string(_cost.meterDistLvl1); - (*obj)["cost_m_lvl2"] = 
std::to_string(_cost.meterDistLvl2); - (*obj)["cost_m_lvl3"] = std::to_string(_cost.meterDistLvl3); - (*obj)["cost_m_lvl4"] = std::to_string(_cost.meterDistLvl4); - (*obj)["cost_m_lvl5"] = std::to_string(_cost.meterDistLvl5); - (*obj)["cost_m_lvl6"] = std::to_string(_cost.meterDistLvl6); - (*obj)["cost_m_lvl7"] = std::to_string(_cost.meterDistLvl7); - (*obj)["cost_fullturn"] = std::to_string(_cost.fullTurns); - (*obj)["cost_st_passthru"] = std::to_string(_cost.passThruStations); - (*obj)["cost_m_oneway"] = std::to_string(_cost.oneWayMeters); - (*obj)["cost_m_lineunmatch"] = std::to_string(_cost.lineUnmatchedMeters); - (*obj)["cost_reach_node_pen"] = std::to_string(_cost.reachPen); - (*obj)["cost_oneway_event"] = std::to_string(_cost.oneWayEdges); - (*obj)["dummy"] = _edges.size() ? "no" : "yes"; -} diff --git a/src/pfaedle/router/EdgePL.h b/src/pfaedle/router/EdgePL.h deleted file mode 100644 index 4838601..0000000 --- a/src/pfaedle/router/EdgePL.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Authors: Patrick Brosi - -#ifndef PFAEDLE_ROUTER_EDGEPL_H_ -#define PFAEDLE_ROUTER_EDGEPL_H_ - -#include -#include -#include "pfaedle/router/Misc.h" -#include "util/geo/GeoGraph.h" - -using util::geograph::GeoEdgePL; - -namespace pfaedle { -namespace router { - -class EdgePL : public GeoEdgePL { - public: - EdgePL() : _cost(), _start(0), _end(0), _startE(0), _endE(0) {} - const util::geo::FLine* getGeom() const; - void getAttrs(std::map* attrs) const; - router::EdgeList* getEdges(); - const router::EdgeList& getEdges() const; - void setStartNode(const trgraph::Node* s); - void setEndNode(const trgraph::Node* s); - void setStartEdge(const trgraph::Edge* s); - void setEndEdge(const trgraph::Edge* s); - const router::EdgeCost& getCost() const; - void setCost(const router::EdgeCost& c); - const FPoint& frontHop() const; - const FPoint& backHop() const; - const trgraph::Node* frontNode() const; - const trgraph::Node* backNode() const; - - private: - router::EdgeCost _cost; - // the edges are in this field in REVERSED ORDER! - router::EdgeList _edges; - const trgraph::Node* _start; - const trgraph::Node* _end; - const trgraph::Edge* _startE; - const trgraph::Edge* _endE; - mutable util::geo::FLine _geom; -}; -} // namespace router -} // namespace pfaedle - -#endif // PFAEDLE_ROUTER_EDGEPL_H_ diff --git a/src/pfaedle/router/Graph.h b/src/pfaedle/router/Graph.h deleted file mode 100644 index 88d7345..0000000 --- a/src/pfaedle/router/Graph.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Authors: Patrick Brosi - -#ifndef PFAEDLE_ROUTER_GRAPH_H_ -#define PFAEDLE_ROUTER_GRAPH_H_ - -#include "pfaedle/trgraph/Graph.h" -#include "pfaedle/router/EdgePL.h" -#include "pfaedle/router/NodePL.h" -#include "util/graph/DirGraph.h" - -using util::geo::Point; -using util::geo::Line; - -namespace pfaedle { -namespace router { - -typedef util::graph::Edge Edge; -typedef util::graph::Node Node; -typedef util::graph::DirGraph Graph; - -} // namespace router -} // namespace pfaedle - -#endif // PFAEDLE_ROUTER_GRAPH_H_ diff --git a/src/pfaedle/router/HopCache.cpp b/src/pfaedle/router/HopCache.cpp new file mode 100644 index 0000000..bb53290 --- /dev/null +++ b/src/pfaedle/router/HopCache.cpp @@ -0,0 +1,40 @@ +// Copyright 2020, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#include +#include +#include "pfaedle/router/HopCache.h" +#include "pfaedle/trgraph/Graph.h" +#include "util/Misc.h" + +using pfaedle::router::HopCache; +using pfaedle::trgraph::Edge; + +// _____________________________________________________________________________ +void HopCache::setMin(const Edge* a, const Edge* b, uint32_t val) { + _cache.set(a, b, val); +} + +// _____________________________________________________________________________ +void HopCache::setEx(const Edge* a, const Edge* b, uint32_t val) { + int64_t v = val; + _cache.set(a, b, -(v + 1)); +} + +// _____________________________________________________________________________ +void HopCache::setMin(const Edge* a, const std::set& b, uint32_t val) { + for (auto eb : b) _cache.set(a, eb, val); +} + +// _____________________________________________________________________________ +void HopCache::setMin(const std::set& a, const Edge* b, uint32_t val) { + for (auto ea : a) _cache.set(ea, b, val); +} + +// _____________________________________________________________________________ +std::pair HopCache::get(const Edge* a, const Edge* b) const { + int64_t v = _cache.get(a, b); + if (v 
< 0) return {(-v) - 1, 1}; + return {v, 0}; +} diff --git a/src/pfaedle/router/HopCache.h b/src/pfaedle/router/HopCache.h new file mode 100644 index 0000000..43d17af --- /dev/null +++ b/src/pfaedle/router/HopCache.h @@ -0,0 +1,39 @@ +// Copyright 2020, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#ifndef PFAEDLE_ROUTER_HOPCACHE_H_ +#define PFAEDLE_ROUTER_HOPCACHE_H_ + +#include +#include +#include +#include "pfaedle/trgraph/Graph.h" +#include "util/Misc.h" + +namespace pfaedle { +namespace router { + +class HopCache { + public: + void setMin(const trgraph::Edge* a, const trgraph::Edge* b, uint32_t val); + + void setMin(const trgraph::Edge* a, const std::set& b, + uint32_t val); + + void setMin(const std::set& a, const trgraph::Edge* b, + uint32_t val); + + void setEx(const trgraph::Edge* a, const trgraph::Edge* b, uint32_t val); + + std::pair get(const trgraph::Edge* a, + const trgraph::Edge* b) const; + + private: + util::SparseMatrix _cache; +}; + +} // namespace router +} // namespace pfaedle + +#endif // PFAEDLE_ROUTER_HOPCACHE_H_ diff --git a/src/pfaedle/router/Misc.h b/src/pfaedle/router/Misc.h index fc97cf7..f6fc197 100644 --- a/src/pfaedle/router/Misc.h +++ b/src/pfaedle/router/Misc.h @@ -7,11 +7,14 @@ #include #include -#include #include +#include + #include "ad/cppgtfs/gtfs/Feed.h" #include "ad/cppgtfs/gtfs/Route.h" +#include "pfaedle/gtfs/Feed.h" #include "pfaedle/trgraph/Graph.h" +#include "util/Nullable.h" using ad::cppgtfs::gtfs::Route; using ad::cppgtfs::gtfs::Stop; @@ -19,178 +22,161 @@ using ad::cppgtfs::gtfs::Stop; namespace pfaedle { namespace router { -struct NodeCand { - trgraph::Node* nd; +extern double time; + +struct EdgeCand { + trgraph::Edge* e; double pen; + double progr; + POINT point; + int time; + std::vector depPrede; }; struct RoutingOpts { RoutingOpts() - : fullTurnPunishFac(2000), + : fullTurnPunishFac(1000), fullTurnAngle(45), - passThruStationsPunish(100), - 
oneWayPunishFac(1), - oneWayEdgePunish(0), - lineUnmatchedPunishFact(0.5), + lineUnmatchedPunishFact(1), + lineNameFromUnmatchedPunishFact(1), + lineNameToUnmatchedPunishFact(1), + noLinesPunishFact(1), platformUnmatchedPen(0), - stationDistPenFactor(0) {} - double fullTurnPunishFac; + stationDistPenFactor(0), + turnRestrCost(0), + popReachEdge(true), + noSelfHops(true) {} + uint32_t fullTurnPunishFac; double fullTurnAngle; - double passThruStationsPunish; - double oneWayPunishFac; - double oneWayEdgePunish; double lineUnmatchedPunishFact; + double lineNameFromUnmatchedPunishFact; + double lineNameToUnmatchedPunishFact; + double noLinesPunishFact; double platformUnmatchedPen; + double stationUnmatchedPen = 0; /* not in the constructor's initializer list; defaulted here so operator== never reads an indeterminate value. NOTE(review): default values of the four defaulted members are a neutral choice - confirm against the config reader */ double stationDistPenFactor; - double nonOsmPen; - double levelPunish[8]; + double nonStationPen = 0; /* defaulted, see stationUnmatchedPen */ + uint32_t turnRestrCost; + bool popReachEdge; + bool noSelfHops; + bool useStations = true; /* defaulted, see stationUnmatchedPen */ + double transitionPen = 0; /* defaulted, see stationUnmatchedPen */ + std::string transPenMethod; + std::string emPenMethod; + std::string statsimiMethod; }; +// _____________________________________________________________________________ inline bool operator==(const RoutingOpts& a, const RoutingOpts& b) { - return fabs(a.fullTurnPunishFac - b.fullTurnPunishFac) < 0.01 && + return a.fullTurnPunishFac == b.fullTurnPunishFac && fabs(a.fullTurnAngle - b.fullTurnAngle) < 0.01 && - fabs(a.passThruStationsPunish - b.passThruStationsPunish) < 0.01 && - fabs(a.oneWayPunishFac - b.oneWayPunishFac) < 0.01 && - fabs(a.oneWayEdgePunish - b.oneWayEdgePunish) < 0.01 && fabs(a.lineUnmatchedPunishFact - b.lineUnmatchedPunishFact) < 0.01 && + fabs(a.lineNameFromUnmatchedPunishFact - + b.lineNameFromUnmatchedPunishFact) < 0.01 && + fabs(a.lineNameToUnmatchedPunishFact - + b.lineNameToUnmatchedPunishFact) < 0.01 && + fabs(a.noLinesPunishFact - b.noLinesPunishFact) < 0.01 && fabs(a.platformUnmatchedPen - b.platformUnmatchedPen) < 0.01 && + fabs(a.stationUnmatchedPen - b.stationUnmatchedPen) < 0.01 && fabs(a.stationDistPenFactor -
b.stationDistPenFactor) < 0.01 && - fabs(a.nonOsmPen - b.nonOsmPen) < 0.01 && - fabs(a.levelPunish[0] - b.levelPunish[0]) < 0.01 && - fabs(a.levelPunish[1] - b.levelPunish[1]) < 0.01 && - fabs(a.levelPunish[2] - b.levelPunish[2]) < 0.01 && - fabs(a.levelPunish[3] - b.levelPunish[3]) < 0.01 && - fabs(a.levelPunish[4] - b.levelPunish[4]) < 0.01 && - fabs(a.levelPunish[5] - b.levelPunish[5]) < 0.01 && - fabs(a.levelPunish[6] - b.levelPunish[6]) < 0.01 && - fabs(a.levelPunish[7] - b.levelPunish[7]) < 0.01; -} - -struct EdgeCost { - EdgeCost() - : meterDist(0), - meterDistLvl1(0), - meterDistLvl2(0), - meterDistLvl3(0), - meterDistLvl4(0), - meterDistLvl5(0), - meterDistLvl6(0), - meterDistLvl7(0), - fullTurns(0), - passThruStations(0), - oneWayMeters(0), - oneWayEdges(0), - lineUnmatchedMeters(0), - reachPen(0), - o(0) {} - EdgeCost(double mDist, double mDistLvl1, double mDistLvl2, double mDistLvl3, - double mDistLvl4, double mDistLvl5, double mDistLvl6, - double mDistLvl7, uint32_t fullTurns, int32_t passThru, - double oneWayMeters, size_t oneWayEdges, double lineUnmatchedMeters, - double reachPen, const RoutingOpts* o) - : meterDist(mDist), - meterDistLvl1(mDistLvl1), - meterDistLvl2(mDistLvl2), - meterDistLvl3(mDistLvl3), - meterDistLvl4(mDistLvl4), - meterDistLvl5(mDistLvl5), - meterDistLvl6(mDistLvl6), - meterDistLvl7(mDistLvl7), - fullTurns(fullTurns), - passThruStations(passThru), - oneWayMeters(oneWayMeters), - oneWayEdges(oneWayEdges), - lineUnmatchedMeters(lineUnmatchedMeters), - reachPen(reachPen), - o(o) {} - double meterDist; - double meterDistLvl1; - double meterDistLvl2; - double meterDistLvl3; - double meterDistLvl4; - double meterDistLvl5; - double meterDistLvl6; - double meterDistLvl7; - uint32_t fullTurns; - int32_t passThruStations; - double oneWayMeters; - size_t oneWayEdges; - double lineUnmatchedMeters; - double reachPen; - const RoutingOpts* o; - - double getValue() const { - if (!o) return meterDist + reachPen; - return meterDist * 
o->levelPunish[0] + meterDistLvl1 * o->levelPunish[1] + - meterDistLvl2 * o->levelPunish[2] + - meterDistLvl3 * o->levelPunish[3] + - meterDistLvl4 * o->levelPunish[4] + - meterDistLvl5 * o->levelPunish[5] + - meterDistLvl6 * o->levelPunish[6] + - meterDistLvl7 * o->levelPunish[7] + - oneWayMeters * o->oneWayPunishFac + - oneWayEdges * o->oneWayEdgePunish + - lineUnmatchedMeters * o->lineUnmatchedPunishFact + - fullTurns * o->fullTurnPunishFac + - passThruStations * o->passThruStationsPunish + reachPen; - } - - double getTotalMeters() const { - return meterDist + meterDistLvl1 + meterDistLvl2 + meterDistLvl3 + - meterDistLvl4 + meterDistLvl5 + meterDistLvl6 + meterDistLvl7; - } -}; - -inline EdgeCost operator+(const EdgeCost& a, const EdgeCost& b) { - return EdgeCost( - a.meterDist + b.meterDist, a.meterDistLvl1 + b.meterDistLvl1, - a.meterDistLvl2 + b.meterDistLvl2, a.meterDistLvl3 + b.meterDistLvl3, - a.meterDistLvl4 + b.meterDistLvl4, a.meterDistLvl5 + b.meterDistLvl5, - a.meterDistLvl6 + b.meterDistLvl6, a.meterDistLvl7 + b.meterDistLvl7, - a.fullTurns + b.fullTurns, a.passThruStations + b.passThruStations, - a.oneWayMeters + b.oneWayMeters, a.oneWayEdges + b.oneWayEdges, - a.lineUnmatchedMeters + b.lineUnmatchedMeters, a.reachPen + b.reachPen, - a.o ? 
a.o : b.o); -} - -inline bool operator<=(const EdgeCost& a, const EdgeCost& b) { - return a.getValue() <= b.getValue(); -} - -inline bool operator==(const EdgeCost& a, const EdgeCost& b) { - return a.getValue() == b.getValue(); -} - -inline bool operator>(const EdgeCost& a, const EdgeCost& b) { - return a.getValue() > b.getValue(); -} - - -inline int angSmaller(const FPoint& f, const FPoint& m, const FPoint& t, - double ang) { - if (util::geo::innerProd(m, f, t) < ang) return 1; - return 0; + a.turnRestrCost == b.turnRestrCost && + fabs(a.transitionPen - b.transitionPen) < 0.01 && + fabs(a.nonStationPen - b.nonStationPen) < 0.01 && + a.transPenMethod == b.transPenMethod && + a.emPenMethod == b.emPenMethod && + a.statsimiMethod == b.statsimiMethod && + a.useStations == b.useStations && a.popReachEdge == b.popReachEdge && + a.noSelfHops == b.noSelfHops; } typedef std::set NodeSet; typedef std::set EdgeSet; typedef std::unordered_map FeedStops; -typedef std::vector NodeCandGroup; -typedef std::vector NodeCandRoute; +typedef std::vector EdgeCandGroup; +typedef std::vector EdgeCandMap; +typedef std::vector EdgeCandRoute; typedef std::vector EdgeList; typedef std::vector NodeList; struct EdgeListHop { EdgeList edges; - const trgraph::Node* start; - const trgraph::Node* end; + const trgraph::Edge* start; + const trgraph::Edge* end; + double progrStart; + double progrEnd; + POINT pointStart; + POINT pointEnd; }; typedef std::vector EdgeListHops; typedef std::set MOTs; + +// _____________________________________________________________________________ +inline MOTs motISect(const MOTs& a, const MOTs& b) { + MOTs ret; + for (auto mot : a) + if (b.count(mot)) ret.insert(mot); + return ret; +} + +// _____________________________________________________________________________ +inline pfaedle::router::FeedStops writeMotStops(const pfaedle::gtfs::Feed* feed, + const MOTs mots, + const std::string& tid) { + pfaedle::router::FeedStops ret; + for (auto t : feed->getTrips()) { + if 
(!tid.empty() && t.getId() != tid) continue; + if (mots.count(t.getRoute()->getType())) { + for (auto st : t.getStopTimes()) { + // if the station has type STATION_ENTRANCE, use the parent + // station for routing. Normally, this should not occur, as + // this is not allowed in stop_times.txt + if (st.getStop()->getLocationType() == + ad::cppgtfs::gtfs::flat::Stop::STATION_ENTRANCE && + st.getStop()->getParentStation()) { + ret[st.getStop()->getParentStation()] = 0; + } else { + ret[st.getStop()] = 0; + } + } + } + } + return ret; +} + +// _____________________________________________________________________________ +inline std::string getMotStr(const MOTs& mots) { + MOTs tmp = mots; + bool first = false; + std::string motStr; + + std::string names[11] = {"tram", "subway", "rail", "bus", + "ferry", "cablecar", "gondola", "funicular", + "coach", "trolleybus", "monorail"}; + + for (const auto& n : names) { + const auto& types = ad::cppgtfs::gtfs::flat::Route::getTypesFromString(n); + const auto& isect = motISect(tmp, types); + + if (isect.size() == types.size()) { + if (first) motStr += ", "; + motStr += "{" + n + "}"; + first = true; + for (const auto& mot : isect) tmp.erase(mot); + } + } + + for (const auto& mot : tmp) { + if (first) motStr += ", "; + motStr += "<" + ad::cppgtfs::gtfs::flat::Route::getTypeString(mot) + ">"; + first = true; + } + + return motStr; +} } // namespace router } // namespace pfaedle diff --git a/src/pfaedle/router/NodePL.h b/src/pfaedle/router/NodePL.h deleted file mode 100644 index 1ab743c..0000000 --- a/src/pfaedle/router/NodePL.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Authors: Patrick Brosi - -#ifndef PFAEDLE_ROUTER_NODEPL_H_ -#define PFAEDLE_ROUTER_NODEPL_H_ - -#include -#include -#include "pfaedle/trgraph/Graph.h" -#include "util/geo/GeoGraph.h" - -using util::geograph::GeoNodePL; - -namespace pfaedle { -namespace router { - -class NodePL : public GeoNodePL { - public: - NodePL() : _n(0) {} - NodePL(const pfaedle::trgraph::Node* n) : _n(n) {} // NOLINT - - const util::geo::FPoint* getGeom() const { - return !_n ? 0 : _n->pl().getGeom(); - } - void getAttrs(std::map* attrs) const { - if (_n) _n->pl().getAttrs(attrs); - } - - private: - const pfaedle::trgraph::Node* _n; -}; -} // namespace router -} // namespace pfaedle - -#endif // PFAEDLE_ROUTER_NODEPL_H_ diff --git a/src/pfaedle/router/Router.cpp b/src/pfaedle/router/Router.cpp deleted file mode 100644 index 5cac1cb..0000000 --- a/src/pfaedle/router/Router.cpp +++ /dev/null @@ -1,615 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "pfaedle/router/Comp.h" -#include "pfaedle/router/Router.h" -#include "pfaedle/router/RoutingAttrs.h" -#include "util/geo/output/GeoGraphJsonOutput.h" -#include "util/graph/Dijkstra.h" -#include "util/graph/EDijkstra.h" -#include "util/log/Log.h" - -using pfaedle::router::Router; -using pfaedle::router::EdgeCost; -using pfaedle::router::CostFunc; -using pfaedle::router::DistHeur; -using pfaedle::router::NCostFunc; -using pfaedle::router::NDistHeur; -using pfaedle::router::CombCostFunc; -using pfaedle::router::EdgeListHop; -using pfaedle::router::EdgeListHops; -using pfaedle::router::RoutingOpts; -using pfaedle::router::RoutingAttrs; -using pfaedle::router::HopBand; -using pfaedle::router::NodeCandRoute; -using util::graph::EDijkstra; -using util::graph::Dijkstra; - -// _____________________________________________________________________________ -EdgeCost 
NCostFunc::operator()(const trgraph::Node* from, - const trgraph::Edge* e, - const trgraph::Node* to) const { - UNUSED(to); - if (!from) return EdgeCost(); - - int oneway = e->pl().oneWay() == 2; - int32_t stationSkip = 0; - - double transitLinePen = 0; // transitLineCmp(e->pl()); - - return EdgeCost(e->pl().lvl() == 0 ? e->pl().getLength() : 0, - e->pl().lvl() == 1 ? e->pl().getLength() : 0, - e->pl().lvl() == 2 ? e->pl().getLength() : 0, - e->pl().lvl() == 3 ? e->pl().getLength() : 0, - e->pl().lvl() == 4 ? e->pl().getLength() : 0, - e->pl().lvl() == 5 ? e->pl().getLength() : 0, - e->pl().lvl() == 6 ? e->pl().getLength() : 0, - e->pl().lvl() == 7 ? e->pl().getLength() : 0, 0, stationSkip, - e->pl().getLength() * oneway, oneway, - e->pl().getLength() * transitLinePen, 0, &_rOpts); -} - -// _____________________________________________________________________________ -EdgeCost CostFunc::operator()(const trgraph::Edge* from, const trgraph::Node* n, - const trgraph::Edge* to) const { - if (!from) return EdgeCost(); - - uint32_t fullTurns = 0; - int oneway = from->pl().oneWay() == 2; - int32_t stationSkip = 0; - - if (n) { - if (from->getFrom() == to->getTo() && from->getTo() == to->getFrom()) { - // trivial full turn - fullTurns = 1; - } else if (n->getDeg() > 2) { - // otherwise, only intersection angles will be punished - fullTurns = router::angSmaller(from->pl().backHop(), *n->pl().getGeom(), - to->pl().frontHop(), _rOpts.fullTurnAngle); - } - - if (from->pl().isRestricted() && !_res.may(from, to, n)) oneway = 1; - - // for debugging - n->pl().setVisited(); - - if (_tgGrp && n->pl().getSI() && n->pl().getSI()->getGroup() != _tgGrp) - stationSkip = 1; - } - - double transitLinePen = transitLineCmp(from->pl()); - - return EdgeCost(from->pl().lvl() == 0 ? from->pl().getLength() : 0, - from->pl().lvl() == 1 ? from->pl().getLength() : 0, - from->pl().lvl() == 2 ? from->pl().getLength() : 0, - from->pl().lvl() == 3 ? from->pl().getLength() : 0, - from->pl().lvl() == 4 ? 
from->pl().getLength() : 0, - from->pl().lvl() == 5 ? from->pl().getLength() : 0, - from->pl().lvl() == 6 ? from->pl().getLength() : 0, - from->pl().lvl() == 7 ? from->pl().getLength() : 0, fullTurns, - stationSkip, from->pl().getLength() * oneway, oneway, - from->pl().getLength() * transitLinePen, 0, &_rOpts); -} - -// _____________________________________________________________________________ -double CostFunc::transitLineCmp(const trgraph::EdgePL& e) const { - double best = 1; - for (const auto* l : e.getLines()) { - double cur = _rAttrs.simi(l); - - if (cur < 0.0001) return cur; - if (cur < best) best = cur; - } - - return best; -} - -// _____________________________________________________________________________ -NDistHeur::NDistHeur(const RoutingOpts& rOpts, - const std::set& tos) - : _rOpts(rOpts), _maxCentD(0) { - size_t c = 0; - double x = 0, y = 0; - for (auto to : tos) { - x += to->pl().getGeom()->getX(); - y += to->pl().getGeom()->getY(); - c++; - } - - x /= c; - y /= c; - _center = FPoint(x, y); - - for (auto to : tos) { - double cur = static_cast(static_cast( - util::geo::webMercMeterDist(*to->pl().getGeom(), _center))); - if (cur > _maxCentD) _maxCentD = cur; - } -} - -// _____________________________________________________________________________ -DistHeur::DistHeur(uint8_t minLvl, const RoutingOpts& rOpts, - const std::set& tos) - : _rOpts(rOpts), _lvl(minLvl), _maxCentD(0) { - size_t c = 0; - double x = 0, y = 0; - for (auto to : tos) { - x += to->getFrom()->pl().getGeom()->getX(); - y += to->getFrom()->pl().getGeom()->getY(); - c++; - } - - x /= c; - y /= c; - _center = FPoint(x, y); - - for (auto to : tos) { - double cur = static_cast(static_cast( - util::geo::webMercMeterDist(*to->getFrom()->pl().getGeom(), _center) * - _rOpts.levelPunish[_lvl])); - if (cur > _maxCentD) _maxCentD = cur; - } -} - -// _____________________________________________________________________________ -EdgeCost DistHeur::operator()(const trgraph::Edge* a, - const 
std::set& b) const { - double cur = static_cast(static_cast( - util::geo::webMercMeterDist(*a->getTo()->pl().getGeom(), _center) * - _rOpts.levelPunish[_lvl])); - - UNUSED(b); - - return EdgeCost(cur - _maxCentD, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); -} - -// _____________________________________________________________________________ -EdgeCost NDistHeur::operator()(const trgraph::Node* a, - const std::set& b) const { - double cur = static_cast(static_cast( - util::geo::webMercMeterDist(*a->pl().getGeom(), _center))); - - UNUSED(b); - - return EdgeCost(cur - _maxCentD, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); -} - -// _____________________________________________________________________________ -double CombCostFunc::operator()(const router::Edge* from, const router::Node* n, - const router::Edge* to) const { - UNUSED(n); - UNUSED(from); - return to->pl().getCost().getValue(); -} - -// _____________________________________________________________________________ -Router::Router(const trgraph::Graph& g, size_t numThreads) - : _g(g), _cache(numThreads) { - for (size_t i = 0; i < numThreads; i++) { - _cache[i] = new Cache(); - } -} - -// _____________________________________________________________________________ -Router::~Router() { - for (size_t i = 0; i < _cache.size(); i++) { - delete _cache[i]; - } -} - -// _____________________________________________________________________________ -bool Router::compConned(const NodeCandGroup& a, const NodeCandGroup& b) const { - for (auto n1 : a) { - for (auto n2 : b) { - if (n1.nd->pl().getComp() == n2.nd->pl().getComp()) return true; - } - } - - return false; -} - -// _____________________________________________________________________________ -HopBand Router::getHopBand(const NodeCandGroup& a, const NodeCandGroup& b, - const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, - const osm::Restrictor& rest) const { - double pend = 0; - for (size_t i = 0; i < a.size(); i++) { - for (size_t j = 0; j < b.size(); j++) { - 
double d = util::geo::webMercMeterDist(*a[i].nd->pl().getGeom(), - *b[j].nd->pl().getGeom()); - if (d > pend) pend = d; - } - } - - LOG(VDEBUG) << "Pending max hop distance is " << pend << " meters"; - - const trgraph::StatGroup* tgGrpTo = 0; - if (b.begin()->nd->pl().getSI()) - tgGrpTo = b.begin()->nd->pl().getSI()->getGroup(); - - CostFunc costF(rAttrs, rOpts, rest, tgGrpTo, pend * 50); - - std::set from, to; - - // TODO(patrick): test if the two sets share a common connected component - - for (auto n : a) - from.insert(n.nd->getAdjListOut().begin(), n.nd->getAdjListOut().end()); - - for (auto n : b) - to.insert(n.nd->getAdjListOut().begin(), n.nd->getAdjListOut().end()); - - LOG(VDEBUG) << "Doing pilot run between " << from.size() << "->" << to.size() - << " candidates"; - - EdgeList el; - EdgeCost ret = costF.inf(); - DistHeur distH(0, rOpts, to); - - if (compConned(a, b)) - ret = EDijkstra::shortestPath(from, to, costF, distH, &el); - - if (el.size() < 2 && costF.inf() <= ret) { - LOG(VDEBUG) << "Pilot run: no connection between candidate groups," - << " setting max distance to 1"; - return HopBand{0, 1, 0, 0}; - } - - // cache the found path, will save a few dijkstra iterations - nestedCache(&el, from, costF, rAttrs); - - auto na = el.back()->getFrom(); - auto nb = el.front()->getFrom(); - - double maxStrD = 0; - - for (auto e : to) { - double d = static_cast(static_cast( - util::geo::webMercMeterDist(*el.front()->getFrom()->pl().getGeom(), - *e->getTo()->pl().getGeom()))); - if (d > maxStrD) maxStrD = d; - } - - // TODO(patrick): derive the punish level here automatically - double maxD = std::max(ret.getValue(), pend * rOpts.levelPunish[2]) * 3; - double minD = ret.getValue(); - - LOG(VDEBUG) << "Pilot run: min distance between two groups is " - << ret.getValue() << " (between nodes " << na << " and " << nb - << "), using a max routing distance of " << maxD << ". 
The max" - << " straight line distance from the pilot target to any other " - "target node was" - << " " << maxStrD << "."; - - return HopBand{minD, maxD, el.front(), maxStrD}; -} - -// _____________________________________________________________________________ -EdgeListHops Router::routeGreedy(const NodeCandRoute& route, - const RoutingAttrs& rAttrs, - const RoutingOpts& rOpts, - const osm::Restrictor& rest) const { - if (route.size() < 2) return EdgeListHops(); - EdgeListHops ret(route.size() - 1); - - for (size_t i = 0; i < route.size() - 1; i++) { - const trgraph::StatGroup* tgGrp = 0; - std::set from, to; - for (auto c : route[i]) from.insert(c.nd); - for (auto c : route[i + 1]) to.insert(c.nd); - if (route[i + 1].begin()->nd->pl().getSI()) - tgGrp = route[i + 1].begin()->nd->pl().getSI()->getGroup(); - - NCostFunc cost(rAttrs, rOpts, rest, tgGrp); - NDistHeur dist(rOpts, to); - - NodeList nodesRet; - EdgeListHop hop; - Dijkstra::shortestPath(from, to, cost, dist, &hop.edges, &nodesRet); - - if (nodesRet.size() > 1) { - // careful: nodesRet is reversed! 
- hop.start = nodesRet.back(); - hop.end = nodesRet.front(); - } else { - // just take the first candidate if no route could be found - hop.start = *from.begin(); - hop.end = *to.begin(); - } - - ret[i] = hop; - } - - return ret; -} - -// _____________________________________________________________________________ -EdgeListHops Router::routeGreedy2(const NodeCandRoute& route, - const RoutingAttrs& rAttrs, - const RoutingOpts& rOpts, - const osm::Restrictor& rest) const { - if (route.size() < 2) return EdgeListHops(); - EdgeListHops ret(route.size() - 1); - - for (size_t i = 0; i < route.size() - 1; i++) { - const trgraph::StatGroup* tgGrp = 0; - std::set from, to; - - if (i == 0) - for (auto c : route[i]) from.insert(c.nd); - else - from.insert(const_cast(ret[i - 1].end)); - - for (auto c : route[i + 1]) to.insert(c.nd); - - if (route[i + 1].begin()->nd->pl().getSI()) - tgGrp = route[i + 1].begin()->nd->pl().getSI()->getGroup(); - - NCostFunc cost(rAttrs, rOpts, rest, tgGrp); - NDistHeur dist(rOpts, to); - - NodeList nodesRet; - EdgeListHop hop; - Dijkstra::shortestPath(from, to, cost, dist, &hop.edges, &nodesRet); - if (nodesRet.size() > 1) { - // careful: nodesRet is reversed! 
- hop.start = nodesRet.back(); - hop.end = nodesRet.front(); - } else { - // just take the first candidate if no route could be found - hop.start = *from.begin(); - hop.end = *to.begin(); - } - - ret[i] = hop; - } - - return ret; -} - -// _____________________________________________________________________________ -EdgeListHops Router::route(const NodeCandRoute& route, - const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, - const osm::Restrictor& rest, - router::Graph* cgraph) const { - if (route.size() < 2) return EdgeListHops(); - EdgeListHops ret(route.size() - 1); - - CombCostFunc ccost(rOpts); - router::Node* source = cgraph->addNd(); - router::Node* sink = cgraph->addNd(); - CombNodeMap nodes; - CombNodeMap nextNodes; - - for (size_t i = 0; i < route[0].size(); i++) { - for (const auto* e : route[0][i].nd->getAdjListOut()) { - // we can be sure that each edge is exactly assigned to only one - // node because the transitgraph is directed - nodes[e] = cgraph->addNd(route[0][i].nd); - cgraph->addEdg(source, nodes[e]) - ->pl() - .setCost(EdgeCost(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - route[0][i].pen, 0)); - } - } - - size_t iters = EDijkstra::ITERS; - double itPerSecTot = 0; - size_t n = 0; - for (size_t i = 0; i < route.size() - 1; i++) { - nextNodes.clear(); - HopBand hopBand = getHopBand(route[i], route[i + 1], rAttrs, rOpts, rest); - - const trgraph::StatGroup* tgGrp = 0; - if (route[i + 1].begin()->nd->pl().getSI()) - tgGrp = route[i + 1].begin()->nd->pl().getSI()->getGroup(); - - std::set froms; - for (const auto& fr : route[i]) { - froms.insert(fr.nd->getAdjListOut().begin(), - fr.nd->getAdjListOut().end()); - } - - for (auto eFr : froms) { - router::Node* cNodeFr = nodes.find(eFr)->second; - - EdgeSet tos; - std::map edges; - std::map pens; - std::unordered_map edgeLists; - std::unordered_map costs; - - assert(route[i + 1].size()); - - for (const auto& to : route[i + 1]) { - assert(to.nd->getAdjListOut().size()); - for (auto eTo : 
to.nd->getAdjListOut()) { - tos.insert(eTo); - if (!nextNodes.count(eTo)) nextNodes[eTo] = cgraph->addNd(to.nd); - if (i == route.size() - 2) cgraph->addEdg(nextNodes[eTo], sink); - - auto* ce = cgraph->addEdg(cNodeFr, nextNodes[eTo]); - edges[eTo] = ce; - pens[eTo] = to.pen; - - edgeLists[eTo] = ce->pl().getEdges(); - ce->pl().setStartNode(eFr->getFrom()); - // for debugging - ce->pl().setStartEdge(eFr); - - ce->pl().setEndNode(to.nd); - // for debugging - ce->pl().setEndEdge(eTo); - } - } - - size_t iters = EDijkstra::ITERS; - auto t1 = TIME(); - - assert(tos.size()); - assert(froms.size()); - - hops(eFr, froms, tos, tgGrp, edgeLists, &costs, rAttrs, rOpts, rest, - hopBand); - double itPerSec = - (static_cast(EDijkstra::ITERS - iters)) / TOOK(t1, TIME()); - n++; - itPerSecTot += itPerSec; - - LOG(VDEBUG) << "from " << eFr << ": 1-" << tos.size() << " (" - << route[i + 1].size() << " nodes) hop took " - << EDijkstra::ITERS - iters << " iterations, " - << TOOK(t1, TIME()) << "ms (tput: " << itPerSec << " its/ms)"; - for (auto& kv : edges) { - kv.second->pl().setCost( - EdgeCost(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, pens[kv.first], 0) + - costs[kv.first]); - - if (kv.second->pl().getEdges()->size()) { - if (kv.second->pl().getEdges() && - kv.second->pl().getEdges()->size()) { - // the reach edge is included, but we dont want it in the geometry - kv.second->pl().getEdges()->erase( - kv.second->pl().getEdges()->begin()); - } - } - } - } - - std::swap(nodes, nextNodes); - } - - LOG(VDEBUG) << "Hops took " << EDijkstra::ITERS - iters << " iterations," - << " average tput was " << (itPerSecTot / n) << " its/ms"; - - iters = EDijkstra::ITERS; - std::vector res; - EDijkstra::shortestPath(source, sink, ccost, &res); - size_t j = 0; - - LOG(VDEBUG) << "Optim graph solve took " << EDijkstra::ITERS - iters - << " iterations."; - - for (auto i = res.rbegin(); i != res.rend(); i++) { - const auto e = *i; - if (e->getFrom() != source && e->getTo() != sink) { - 
assert(e->pl().frontNode()); - assert(e->pl().backNode()); - - ret[j] = EdgeListHop{std::move(*e->pl().getEdges()), e->pl().frontNode(), - e->pl().backNode()}; - j++; - } - } - - assert(ret.size() == j); - return ret; -} - -// _____________________________________________________________________________ -void Router::hops(trgraph::Edge* from, const std::set& froms, - const std::set tos, - const trgraph::StatGroup* tgGrp, - const std::unordered_map& edgesRet, - std::unordered_map* rCosts, - const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, - const osm::Restrictor& rest, HopBand hopB) const { - std::set rem; - - CostFunc cost(rAttrs, rOpts, rest, tgGrp, hopB.maxD); - - const auto& cached = getCachedHops(from, tos, edgesRet, rCosts, rAttrs); - - for (auto e : cached) { - // shortcut: if the nodes lie in two different connected components, - // the distance between them is trivially infinite - if (e == from || e->getFrom() == from->getFrom() || - from->getFrom()->pl().getComp() != e->getTo()->pl().getComp() || - e->pl().oneWay() == 2 || from->pl().oneWay() == 2) { - (*rCosts)[e] = cost.inf(); - } else { - rem.insert(e); - } - } - - LOG(VDEBUG) << "From cache: " << tos.size() - rem.size() - << ", have to cal: " << rem.size(); - - if (rem.size()) { - DistHeur dist(from->getFrom()->pl().getComp()->minEdgeLvl, rOpts, rem); - const auto& ret = EDijkstra::shortestPath(from, rem, cost, dist, edgesRet); - for (const auto& kv : ret) { - nestedCache(edgesRet.at(kv.first), froms, cost, rAttrs); - - (*rCosts)[kv.first] = kv.second; - } - } -} - -// _____________________________________________________________________________ -void Router::nestedCache(const EdgeList* el, - const std::set& froms, - const CostFunc& cost, - const RoutingAttrs& rAttrs) const { - if (el->size() == 0) return; - // iterate over result edges backwards - EdgeList curEdges; - EdgeCost curCost; - - size_t j = 0; - - for (auto i = el->begin(); i < el->end(); i++) { - if (curEdges.size()) { - curCost = 
curCost + cost(*i, (*i)->getTo(), curEdges.back()); - } - - curEdges.push_back(*i); - - if (froms.count(*i)) { - EdgeCost startC = cost(0, 0, *i) + curCost; - cache(*i, el->front(), startC, &curEdges, rAttrs); - j++; - } - } -} - -// _____________________________________________________________________________ -std::set Router::getCachedHops( - trgraph::Edge* from, const std::set& tos, - const std::unordered_map& edgesRet, - std::unordered_map* rCosts, - const RoutingAttrs& rAttrs) const { - std::set ret; - for (auto to : tos) { - if ((*_cache[omp_get_thread_num()])[rAttrs][from].count(to)) { - const auto& cv = (*_cache[omp_get_thread_num()])[rAttrs][from][to]; - (*rCosts)[to] = cv.first; - *edgesRet.at(to) = cv.second; - } else { - ret.insert(to); - } - } - - return ret; -} - -// _____________________________________________________________________________ -void Router::cache(trgraph::Edge* from, trgraph::Edge* to, const EdgeCost& c, - EdgeList* edges, const RoutingAttrs& rAttrs) const { - if (from == to) return; - (*_cache[omp_get_thread_num()])[rAttrs][from][to] = - std::pair(c, *edges); -} - -// _____________________________________________________________________________ -size_t Router::getCacheNumber() const { return _cache.size(); } diff --git a/src/pfaedle/router/Router.h b/src/pfaedle/router/Router.h index b93876a..630d456 100644 --- a/src/pfaedle/router/Router.h +++ b/src/pfaedle/router/Router.h @@ -5,189 +5,97 @@ #ifndef PFAEDLE_ROUTER_ROUTER_H_ #define PFAEDLE_ROUTER_ROUTER_H_ -#include +#include #include +#include +#include +#include #include #include #include -#include -#include -#include +#include "pfaedle/Def.h" #include "pfaedle/osm/Restrictor.h" -#include "pfaedle/router/Graph.h" +#include "pfaedle/router/HopCache.h" #include "pfaedle/router/Misc.h" #include "pfaedle/router/RoutingAttrs.h" +#include "pfaedle/router/TripTrie.h" +#include "pfaedle/router/Weights.h" #include "pfaedle/trgraph/Graph.h" -#include "util/graph/Dijkstra.h" +#include 
"util/Misc.h" +#include "util/geo/Geo.h" #include "util/graph/EDijkstra.h" -using util::graph::EDijkstra; -using util::graph::Dijkstra; - namespace pfaedle { namespace router { -typedef std::unordered_map CombNodeMap; +constexpr static uint32_t ROUTE_INF = std::numeric_limits::max(); +constexpr static double DBL_INF = std::numeric_limits::infinity(); +constexpr static size_t NO_PREDE = std::numeric_limits::max(); + +constexpr static int MAX_ROUTE_COST_DOUBLING_STEPS = 3; + typedef std::pair HId; -typedef std::map< - RoutingAttrs, - std::unordered_map > > > - Cache; +typedef std::vector LayerCostsDAG; +typedef std::vector CostsDAG; +typedef std::vector> PredeDAG; -struct HopBand { - double minD; - double maxD; - const trgraph::Edge* nearest; - double maxInGrpDist; -}; +typedef std::unordered_map> + EdgeCostMatrix; +typedef std::unordered_map> + EdgeDistMatrix; +typedef util::graph::EDijkstra::EList TrEList; -struct CostFunc - : public EDijkstra::CostFunc { - CostFunc(const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, - const osm::Restrictor& res, const trgraph::StatGroup* tgGrp, - double max) - : _rAttrs(rAttrs), - _rOpts(rOpts), - _res(res), - _max(max), - _tgGrp(tgGrp), - _inf(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, _max, 0) {} +typedef std::vector, uint32_t>> CostMatrix; - const RoutingAttrs& _rAttrs; - const RoutingOpts& _rOpts; - const osm::Restrictor& _res; - double _max; - const trgraph::StatGroup* _tgGrp; - EdgeCost _inf; - - EdgeCost operator()(const trgraph::Edge* from, const trgraph::Node* n, - const trgraph::Edge* to) const; - EdgeCost inf() const { return _inf; } - - double transitLineCmp(const trgraph::EdgePL& e) const; -}; - -struct NCostFunc - : public Dijkstra::CostFunc { - NCostFunc(const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, - const osm::Restrictor& res, const trgraph::StatGroup* tgGrp) - : _rAttrs(rAttrs), - _rOpts(rOpts), - _res(res), - _tgGrp(tgGrp), - _inf(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - std::numeric_limits::infinity(), 0) 
{} - - const RoutingAttrs& _rAttrs; - const RoutingOpts& _rOpts; - const osm::Restrictor& _res; - const trgraph::StatGroup* _tgGrp; - EdgeCost _inf; - - EdgeCost operator()(const trgraph::Node* from, const trgraph::Edge* e, - const trgraph::Node* to) const; - EdgeCost inf() const { return _inf; } - - double transitLineCmp(const trgraph::EdgePL& e) const; -}; - -struct DistHeur - : public EDijkstra::HeurFunc { - DistHeur(uint8_t minLvl, const RoutingOpts& rOpts, - const std::set& tos); - - const RoutingOpts& _rOpts; - uint8_t _lvl; - FPoint _center; - double _maxCentD; - EdgeCost operator()(const trgraph::Edge* a, - const std::set& b) const; -}; - -struct NDistHeur - : public Dijkstra::HeurFunc { - NDistHeur(const RoutingOpts& rOpts, const std::set& tos); - - const RoutingOpts& _rOpts; - FPoint _center; - double _maxCentD; - EdgeCost operator()(const trgraph::Node* a, - const std::set& b) const; -}; - -struct CombCostFunc - : public EDijkstra::CostFunc { - explicit CombCostFunc(const RoutingOpts& rOpts) : _rOpts(rOpts) {} - - const RoutingOpts& _rOpts; - - double operator()(const router::Edge* from, const router::Node* n, - const router::Edge* to) const; - double inf() const { return std::numeric_limits::infinity(); } +class Router { + public: + virtual ~Router() = default; + virtual std::map route( + const TripTrie* trie, const EdgeCandMap& ecm, + const RoutingOpts& rOpts, const osm::Restrictor& rest, HopCache* hopCache, + bool noFastHops) const = 0; }; /* * Finds the most likely route of schedule-based vehicle between stops in a * physical transportation network */ -class Router { +template +class RouterImpl : public Router { public: - // Init this router with caches for numThreads threads - Router(const trgraph::Graph& g, size_t numThreads); - ~Router(); - - // Find the most likely path through the graph for a node candidate route. 
- EdgeListHops route(const NodeCandRoute& route, const RoutingAttrs& rAttrs, - const RoutingOpts& rOpts, const osm::Restrictor& rest, - router::Graph* cgraph) const; - - // Find the most likely path through cgraph for a node candidate route, but - // based on a greedy node to node approach - EdgeListHops routeGreedy(const NodeCandRoute& route, - const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, - const osm::Restrictor& rest) const; - - // Find the most likely path through cgraph for a node candidate route, but - // based on a greedy node to node set approach - EdgeListHops routeGreedy2(const NodeCandRoute& route, - const RoutingAttrs& rAttrs, - const RoutingOpts& rOpts, - const osm::Restrictor& rest) const; - - // Return the number of thread caches this router was initialized with - size_t getCacheNumber() const; + // Find the most likely path through the graph for a trip trie. + virtual std::map route( + const TripTrie* trie, const EdgeCandMap& ecm, + const RoutingOpts& rOpts, const osm::Restrictor& rest, HopCache* hopCache, + bool noFastHops) const; private: - const trgraph::Graph& _g; + void hops(const EdgeCandGroup& from, const EdgeCandGroup& to, + CostMatrix* rCosts, CostMatrix* dists, const RoutingAttrs& rAttrs, + const RoutingOpts& rOpts, const osm::Restrictor& rest, + HopCache* hopCache, uint32_t maxCost) const; - mutable std::vector _cache; - HopBand getHopBand(const NodeCandGroup& a, const NodeCandGroup& b, - const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, - const osm::Restrictor& rest) const; + void hopsFast(const EdgeCandGroup& from, const EdgeCandGroup& to, + const LayerCostsDAG& initCosts, CostMatrix* rCosts, + const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& rest, - void hops(trgraph::Edge* from, const std::set& froms, - const std::set to, const trgraph::StatGroup* tgGrp, - const std::unordered_map& edgesRet, - std::unordered_map* rCosts, - const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, - const 
osm::Restrictor& rest, HopBand hopB) const; + HopCache* hopCache, uint32_t maxCost) const; - std::set getCachedHops( - trgraph::Edge* from, const std::set& to, - const std::unordered_map& edgesRet, - std::unordered_map* rCosts, - const RoutingAttrs& rAttrs) const; + bool connected(const EdgeCand& from, const EdgeCandGroup& tos) const; + bool connected(const EdgeCandGroup& froms, const EdgeCand& to) const; - void cache(trgraph::Edge* from, trgraph::Edge* to, const EdgeCost& c, - EdgeList* edges, const RoutingAttrs& rAttrs) const; + bool cacheDrop( - void nestedCache(const EdgeList* el, const std::set& froms, - const CostFunc& cost, const RoutingAttrs& rAttrs) const; + HopCache* hopCache, const std::set& froms, + const trgraph::Edge* to, uint32_t maxCost) const; - bool compConned(const NodeCandGroup& a, const NodeCandGroup& b) const; + uint32_t addNonOverflow(uint32_t a, uint32_t b) const; }; + +#include "pfaedle/router/Router.tpp" } // namespace router } // namespace pfaedle diff --git a/src/pfaedle/router/Router.tpp b/src/pfaedle/router/Router.tpp new file mode 100644 index 0000000..f9b2a7b --- /dev/null +++ b/src/pfaedle/router/Router.tpp @@ -0,0 +1,629 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifdef _OPENMP +#include +#else +#define omp_get_thread_num() 0 +#define omp_get_num_procs() 1 +#endif + +#include +#include +#include +#include +#include +#include +#include + +using util::graph::EDijkstra; + +// _____________________________________________________________________________ +template +std::map RouterImpl::route( + const TripTrie* trie, const EdgeCandMap& ecm, + const RoutingOpts& rOpts, const osm::Restrictor& rest, HopCache* hopCache, + bool noFastHops) const { + std::map ret; + + // the current node costs in our DAG + CostsDAG costsDAG(trie->getNds().size()); + PredeDAG predeDAG(trie->getNds().size()); + std::vector maxCosts(trie->getNds().size()); + + // skip the root node, init all to inf + for (size_t nid = 1; nid < trie->getNds().size(); nid++) { + costsDAG[nid].resize(ecm.at(nid).size(), DBL_INF); + predeDAG[nid].resize(ecm.at(nid).size(), NO_PREDE); + } + + std::stack st; + + // init cost of all first childs + for (size_t cnid : trie->getNd(0).childs) { + st.push(cnid); + for (size_t frId = 0; frId < ecm.at(cnid).size(); frId++) { + costsDAG[cnid][frId] = ecm.at(cnid)[frId].pen; + } + } + + while (!st.empty()) { + size_t frTrNid = st.top(); + st.pop(); + const auto& frTrNd = trie->getNd(frTrNid); + + // determine the max speed for this hop + double maxSpeed = 0; + for (size_t nid = 0; nid < ecm.at(frTrNid).size(); nid++) { + if (!ecm.at(frTrNid)[nid].e) continue; + if (ecm.at(frTrNid)[nid].e->getFrom()->pl().getComp().maxSpeed > maxSpeed) + maxSpeed = ecm.at(frTrNid)[nid].e->getFrom()->pl().getComp().maxSpeed; + } + + for (size_t toTrNid : trie->getNd(frTrNid).childs) { + CostMatrix costM, dists; + const auto& toTrNd = trie->getNd(toTrNid); + + if (frTrNd.arr && !toTrNd.arr) { + for (size_t toId = 0; toId < costsDAG[toTrNid].size(); toId++) { + auto toCand = ecm.at(toTrNid)[toId]; + for (size_t frId : toCand.depPrede) { + double newC = costsDAG[frTrNid][frId] + ecm.at(toTrNid)[toId].pen; + if (newC < 
costsDAG[toTrNid][toId]) { + costsDAG[toTrNid][toId] = newC; + predeDAG[toTrNid][toId] = frId; + } + } + } + st.push(toTrNid); + continue; + } + + const double avgDepT = frTrNd.accTime / frTrNd.trips; + const double avgArrT = toTrNd.accTime / toTrNd.trips; + + double hopDist = 0; + + hopDist = util::geo::haversine(frTrNd.lat, frTrNd.lng, toTrNd.lat, + toTrNd.lng); + + double minTime = hopDist / maxSpeed; + double hopTime = avgArrT - avgDepT; + + if (hopTime < minTime) hopTime = minTime; + + uint32_t newMaxCost = TW::maxCost(hopTime, rOpts); + uint32_t maxCost = newMaxCost; + + bool found = false; + int step = 0; + + while (!found && step <= MAX_ROUTE_COST_DOUBLING_STEPS) { + maxCosts[toTrNid] = newMaxCost; + maxCost = newMaxCost; + + // calculate n x n hops between layers + if (noFastHops || !TW::ALLOWS_FAST_ROUTE) { + hops(ecm.at(frTrNid), ecm.at(toTrNid), &costM, &dists, toTrNd.rAttrs, + rOpts, rest, hopCache, maxCost); + } else { + hopsFast(ecm.at(frTrNid), ecm.at(toTrNid), costsDAG[frTrNid], &costM, + toTrNd.rAttrs, rOpts, rest, hopCache, maxCost); + } + + for (size_t matrixI = 0; matrixI < costM.size(); matrixI++) { + const auto& mVal = costM[matrixI]; + const size_t frId = mVal.first.first; + const size_t toId = mVal.first.second; + const uint32_t c = mVal.second; + + double mDist = 0; + + // the dists and the costM matrices have entries at exactly the same + // loc + if (TW::NEED_DIST) mDist = dists[matrixI].second; + + // calculate the transition weights + const double depT = ecm.at(frTrNid)[frId].time; + const double arrT = ecm.at(toTrNid)[toId].time; + const double w = TW::weight(c, mDist, arrT - depT, hopDist, rOpts); + + // update costs to successors in next layer + double newC = costsDAG[frTrNid][frId] + ecm.at(toTrNid)[toId].pen + w; + if (newC < costsDAG[toTrNid][toId]) { + costsDAG[toTrNid][toId] = newC; + predeDAG[toTrNid][toId] = frId; + found = true; + } + } + + if (newMaxCost <= std::numeric_limits::max() / 2) + newMaxCost *= 2; + else + 
newMaxCost = std::numeric_limits::max(); + + if (newMaxCost == maxCost) break; + step++; + } + + if (!found) { + // write the cost for the NULL candidates as a fallback + for (size_t frNid = 0; frNid < ecm.at(frTrNid).size(); frNid++) { + double newC = costsDAG[frTrNid][frNid] + maxCost * 100; + // in the time expanded case, there might be multiple null cands + size_t nullCId = 0; + while (nullCId < ecm.at(toTrNid).size() && + !ecm.at(toTrNid)[nullCId].e) { + if (newC < costsDAG[toTrNid][nullCId]) { + predeDAG[toTrNid][nullCId] = frNid; + costsDAG[toTrNid][nullCId] = newC; + } + nullCId++; + } + } + + // for the remaining, write dummy edges + for (size_t frNid = 0; frNid < ecm.at(frTrNid).size(); frNid++) { + // skip NULL candidates + size_t toNid = 1; + while (toNid < ecm.at(toTrNid).size() && !ecm.at(toTrNid)[toNid].e) + toNid++; + for (; toNid < ecm.at(toTrNid).size(); toNid++) { + double newC = costsDAG[frTrNid][frNid] + ecm.at(toTrNid)[toNid].pen; + if (newC < costsDAG[toTrNid][toNid]) { + predeDAG[toTrNid][toNid] = frNid; + costsDAG[toTrNid][toNid] = newC; + } + } + } + } + + st.push(toTrNid); + } + } + + // update sink costs + std::unordered_map sinkCosts; + std::unordered_map frontIds; + for (auto leaf : trie->getNdTrips()) { + sinkCosts[leaf.first] = DBL_INF; + frontIds[leaf.first] = 0; + + for (size_t lastId = 0; lastId < ecm.at(leaf.first).size(); lastId++) { + double nCost = costsDAG[leaf.first][lastId]; + if (nCost < sinkCosts[leaf.first]) { + frontIds[leaf.first] = lastId; + sinkCosts[leaf.first] = nCost; + } + } + } + + // retrieve edges + for (auto leaf : trie->getNdTrips()) { + const auto leafNid = leaf.first; + auto curTrieNid = leafNid; + + while (predeDAG[curTrieNid][frontIds[leafNid]] != NO_PREDE) { + const auto curTrieParNid = trie->getNd(curTrieNid).parent; + const auto frId = predeDAG[curTrieNid][frontIds[leafNid]]; + const auto toId = frontIds[leafNid]; + + const auto frTrNd = trie->getNd(curTrieParNid); + const auto toTrNd = 
trie->getNd(curTrieNid); + + // skip in-node hops + if (frTrNd.arr && !toTrNd.arr) { + frontIds[leafNid] = frId; + curTrieNid = curTrieParNid; + continue; + } + + std::vector edgs; + + const auto& fr = ecm.at(curTrieParNid)[frId]; + const auto& to = ecm.at(curTrieNid)[toId]; + + // for subtracting and adding progression costs + typename TW::CostFunc costPr(toTrNd.rAttrs, rOpts, rest, ROUTE_INF); + + if (fr.e && to.e) { + // account for max progression start offset, do this exactly like + // in the hops calculation to ensure that we can find the path again + double maxProgrStart = 0; + for (const auto& fr : ecm.at(curTrieParNid)) { + if (!fr.e) continue; + double progrStart = 0; + if (fr.progr > 0) progrStart = costPr(fr.e, 0, 0) * fr.progr; + if (progrStart > maxProgrStart) maxProgrStart = progrStart; + } + + const double maxCostRt = maxCosts[curTrieNid] + maxProgrStart; + uint32_t maxCostRtInt = maxCostRt; + + // avoid overflow + if (maxCostRt >= std::numeric_limits::max()) { + maxCostRtInt = std::numeric_limits::max(); + } + + typename TW::CostFunc cost(toTrNd.rAttrs, rOpts, rest, maxCostRtInt); + typename TW::DistHeur distH(fr.e->getFrom()->pl().getComp().maxSpeed, + rOpts, {to.e}); + + const double c = + EDijkstra::shortestPath(fr.e, to.e, cost, distH, &edgs); + + if (c < maxCostRtInt) { + // a path was found, use it + ret[leafNid].push_back( + {edgs, fr.e, to.e, fr.progr, to.progr, {}, {}}); + } else { + // no path was found, which is marked by an empty edge list + ret[leafNid].push_back({{}, fr.e, to.e, fr.progr, to.progr, {}, {}}); + } + } else { + // fallback to the position given in candidate + if (fr.e) { + ret[leafNid].push_back({edgs, fr.e, 0, fr.progr, 0, {}, to.point}); + } else if (to.e) { + ret[leafNid].push_back({edgs, 0, to.e, 0, to.progr, fr.point, {}}); + } else { + ret[leafNid].push_back({edgs, 0, 0, 0, 0, fr.point, to.point}); + } + } + frontIds[leafNid] = frId; + curTrieNid = curTrieParNid; + } + } + + return ret; +} + +// 
_____________________________________________________________________________ +template +void RouterImpl::hops(const EdgeCandGroup& froms, const EdgeCandGroup& tos, + CostMatrix* rCosts, CostMatrix* dists, + const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& rest, HopCache* hopCache, + uint32_t maxCost) const { + // standard 1 -> n approach + std::set eFrs; + for (const auto& from : froms) { + if (!from.e) continue; + eFrs.insert(from.e); + } + + std::set eTos; + for (const auto& to : tos) { + if (!to.e) continue; + eTos.insert(to.e); + } + + EdgeCostMatrix ecm; + EdgeDistMatrix ecmDist; + + // account for max progression start offset + double maxProgrStart = 0; + typename TW::CostFunc cost(rAttrs, rOpts, rest, ROUTE_INF); + for (const auto& fr : froms) { + if (!fr.e) continue; + double progrStart = 0; + if (fr.progr > 0) progrStart = cost(fr.e, 0, 0) * fr.progr; + if (progrStart > maxProgrStart) maxProgrStart = progrStart; + } + + maxCost = addNonOverflow(maxCost, maxProgrStart); + typename TW::CostFunc costF(rAttrs, rOpts, rest, maxCost); + + for (trgraph::Edge* eFrom : eFrs) { + std::set remTos; + for (trgraph::Edge* eTo : eTos) { + // init ecmDist + ecmDist[eFrom][eTo] = ROUTE_INF; + + std::pair cached = {0, 0}; + if (hopCache) cached = hopCache->get(eFrom, eTo); + + // shortcut: if the nodes lie in two different connected components, + // the distance between them is trivially infinite + if (eFrom->getFrom()->pl().getCompId() != + eTo->getTo()->pl().getCompId()) { + ecm[eFrom][eTo] = costF.inf(); + } else if (cached.second >= costF.inf()) { + ecm[eFrom][eTo] = costF.inf(); + } else if (!TW::NEED_DIST && cached.second) { + ecm[eFrom][eTo] = cached.first; + } else { + remTos.insert(eTo); + } + } + + if (remTos.size()) { + typename TW::DistHeur distH(eFrom->getFrom()->pl().getComp().maxSpeed, + rOpts, remTos); + + std::unordered_map paths; + std::unordered_map pathPtrs; + for (auto to : tos) pathPtrs[to.e] = &paths[to.e]; + + const 
auto& costs = + EDijkstra::shortestPath(eFrom, remTos, costF, distH, pathPtrs); + + for (const auto& c : costs) { + ecm[eFrom][c.first] = c.second; + + if (paths[c.first].size() == 0) { + if (hopCache) hopCache->setMin(eFrom, c.first, maxCost); + continue; // no path found + } + + if (hopCache) hopCache->setEx(eFrom, c.first, c.second); + } + + if (TW::NEED_DIST) { + for (const auto& c : costs) { + if (!paths[c.first].size()) continue; + double d = 0; + // don't count last edge + for (size_t i = paths[c.first].size() - 1; i > 0; i--) { + d += paths[c.first][i]->pl().getLength(); + } + ecmDist[eFrom][c.first] = d; + } + } + } + } + + // build return costs + for (size_t frId = 0; frId < froms.size(); frId++) { + auto fr = froms[frId]; + if (!fr.e) continue; + auto costFr = costF(fr.e, 0, 0); + for (size_t toId = 0; toId < tos.size(); toId++) { + auto to = tos[toId]; + if (!to.e) continue; + + uint32_t c = ecm[fr.e][to.e]; + + if (c >= maxCost) continue; + + double dist = 0; + if (TW::NEED_DIST) dist = ecmDist[fr.e][to.e]; + + if (fr.e == to.e) { + if (fr.progr <= to.progr) { + auto costTo = costF(to.e, 0, 0); + const uint32_t progrCFr = costFr * fr.progr; + const uint32_t progrCTo = costTo * to.progr; + + // calculate this in one step to avoid uint32_t underflow below + c += progrCTo - progrCFr; + } else { + // trivial case we can ignore + continue; + } + + } else { + // subtract progression cost on first edge + if (fr.progr > 0) { + const uint32_t progrCFr = costFr * fr.progr; + c -= progrCFr; + if (TW::NEED_DIST) dist -= fr.e->pl().getLength() * fr.progr; + } + + // add progression cost on last edge + if (to.progr > 0) { + const auto costTo = costF(to.e, 0, 0); + const uint32_t progrCTo = costTo * to.progr; + c += progrCTo; + if (TW::NEED_DIST) dist += to.e->pl().getLength() * to.progr; + } + } + + if (c < maxCost - maxProgrStart) { + rCosts->push_back({{frId, toId}, c}); + if (TW::NEED_DIST) + dists->push_back({{frId, toId}, static_cast(dist)}); + } + } + } +} + 
+// _____________________________________________________________________________ +template +void RouterImpl::hopsFast(const EdgeCandGroup& froms, + const EdgeCandGroup& tos, + const LayerCostsDAG& rawInitCosts, + CostMatrix* rCosts, const RoutingAttrs& rAttrs, + const RoutingOpts& rOpts, + const osm::Restrictor& restr, HopCache* hopCache, + uint32_t maxCost) const { + std::unordered_map initCosts; + + std::set eFrs, eTos; + std::map> eFrCands, eToCands; + + double maxSpeed = 0; + for (size_t frId = 0; frId < froms.size(); frId++) { + if (rawInitCosts[frId] >= DBL_INF || !connected(froms[frId], tos)) continue; + + eFrs.insert(froms[frId].e); + eFrCands[froms[frId].e].push_back(frId); + + if (froms[frId].e->getFrom()->pl().getComp().maxSpeed > maxSpeed) + maxSpeed = froms[frId].e->getFrom()->pl().getComp().maxSpeed; + } + + for (size_t toId = 0; toId < tos.size(); toId++) { + if (!connected(froms, tos[toId])) + continue; // skip nodes not conn'ed to any + + if (hopCache && cacheDrop(hopCache, eFrs, tos[toId].e, maxCost)) + continue; // skip nodes we have already encountered at higher cost + + eTos.insert(tos[toId].e); + eToCands[tos[toId].e].push_back(toId); + } + + if (eFrs.size() == 0 || eTos.size() == 0) return; + + // account for max progression start offset + double maxProgrStart = 0; + typename TW::CostFunc progrCostF(rAttrs, rOpts, restr, ROUTE_INF); + for (const auto& fr : froms) { + if (!fr.e) continue; + double progrStart = 0; + if (fr.progr > 0) progrStart = progrCostF(fr.e, 0, 0) * fr.progr; + if (progrStart > maxProgrStart) maxProgrStart = progrStart; + } + + // initialize init doubles + LayerCostsDAG prepInitCosts(froms.size()); + for (size_t frId = 0; frId < froms.size(); frId++) { + if (!froms[frId].e || rawInitCosts[frId] >= DBL_INF) continue; + const auto& fr = froms[frId]; + // offset by progr start + double progrStart = progrCostF(fr.e, 0, 0) * fr.progr; + prepInitCosts[frId] = + TW::invWeight(rawInitCosts[frId], rOpts) + maxProgrStart - 
progrStart; + } + + // all init costs are inf + for (const auto& fr : froms) initCosts[fr.e] = ROUTE_INF; + + // now chose the best offset cost + for (size_t frId = 0; frId < froms.size(); frId++) { + if (!froms[frId].e || rawInitCosts[frId] >= DBL_INF) continue; + const auto& fr = froms[frId]; + if (prepInitCosts[frId] < initCosts[fr.e]) + initCosts[fr.e] = prepInitCosts[frId]; + } + + // get max init costs + uint32_t maxInit = 0; + uint32_t minInit = ROUTE_INF; + for (const auto& c : initCosts) { + if (!eFrs.count(c.first)) continue; + if (c.second != ROUTE_INF && c.second > maxInit) maxInit = c.second; + if (c.second < minInit) minInit = c.second; + } + + for (auto& c : initCosts) c.second = c.second - minInit; + + // account for start offsets + maxCost = addNonOverflow(maxCost, maxProgrStart); + + typename TW::CostFunc costF(rAttrs, rOpts, restr, + maxCost + (maxInit - minInit)); + + std::unordered_map paths; + std::unordered_map pathPtrs; + for (const auto& to : tos) pathPtrs[to.e] = &paths[to.e]; + + typename TW::DistHeur distH(maxSpeed, rOpts, eTos); + + const auto& costs = + EDijkstra::shortestPath(eFrs, eTos, initCosts, maxCost, costF, distH); + + for (const auto& c : costs) { + auto toEdg = c.first; + if (c.second.second >= costF.inf()) { + if (hopCache) hopCache->setMin(eFrs, toEdg, maxCost); + continue; // no path found + } + auto fromEdg = c.second.first; + uint32_t cost = c.second.second - initCosts[fromEdg]; + + if (cost >= maxCost) continue; + + for (size_t frId : eFrCands.find(fromEdg)->second) { + const auto& fr = froms[frId]; + auto costFr = costF(fr.e, 0, 0); + + for (size_t toId : eToCands.find(toEdg)->second) { + const auto& to = tos[toId]; + uint32_t wrCost = cost; + + if (fr.e == to.e) { + if (fr.progr <= to.progr) { + const auto costTo = costF(to.e, 0, 0); + const uint32_t progrCFr = costFr * fr.progr; + const uint32_t progrCTo = costTo * to.progr; + + // calculate this in one step to avoid uint32_t underflow below + wrCost += progrCTo - 
progrCFr; + } else { + // trivial case we can ignore + continue; + } + } else { + // subtract progression cost on first edge + if (fr.progr > 0) { + const uint32_t progrCFr = costFr * fr.progr; + wrCost -= progrCFr; + } + + // add progression cost on last edge + if (to.progr > 0) { + const auto costTo = costF(to.e, 0, 0); + const uint32_t progrCTo = costTo * to.progr; + wrCost += progrCTo; + } + } + + if (wrCost < maxCost - maxProgrStart) { + rCosts->push_back({{frId, toId}, wrCost}); + } + } + } + } +} + +// _____________________________________________________________________________ +template +bool RouterImpl::connected(const EdgeCand& fr, + const EdgeCandGroup& tos) const { + if (!fr.e) return false; + for (const auto& to : tos) { + if (!to.e) continue; + if (fr.e->getFrom()->pl().getCompId() == to.e->getFrom()->pl().getCompId()) + return true; + } + return false; +} + +// _____________________________________________________________________________ +template +bool RouterImpl::connected(const EdgeCandGroup& froms, + const EdgeCand& to) const { + if (!to.e) return false; + for (const auto& fr : froms) { + if (!fr.e) continue; + if (fr.e->getFrom()->pl().getCompId() == to.e->getFrom()->pl().getCompId()) + return true; + } + return false; +} + +// _____________________________________________________________________________ +template +bool RouterImpl::cacheDrop(HopCache* hopCache, + const std::set& froms, + const trgraph::Edge* to, + uint32_t maxCost) const { + for (auto fr : froms) + if (hopCache->get(fr, to).first <= maxCost) return false; + + return true; +} + +// _____________________________________________________________________________ +template +uint32_t RouterImpl::addNonOverflow(uint32_t a, uint32_t b) const { + if (a == std::numeric_limits::max() || + b == std::numeric_limits::max()) + return std::numeric_limits::max(); + uint32_t res = a + b; + if (res >= a && res >= b) return res; + return std::numeric_limits::max(); +} diff --git 
a/src/pfaedle/router/RoutingAttrs.h b/src/pfaedle/router/RoutingAttrs.h index 05aa5ed..3f7965b 100644 --- a/src/pfaedle/router/RoutingAttrs.h +++ b/src/pfaedle/router/RoutingAttrs.h @@ -5,8 +5,10 @@ #ifndef PFAEDLE_ROUTER_ROUTINGATTRS_H_ #define PFAEDLE_ROUTER_ROUTINGATTRS_H_ -#include #include +#include +#include +#include "pfaedle/statsimi-classifier/StatsimiClassifier.h" #include "pfaedle/trgraph/EdgePL.h" using pfaedle::trgraph::TransitEdgeLine; @@ -14,37 +16,81 @@ using pfaedle::trgraph::TransitEdgeLine; namespace pfaedle { namespace router { +struct LineSimilarity { + bool nameSimilar : 1; + bool fromSimilar : 1; + bool toSimilar : 1; +}; + +inline bool operator<(const LineSimilarity& a, const LineSimilarity& b) { + return (a.nameSimilar + a.fromSimilar + a.toSimilar) < + (b.nameSimilar + b.fromSimilar + b.toSimilar); +} + struct RoutingAttrs { - RoutingAttrs() : fromString(""), toString(""), shortName(""), _simiCache() {} - std::string fromString; - std::string toString; + RoutingAttrs() + : lineFrom(""), lineTo(), shortName(""), classifier(0), _simiCache() {} + RoutingAttrs(const std::string& shortName, const std::string& lineFrom, + const std::string& lineTo) + : lineFrom(lineFrom), + lineTo({lineTo}), + shortName(shortName), + classifier(0), + _simiCache() {} + std::string lineFrom; + std::vector lineTo; std::string shortName; - mutable std::map _simiCache; + const pfaedle::statsimiclassifier::StatsimiClassifier* classifier; + + mutable std::unordered_map _simiCache; + + LineSimilarity simi(const TransitEdgeLine* line) const { + // shortcut, if we don't have a line information, classify as similar + if (line->shortName.empty() && line->toStr.empty() && line->fromStr.empty()) + return {true, true, true}; - double simi(const TransitEdgeLine* line) const { auto i = _simiCache.find(line); if (i != _simiCache.end()) return i->second; - double cur = 1; - if (router::lineSimi(line->shortName, shortName) > 0.5) cur -= 0.33; + LineSimilarity ret{false, false, 
false}; - if (line->toStr.empty() || router::statSimi(line->toStr, toString) > 0.5) - cur -= 0.33; + if (shortName.empty() || router::lineSimi(line->shortName, shortName) > 0.5) + ret.nameSimilar = true; - if (line->fromStr.empty() || - router::statSimi(line->fromStr, fromString) > 0.5) - cur -= 0.33; + if (lineTo.size() == 0) { + ret.toSimilar = true; + } else { + for (const auto& lTo : lineTo) { + if (lTo.empty() || classifier->similar(line->toStr, lTo)) { + ret.toSimilar = true; + break; + } + } + } - _simiCache[line] = cur; + if (lineFrom.empty() || classifier->similar(line->fromStr, lineFrom)) + ret.fromSimilar = true; - return cur; + _simiCache[line] = ret; + + return ret; + } + + void merge(const RoutingAttrs& other) { + assert(other.lineFrom == lineFrom); + assert(other.shortName == shortName); + + for (const auto& l : other.lineTo) { + auto i = std::lower_bound(lineTo.begin(), lineTo.end(), l); + if (i != lineTo.end() && (*i) == l) continue; // already present + lineTo.insert(i, l); + } } }; inline bool operator==(const RoutingAttrs& a, const RoutingAttrs& b) { - return a.shortName == b.shortName && a.toString == b.toString && - a.fromString == b.fromString; + return a.shortName == b.shortName && a.lineFrom == b.lineFrom; } inline bool operator!=(const RoutingAttrs& a, const RoutingAttrs& b) { @@ -52,10 +98,8 @@ inline bool operator!=(const RoutingAttrs& a, const RoutingAttrs& b) { } inline bool operator<(const RoutingAttrs& a, const RoutingAttrs& b) { - return a.fromString < b.fromString || - (a.fromString == b.fromString && a.toString < b.toString) || - (a.fromString == b.fromString && a.toString == b.toString && - a.shortName < b.shortName); + return a.lineFrom < b.lineFrom || + (a.lineFrom == b.lineFrom && a.shortName < b.shortName); } } // namespace router diff --git a/src/pfaedle/router/ShapeBuilder.cpp b/src/pfaedle/router/ShapeBuilder.cpp index ede3c64..bf5857c 100644 --- a/src/pfaedle/router/ShapeBuilder.cpp +++ 
b/src/pfaedle/router/ShapeBuilder.cpp @@ -2,381 +2,599 @@ // Chair of Algorithms and Data Structures. // Authors: Patrick Brosi -#include +#include +#include +#include #include #include +#include +#include +#include #include #include + #include "ad/cppgtfs/gtfs/Feed.h" -#include "pfaedle/eval/Collector.h" +#include "pfaedle/Def.h" +#include "pfaedle/gtfs/Feed.h" +#include "pfaedle/gtfs/StopTime.h" #include "pfaedle/osm/OsmBuilder.h" #include "pfaedle/router/ShapeBuilder.h" -#include "pfaedle/trgraph/StatGroup.h" +#include "pfaedle/statsimi-classifier/StatsimiClassifier.h" +#include "util/geo/Geo.h" #include "util/geo/output/GeoGraphJsonOutput.h" #include "util/geo/output/GeoJsonOutput.h" #include "util/graph/EDijkstra.h" #include "util/log/Log.h" -using util::geo::FPoint; +using util::geo::DBox; +using util::geo::DPoint; using util::geo::extendBox; -using util::geo::Box; using util::geo::minbox; -using util::geo::FLine; -using util::geo::webMercMeterDist; -using util::geo::webMercToLatLng; -using util::geo::latLngToWebMerc; -using util::geo::output::GeoGraphJsonOutput; -using pfaedle::router::ShapeBuilder; -using pfaedle::router::FeedStops; -using pfaedle::router::NodeCandGroup; -using pfaedle::router::NodeCandRoute; -using pfaedle::router::RoutingAttrs; -using pfaedle::router::EdgeListHops; -using pfaedle::router::Clusters; -using pfaedle::osm::BBoxIdx; -using ad::cppgtfs::gtfs::Stop; -using ad::cppgtfs::gtfs::Trip; -using ad::cppgtfs::gtfs::Feed; -using ad::cppgtfs::gtfs::StopTime; +using util::geo::PolyLine; + +using ad::cppgtfs::gtfs::NO_COLOR; using ad::cppgtfs::gtfs::ShapePoint; +using ad::cppgtfs::gtfs::Stop; +using pfaedle::gtfs::Feed; +using pfaedle::gtfs::StopTime; +using pfaedle::gtfs::Trip; +using pfaedle::osm::BBoxIdx; +using pfaedle::router::EdgeCandGroup; +using pfaedle::router::EdgeCandMap; +using pfaedle::router::EdgeListHops; +using pfaedle::router::FeedStops; +using pfaedle::router::RoutingAttrs; +using pfaedle::router::ShapeBuilder; +using 
pfaedle::router::Stats; +using pfaedle::router::TripForests; +using pfaedle::router::TripTrie; +using pfaedle::trgraph::EdgeGrid; +using pfaedle::trgraph::NodeGrid; +using util::geo::latLngToWebMerc; +using util::geo::M_PER_DEG; +using util::geo::output::GeoGraphJsonOutput; // _____________________________________________________________________________ -ShapeBuilder::ShapeBuilder(Feed* feed, MOTs mots, - const config::MotConfig& motCfg, - eval::Collector* ecoll, const config::Config& cfg) +ShapeBuilder::ShapeBuilder( + Feed* feed, MOTs mots, const config::MotConfig& motCfg, + pfaedle::trgraph::Graph* g, router::FeedStops* fStops, + osm::Restrictor* restr, + const pfaedle::statsimiclassifier::StatsimiClassifier* classifier, + router::Router* router, const config::Config& cfg) : _feed(feed), _mots(mots), _motCfg(motCfg), - _ecoll(ecoll), _cfg(cfg), - _crouter(_g, omp_get_num_procs()), - _curShpCnt(0) { - _numThreads = _crouter.getCacheNumber(); - writeMotStops(); + _g(g), + _stops(fStops), + _curShpCnt(0), + _restr(restr), + _classifier(classifier), + _router(router) { + pfaedle::osm::BBoxIdx box(BOX_PADDING); + ShapeBuilder::getGtfsBox(feed, mots, cfg.shapeTripId, cfg.dropShapes, &box, + _motCfg.osmBuildOpts.maxSpeed, 0, cfg.verbosity); - // TODO(patrick): maybe do this on demand to avoid graph filtering / reading - // for input where no routing is necessary (already shape'd) - buildGraph(); + _eGrid = EdgeGrid(cfg.gridSize, cfg.gridSize, box.getFullBox(), false); + _nGrid = NodeGrid(cfg.gridSize, cfg.gridSize, box.getFullBox(), false); + + LOG(DEBUG) << "Grid size of " << _nGrid.getXWidth() << "x" + << _nGrid.getYHeight(); + + buildIndex(); } // _____________________________________________________________________________ -void ShapeBuilder::writeMotStops() { - for (auto t : _feed->getTrips()) { - if (!_cfg.shapeTripId.empty() && t.second->getId() != _cfg.shapeTripId) - continue; - if (_mots.count(t.second->getRoute()->getType()) && - 
_motCfg.mots.count(t.second->getRoute()->getType())) { - for (auto st : t.second->getStopTimes()) { - _stops[st.getStop()] = 0; - } +void ShapeBuilder::buildIndex() { + for (auto* n : _g->getNds()) { + for (auto* e : n->getAdjListOut()) { + if (e->pl().lvl() > _motCfg.osmBuildOpts.maxSnapLevel) continue; + // don't snap to one way edges + if (e->pl().oneWay() == 2) continue; + + _eGrid.add(*e->pl().getGeom(), e); + } + } + + for (auto* n : _g->getNds()) { + // only station nodes + if (n->pl().getSI()) { + _nGrid.add(*n->pl().getGeom(), n); } } } // _____________________________________________________________________________ -FeedStops* ShapeBuilder::getFeedStops() { return &_stops; } +void ShapeBuilder::buildCandCache(const TripForests& forests) { + std::set stops; + size_t count = 0; -// _____________________________________________________________________________ -const NodeCandGroup& ShapeBuilder::getNodeCands(const Stop* s) const { - if (_stops.find(s) == _stops.end() || _stops.at(s) == 0) { - return _emptyNCG; - } - return _stops.at(s)->pl().getSI()->getGroup()->getNodeCands(s); -} - -// _____________________________________________________________________________ -FLine ShapeBuilder::shapeL(const router::NodeCandRoute& ncr, - const router::RoutingAttrs& rAttrs) { - const router::EdgeListHops& res = route(ncr, rAttrs); - - FLine l; - for (const auto& hop : res) { - const trgraph::Node* last = hop.start; - if (hop.edges.size() == 0) { - l.push_back(*hop.start->pl().getGeom()); - l.push_back(*hop.end->pl().getGeom()); - } - for (auto i = hop.edges.rbegin(); i != hop.edges.rend(); i++) { - const auto* e = *i; - if ((e->getFrom() == last) ^ e->pl().isRev()) { - l.insert(l.end(), e->pl().getGeom()->begin(), e->pl().getGeom()->end()); - } else { - l.insert(l.end(), e->pl().getGeom()->rbegin(), - e->pl().getGeom()->rend()); - } - last = e->getOtherNd(last); - } - } - - return l; -} - -// _____________________________________________________________________________ 
-FLine ShapeBuilder::shapeL(Trip* trip) { - return shapeL(getNCR(trip), getRAttrs(trip)); -} - -// _____________________________________________________________________________ -EdgeListHops ShapeBuilder::route(const router::NodeCandRoute& ncr, - const router::RoutingAttrs& rAttrs) const { - router::Graph g; - - if (_cfg.solveMethod == "global") { - const router::EdgeListHops& ret = - _crouter.route(ncr, rAttrs, _motCfg.routingOpts, _restr, &g); - - // write combination graph - if (!_cfg.shapeTripId.empty() && _cfg.writeCombGraph) { - LOG(INFO) << "Outputting combgraph.json..."; - std::ofstream pstr(_cfg.dbgOutputPath + "/combgraph.json"); - GeoGraphJsonOutput o; - o.print(g, pstr); - } - - return ret; - } else if (_cfg.solveMethod == "greedy") { - return _crouter.routeGreedy(ncr, rAttrs, _motCfg.routingOpts, _restr); - } else if (_cfg.solveMethod == "greedy2") { - return _crouter.routeGreedy2(ncr, rAttrs, _motCfg.routingOpts, _restr); - } else { - LOG(ERROR) << "Unknown solution method " << _cfg.solveMethod; - exit(1); - } - - return EdgeListHops(); -} - -// _____________________________________________________________________________ -pfaedle::router::Shape ShapeBuilder::shape(Trip* trip) const { - LOG(VDEBUG) << "Map-matching shape for trip #" << trip->getId() << " of mot " - << trip->getRoute()->getType() << "(sn=" << trip->getShortname() - << ", rsn=" << trip->getRoute()->getShortName() - << ", rln=" << trip->getRoute()->getLongName() << ")"; - Shape ret; - ret.hops = route(getNCR(trip), getRAttrs(trip)); - ret.avgHopDist = avgHopDist(trip); - - LOG(VDEBUG) << "Finished map-matching for #" << trip->getId(); - - return ret; -} - -// _____________________________________________________________________________ -pfaedle::router::Shape ShapeBuilder::shape(Trip* trip) { - LOG(VDEBUG) << "Map-matching shape for trip #" << trip->getId() << " of mot " - << trip->getRoute()->getType() << "(sn=" << trip->getShortname() - << ", rsn=" << trip->getRoute()->getShortName() - 
<< ", rln=" << trip->getRoute()->getLongName() << ")"; - - Shape ret; - ret.hops = route(getNCR(trip), getRAttrs(trip)); - ret.avgHopDist = avgHopDist(trip); - - LOG(VDEBUG) << "Finished map-matching for #" << trip->getId(); - - return ret; -} - -// _____________________________________________________________________________ -void ShapeBuilder::shape(pfaedle::netgraph::Graph* ng) { - TrGraphEdgs gtfsGraph; - - LOG(INFO) << "Clustering trips..."; - Clusters clusters = clusterTrips(_feed, _mots); - LOG(INFO) << "Clustered trips into " << clusters.size() << " clusters."; - - std::map shpUsage; - for (auto t : _feed->getTrips()) { - if (t.second->getShape()) shpUsage[t.second->getShape()]++; - } - - // to avoid unfair load balance on threads - std::random_shuffle(clusters.begin(), clusters.end()); - - size_t iters = EDijkstra::ITERS; - size_t totiters = EDijkstra::ITERS; - size_t oiters = EDijkstra::ITERS; - size_t j = 0; - - auto t1 = TIME(); - auto t2 = TIME(); - double totAvgDist = 0; - size_t totNumTrips = 0; - -#pragma omp parallel for num_threads(_numThreads) - for (size_t i = 0; i < clusters.size(); i++) { - j++; - - if (j % 10 == 0) { -#pragma omp critical - { - LOG(INFO) << "@ " << j << " / " << clusters.size() << " (" - << (static_cast((j * 1.0) / clusters.size() * 100)) - << "%, " << (EDijkstra::ITERS - oiters) << " iters, tput " - << (static_cast(EDijkstra::ITERS - oiters)) / - TOOK(t1, TIME()) - << " iters/ms" - << ", " << (10.0 / (TOOK(t1, TIME()) / 1000)) - << " trips/sec)"; - - oiters = EDijkstra::ITERS; - t1 = TIME(); - } - } - - // explicitly call const version of shape here for thread safety - const Shape& cshp = - const_cast(*this).shape(clusters[i][0]); - totAvgDist += cshp.avgHopDist; - - if (_cfg.buildTransitGraph) { -#pragma omp critical - { writeTransitGraph(cshp, >fsGraph, clusters[i]); } - } - - std::vector distances; - ad::cppgtfs::gtfs::Shape* shp = - getGtfsShape(cshp, clusters[i][0], &distances); - - LOG(DEBUG) << "Took " << 
EDijkstra::ITERS - iters << " iterations."; - iters = EDijkstra::ITERS; - - totNumTrips += clusters[i].size(); - - for (auto t : clusters[i]) { - if (_cfg.evaluate) { - _ecoll->add(t, t->getShape(), shp, distances); - } - - if (t->getShape() && shpUsage[t->getShape()] > 0) { - shpUsage[t->getShape()]--; - if (shpUsage[t->getShape()] == 0) { - _feed->getShapes().remove(t->getShape()->getId()); - delete t->getShape(); + for (const auto& forest : forests) { + for (const auto& trie : forest.second) { + for (const auto& trips : trie.getNdTrips()) { + for (const auto& st : trips.second[0]->getStopTimes()) { + stops.insert(st.getStop()); } } - setShape(t, shp, distances); } } - LOG(INFO) << "Done."; - LOG(INFO) << "Matched " << totNumTrips << " trips in " << clusters.size() - << " clusters."; - LOG(INFO) << "Took " << (EDijkstra::ITERS - totiters) - << " iterations in total."; - LOG(INFO) << "Took " << TOOK(t2, TIME()) << " ms in total."; - LOG(INFO) << "Total avg. tput " - << (static_cast(EDijkstra::ITERS - totiters)) / - TOOK(t2, TIME()) - << " iters/sec"; - LOG(INFO) << "Total avg. 
trip tput " - << (clusters.size() / (TOOK(t2, TIME()) / 1000)) << " trips/sec"; - LOG(INFO) << "Avg hop distance was " - << (totAvgDist / static_cast(clusters.size())) << " meters"; + size_t numThreads = std::thread::hardware_concurrency(); + std::vector thrds(numThreads); + std::vector caches(numThreads); + std::vector> threadStops(numThreads); - if (_cfg.buildTransitGraph) { - LOG(INFO) << "Building transit network graph..."; - buildTrGraph(>fsGraph, ng); - } -} - -// _____________________________________________________________________________ -void ShapeBuilder::setShape(Trip* t, ad::cppgtfs::gtfs::Shape* s, - const std::vector& distances) { - assert(distances.size() == t->getStopTimes().size()); - // set distances size_t i = 0; - for (const StopTime& st : t->getStopTimes()) { - const_cast(st).setShapeDistanceTravelled(distances[i]); + for (auto stop : stops) { + threadStops[i].push_back(stop); + if (++i == numThreads) i = 0; + } + + i = 0; + for (auto& t : thrds) { + t = std::thread(&ShapeBuilder::edgCandWorker, this, &threadStops[i], + &caches[i]); i++; } - t->setShape(s); + for (auto& thr : thrds) thr.join(); - std::lock_guard guard(_shpMutex); - _feed->getShapes().add(s); + // merge + for (size_t i = 0; i < numThreads; i++) { + for (const auto& c : caches[i]) { + _grpCache[c.first] = c.second; + count += c.second.size(); + } + } + + if (_grpCache.size()) + LOG(DEBUG) << "Average candidate set size: " + << ((count * 1.0) / _grpCache.size()); } // _____________________________________________________________________________ -ad::cppgtfs::gtfs::Shape* ShapeBuilder::getGtfsShape( - const Shape& shp, Trip* t, std::vector* hopDists) { - ad::cppgtfs::gtfs::Shape* ret = - new ad::cppgtfs::gtfs::Shape(getFreeShapeId(t)); +EdgeCandGroup ShapeBuilder::getEdgCands(const Stop* s) const { + auto cached = _grpCache.find(s); + if (cached != _grpCache.end()) return cached->second; - assert(shp.hops.size() == t->getStopTimes().size() - 1); + EdgeCandGroup ret; + + const 
auto& snormzer = _motCfg.osmBuildOpts.statNormzer; + auto normedName = snormzer.norm(s->getName()); + + // the first cand is a placeholder for the stop position itself, it is chosen + // when no candidate yielded a feasible route + auto pos = POINT(s->getLng(), s->getLat()); + ret.push_back({0, 0, 0, pos, 0, {}}); + + double maxMDist = _motCfg.osmBuildOpts.maxStationCandDistance; + + double distor = util::geo::latLngDistFactor(pos); + + if (_cfg.gaussianNoise > 0) { + unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); + std::default_random_engine gen(seed); + + // the standard dev is given in meters, convert (roughly...) to degrees + double standardDev = (_cfg.gaussianNoise / M_PER_DEG) / distor; + + // mean 0 (no movement), standard dev according to config + std::normal_distribution dist(0.0, standardDev); + + // add gaussian noise + pos.setX(pos.getX() + dist(gen)); + pos.setY(pos.getY() + dist(gen)); + } + + std::set frNIdx; + _nGrid.get(util::geo::pad(util::geo::getBoundingBox(pos), + (maxMDist / M_PER_DEG) / distor), + &frNIdx); + + if (_motCfg.routingOpts.useStations) { + for (auto nd : frNIdx) { + assert(nd->pl().getSI()); + + double mDist = util::geo::haversine(pos, *nd->pl().getGeom()); + if (mDist > maxMDist) continue; + + double nameMatchPunish = 0; + double trackMatchPunish = 0; + + if (!_classifier->similar(normedName, pos, nd->pl().getSI()->getName(), + *nd->pl().getGeom())) { + // stations do not match, punish + nameMatchPunish = _motCfg.routingOpts.stationUnmatchedPen; + } + std::string platform = s->getPlatformCode(); + + if (!platform.empty() && !nd->pl().getSI()->getTrack().empty() && + nd->pl().getSI()->getTrack() == platform) { + trackMatchPunish = _motCfg.routingOpts.platformUnmatchedPen; + } + + for (auto* e : nd->getAdjListOut()) { + // don't snap to one way edges + if (e->pl().oneWay() == 2) continue; + ret.push_back({e, + emWeight(mDist) + nameMatchPunish + trackMatchPunish, + 0, + {}, + 0, + {}}); + } + } + } + + 
maxMDist = _motCfg.osmBuildOpts.maxSnapDistance; + + std::set frEIdx; + _eGrid.get(util::geo::pad(util::geo::getBoundingBox(pos), + (maxMDist / M_PER_DEG) / distor), + &frEIdx); + + std::set selected; + std::map scores; + std::map progrs; + + for (auto edg : frEIdx) { + if (selected.count(edg)) continue; + + auto reach = deg2reachable(edg, selected); + + double mDist = dist(pos, *edg->pl().getGeom()) * distor * M_PER_DEG; + + if (mDist > maxMDist) continue; + + if (!reach || mDist < scores[reach]) { + if (reach) { + selected.erase(selected.find(reach)); + scores.erase(scores.find(reach)); + } + util::geo::PolyLine pl(*edg->pl().getGeom()); + auto lp = pl.projectOn(pos); + double progr = lp.totalPos; + if (edg->pl().isRev()) progr = 1 - progr; + selected.insert(edg); + scores[edg] = mDist; + progrs[edg] = progr; + } + } + + for (auto e : selected) { + ret.push_back({e, + emWeight(scores[e]) + _motCfg.routingOpts.nonStationPen, + progrs[e], + {}, + 0, + {}}); + } + + if (ret.size() == 1 && _cfg.verbosity) { + LOG(WARN) << "No snapping candidate found for stop '" << s->getName() + << "' (" << s->getId() << ")"; + } + + return ret; +} + +// _____________________________________________________________________________ +pfaedle::trgraph::Edge* ShapeBuilder::deg2reachable( + trgraph::Edge* e, std::set edgs) const { + trgraph::Edge* cur = e; + + // forward + while (cur->getTo()->getDeg() == 2) { + // dont allow backtracking on reverse edge + auto next = e->getTo()->getAdjListOut().front()->getTo() == e->getFrom() + ? e->getTo()->getAdjListOut().back() + : e->getTo()->getAdjListOut().front(); + if (next == e || next == cur) break; // avoid circles + if (next->pl().oneWay() == 2) break; // dont follow one way edges + if (edgs.count(next)) return next; + cur = next; + } + + // backward + while (cur->getFrom()->getDeg() == 2) { + // dont allow backtracking on reverse edge + auto next = e->getFrom()->getAdjListIn().front()->getFrom() == e->getTo() + ? 
e->getFrom()->getAdjListIn().back() + : e->getFrom()->getAdjListIn().front(); + if (next == e || next == cur) break; // avoid circles + if (next->pl().oneWay() == 2) break; // dont follow one way edges + if (edgs.count(cur)) return next; + cur = next; + } + + return 0; +} + +// _____________________________________________________________________________ +std::pair, Stats> ShapeBuilder::shapeL(Trip* trip) { + Stats stats; + try { + T_START(t); + EDijkstra::ITERS = 0; + auto hops = shapeify(trip); + stats.solveTime = T_STOP(t); + stats.numTries = 1; + stats.numTrieLeafs = 1; + stats.totNumTrips = 1; + stats.dijkstraIters = EDijkstra::ITERS; + std::map colors; + LOG(INFO) << "Matched 1 trip in " << std::fixed << std::setprecision(2) + << stats.solveTime << " ms."; + // print to line + return {getGeom(hops, getRAttrs(trip), &colors, trip, 1), stats}; + } catch (const std::runtime_error& e) { + LOG(ERROR) << e.what(); + return {std::vector(), stats}; + } +} + +// _____________________________________________________________________________ +std::map ShapeBuilder::route( + const TripTrie* trie, const EdgeCandMap& ecm, + HopCache* hopCache) const { + return _router->route(trie, ecm, _motCfg.routingOpts, *_restr, hopCache, + _cfg.noFastHops); +} + +// _____________________________________________________________________________ +std::map ShapeBuilder::shapeify( + const TripTrie* trie, HopCache* hopCache) const { + LOG(VDEBUG) << "Map-matching trie " << trie; + + assert(trie->getNdTrips().size()); + assert(trie->getNdTrips().begin()->second.size()); + RoutingAttrs rAttrs = getRAttrs(trie->getNdTrips().begin()->second[0]); + + std::map ret; + + const auto& routes = route(trie, getECM(trie), hopCache); + + for (const auto& route : routes) { + ret[route.first] = route.second; + } + + LOG(VDEBUG) << "Finished map-matching for trie " << trie; + + return ret; +} + +// _____________________________________________________________________________ +EdgeListHops 
ShapeBuilder::shapeify(Trip* trip) { + LOG(VDEBUG) << "Map-matching shape for trip #" << trip->getId() << " of mot " + << trip->getRoute()->getType() << "(sn=" << trip->getShortname() + << ", rsn=" << trip->getRoute()->getShortName() + << ", rln=" << trip->getRoute()->getLongName() << ")"; + TripTrie trie; + trie.addTrip(trip, getRAttrs(trip), + _motCfg.routingOpts.transPenMethod == "timenorm", false); + const auto& routes = route(&trie, getECM(&trie), 0); + + return routes.begin()->second; +} + +// _____________________________________________________________________________ +Stats ShapeBuilder::shapeify(pfaedle::netgraph::Graph* outNg) { + Stats stats; + EDijkstra::ITERS = 0; + + T_START(cluster); + LOG(DEBUG) << "Clustering trips..."; + const TripForests& forests = clusterTrips(_feed, _mots); + for (const auto& forest : forests) { + for (const auto& trie : forest.second) { + stats.numTries++; + stats.numTrieLeafs += trie.getNdTrips().size(); + } + } + LOG(DEBUG) << "Clustered trips into " << stats.numTries + << " tries with a total of " << stats.numTrieLeafs << " leafs in " + << T_STOP(cluster) << "ms"; + + LOG(DEBUG) << "Building candidate cache..."; + buildCandCache(forests); + LOG(DEBUG) << "Done."; + + std::map shpUse; + RouteRefColors refColors; + + for (auto t : _feed->getTrips()) { + if (!t.getShape().empty()) shpUse[t.getShape()]++; + + // write the colors of trips we won't touch, but whose route we might + if (t.getStopTimes().size() < 2) continue; + if (!_mots.count(t.getRoute()->getType()) || + !_motCfg.mots.count(t.getRoute()->getType())) + continue; + + if (!t.getShape().empty() && !_cfg.dropShapes) { + refColors[t.getRoute()][t.getRoute()->getColor()].push_back(&t); + } + } + + // we implicitely cluster by routing attrs here. 
This ensures that now two + // threads will access the same routing attrs later on, which safes us an + // expensive locking mechanism later on for the hop cache + std::vector tries; + for (const auto& forest : forests) { + tries.push_back(&(forest.second)); + for (const auto& trie : forest.second) { + for (const auto& trips : trie.getNdTrips()) { + stats.totNumTrips += trips.second.size(); + } + } + } + + auto tStart = TIME(); + std::atomic at(0); + + size_t numThreads = std::thread::hardware_concurrency(); + std::vector thrds(numThreads); + std::vector colors(numThreads); + std::vector gtfsGraphs(numThreads); + + size_t i = 0; + for (auto& t : thrds) { + t = std::thread(&ShapeBuilder::shapeWorker, this, &tries, &at, &shpUse, + &colors[i], >fsGraphs[i]); + i++; + } + + for (auto& thr : thrds) thr.join(); + + stats.solveTime = TOOK(tStart, TIME()); + + LOG(INFO) << "Matched " << stats.totNumTrips << " trips in " << std::fixed + << std::setprecision(2) << stats.solveTime << " ms."; + + // merge colors + for (auto& cols : colors) { + for (auto& route : cols) { + for (auto& col : route.second) { + refColors[route.first][col.first].insert( + refColors[route.first][col.first].end(), col.second.begin(), + col.second.end()); + } + } + } + + // update them in the routes, split routes if necessary + updateRouteColors(refColors); + + if (_cfg.buildTransitGraph) { + LOG(DEBUG) << "Building transit network graph..."; + + // merge gtfsgraph from threads + TrGraphEdgs gtfsGraph; + + for (auto& g : gtfsGraphs) { + for (auto& ePair : g) { + gtfsGraph[ePair.first].insert(gtfsGraph[ePair.first].begin(), + ePair.second.begin(), ePair.second.end()); + } + } + buildNetGraph(>fsGraph, outNg); + } + + stats.dijkstraIters = EDijkstra::ITERS; + + return stats; +} + +// _____________________________________________________________________________ +void ShapeBuilder::updateRouteColors(const RouteRefColors& refColors) { + for (auto& route : refColors) { + if (route.second.size() == 1) { + // 
only one color found for this route, great! + // update inplace... + route.first->setColor(route.second.begin()->first); + if (route.first->getColor() != NO_COLOR) + route.first->setTextColor(getTextColor(route.first->getColor())); + } else { + // are there fare rules using this route? + std::vector< + std::pair*, + ad::cppgtfs::gtfs::FareRule>> + rules; + + for (auto& f : _feed->getFares()) { + for (auto r : f.second->getFareRules()) { + if (r.getRoute() == route.first) { + rules.push_back({f.second, r}); + } + } + } + + // add new routes... + for (auto& c : route.second) { + // keep the original one intact + if (c.first == route.first->getColor()) continue; + + auto routeCp = *route.first; + + // find free id + std::string newId = route.first->getId() + "::1"; + size_t i = 1; + while (_feed->getRoutes().get(newId)) { + i++; + newId = route.first->getId() + "::" + std::to_string(i); + } + + routeCp.setId(newId); + routeCp.setColor(c.first); + routeCp.setTextColor(getTextColor(routeCp.getColor())); + + auto newRoute = _feed->getRoutes().add(routeCp); + + // update trips to use that route + for (auto& t : c.second) t->setRoute(newRoute); + + // add new fare rules + for (auto a : rules) { + auto rule = a.second; + rule.setRoute(newRoute); + a.first->addFareRule(rule); + } + } + } + } +} + +// _____________________________________________________________________________ +void ShapeBuilder::setShape(Trip* t, const ad::cppgtfs::gtfs::Shape& s, + const std::vector& distances) { + assert(distances.size() == t->getStopTimes().size()); + // set distances + size_t i = 0; + for (const auto& st : t->getStopTimes()) { + const_cast&>(st).setShapeDistanceTravelled(distances[i]); + i++; + } + + std::lock_guard guard(_shpMutex); + auto gtfsShp = _feed->getShapes().add(s); + t->setShape(gtfsShp); +} + +// _____________________________________________________________________________ +ad::cppgtfs::gtfs::Shape ShapeBuilder::getGtfsShape( + const EdgeListHops& hops, Trip* t, size_t 
numOthers, + const RoutingAttrs& rAttrs, std::vector* hopDists, + uint32_t* bestColor) { + ad::cppgtfs::gtfs::Shape ret(getFreeShapeId(t)); + + assert(hops.size() == t->getStopTimes().size() - 1); + + std::map colors; + + const std::vector& gl = getGeom(hops, rAttrs, &colors, t, numOthers); + const std::vector& measures = getMeasure(gl); size_t seq = 0; - double dist = -1; - double lastDist = -1; hopDists->push_back(0); - FPoint last(0, 0); - for (const auto& hop : shp.hops) { - const trgraph::Node* l = hop.start; - if (hop.edges.size() == 0) { - FPoint ll = webMercToLatLng(hop.start->pl().getGeom()->getX(), - hop.start->pl().getGeom()->getY()); - - if (dist > -0.5) - dist += webMercMeterDist(last, *hop.start->pl().getGeom()); - else - dist = 0; - - last = *hop.start->pl().getGeom(); - - if (dist - lastDist > 0.01) { - ret->addPoint(ShapePoint(ll.getY(), ll.getX(), dist, seq)); - seq++; - lastDist = dist; - } - - dist += webMercMeterDist(last, *hop.end->pl().getGeom()); - last = *hop.end->pl().getGeom(); - - if (dist - lastDist > 0.01) { - ll = webMercToLatLng(hop.end->pl().getGeom()->getX(), - hop.end->pl().getGeom()->getY()); - ret->addPoint(ShapePoint(ll.getY(), ll.getX(), dist, seq)); - seq++; - lastDist = dist; - } - } - for (auto i = hop.edges.rbegin(); i != hop.edges.rend(); i++) { - const auto* e = *i; - if ((e->getFrom() == l) ^ e->pl().isRev()) { - for (size_t i = 0; i < e->pl().getGeom()->size(); i++) { - const FPoint& cur = (*e->pl().getGeom())[i]; - if (dist > -0.5) - dist += webMercMeterDist(last, cur); - else - dist = 0; - last = cur; - if (dist - lastDist > 0.01) { - FPoint ll = webMercToLatLng(cur.getX(), cur.getY()); - ret->addPoint(ShapePoint(ll.getY(), ll.getX(), dist, seq)); - seq++; - lastDist = dist; - } - } - } else { - for (int64_t i = e->pl().getGeom()->size() - 1; i >= 0; i--) { - const FPoint& cur = (*e->pl().getGeom())[i]; - if (dist > -0.5) - dist += webMercMeterDist(last, cur); - else - dist = 0; - last = cur; - if (dist - lastDist > 
0.01) { - FPoint ll = webMercToLatLng(cur.getX(), cur.getY()); - ret->addPoint(ShapePoint(ll.getY(), ll.getX(), dist, seq)); - seq++; - lastDist = dist; - } - } - } - l = e->getOtherNd(l); + for (size_t i = 0; i < gl.size(); i++) { + for (size_t j = 0; j < gl[i].size(); j++) { + ret.addPoint( + ShapePoint(gl[i][j].getY(), gl[i][j].getX(), measures[seq], seq)); + seq++; } + hopDists->push_back(measures[seq - 1]); + } - hopDists->push_back(lastDist); + // get most likely color + double best = 0; + *bestColor = NO_COLOR; + for (const auto& c : colors) { + double progr = c.second / measures.back(); + // TODO(patrick): make threshold configurable + if (progr > 0.9 && progr > best) { + best = progr; + *bestColor = c.first; + } } return ret; @@ -386,7 +604,7 @@ ad::cppgtfs::gtfs::Shape* ShapeBuilder::getGtfsShape( std::string ShapeBuilder::getFreeShapeId(Trip* trip) { std::string ret; std::lock_guard guard(_shpMutex); - while (!ret.size() || _feed->getShapes().get(ret)) { + while (!ret.size() || _feed->getShapes().has(ret)) { _curShpCnt++; ret = "shp_"; ret += std::to_string(trip->getRoute()->getType()); @@ -403,19 +621,23 @@ const RoutingAttrs& ShapeBuilder::getRAttrs(const Trip* trip) { if (i == _rAttrs.end()) { router::RoutingAttrs ret; + ret.classifier = _classifier; + const auto& lnormzer = _motCfg.osmBuildOpts.lineNormzer; + const auto& snormzer = _motCfg.osmBuildOpts.statNormzer; - ret.shortName = lnormzer(trip->getRoute()->getShortName()); + ret.shortName = lnormzer.norm(trip->getRoute()->getShortName()); + ret.lineFrom = + snormzer.norm(trip->getStopTimes().front().getStop()->getName()); + ret.lineTo = { + snormzer.norm(trip->getStopTimes().back().getStop()->getName())}; - if (ret.shortName.empty()) ret.shortName = lnormzer(trip->getShortname()); + // fallbacks for line name + if (ret.shortName.empty()) + ret.shortName = lnormzer.norm(trip->getShortname()); if (ret.shortName.empty()) - ret.shortName = lnormzer(trip->getRoute()->getLongName()); - - ret.fromString 
= _motCfg.osmBuildOpts.statNormzer( - trip->getStopTimes().begin()->getStop()->getName()); - ret.toString = _motCfg.osmBuildOpts.statNormzer( - (--trip->getStopTimes().end())->getStop()->getName()); + ret.shortName = lnormzer.norm(trip->getRoute()->getLongName()); return _rAttrs .insert(std::pair(trip, ret)) @@ -431,179 +653,299 @@ const RoutingAttrs& ShapeBuilder::getRAttrs(const Trip* trip) const { } // _____________________________________________________________________________ -BBoxIdx ShapeBuilder::getPaddedGtfsBox(const Feed* feed, double pad, - const MOTs& mots, - const std::string& tid) { - osm::BBoxIdx box(pad); +void ShapeBuilder::getGtfsBox(const Feed* feed, const MOTs& mots, + const std::string& tid, bool dropShapes, + osm::BBoxIdx* box, double maxSpeed, + std::vector* hopDists, + uint8_t verbosity) { for (const auto& t : feed->getTrips()) { - if (!tid.empty() && t.second->getId() != tid) continue; - if (mots.count(t.second->getRoute()->getType())) { - Box cur = minbox(); - for (const auto& st : t.second->getStopTimes()) { - cur = extendBox( - Point(st.getStop()->getLng(), st.getStop()->getLat()), cur); + if (!tid.empty() && t.getId() != tid) continue; + if (tid.empty() && !t.getShape().empty() && !dropShapes) continue; + if (t.getStopTimes().size() < 2) continue; + + if (mots.count(t.getRoute()->getType())) { + DBox cur; + for (size_t i = 0; i < t.getStopTimes().size(); i++) { + // skip outliers + const auto& st = t.getStopTimes()[i]; + + int toTime = std::numeric_limits::max(); + double toD = 0; + int fromTime = std::numeric_limits::max(); + double fromD = 0; + + if (i > 0) { + const auto& stPrev = t.getStopTimes()[i - 1]; + toTime = st.getArrivalTime().seconds() - + stPrev.getDepartureTime().seconds(); + toD = util::geo::haversine( + st.getStop()->getLat(), st.getStop()->getLng(), + stPrev.getStop()->getLat(), stPrev.getStop()->getLng()); + if (hopDists) hopDists->push_back(toD); + } + + if (i < t.getStopTimes().size() - 1) { + const auto& stNext = 
t.getStopTimes()[i + 1]; + fromTime = stNext.getArrivalTime().seconds() - + st.getDepartureTime().seconds(); + fromD = util::geo::haversine( + st.getStop()->getLat(), st.getStop()->getLng(), + stNext.getStop()->getLat(), stNext.getStop()->getLng()); + } + + const double reqToTime = toD / maxSpeed; + const double reqFromTime = fromD / maxSpeed; + + const double BUFFER = 5 * 60; + + if (reqToTime > (BUFFER + toTime) * 3 * MAX_ROUTE_COST_DOUBLING_STEPS && + reqFromTime > + (BUFFER + fromTime) * 3 * MAX_ROUTE_COST_DOUBLING_STEPS) { + if (verbosity) { + LOG(WARN) + << "Skipping station '" << st.getStop()->getName() << "' (" + << st.getStop()->getId() << ") @ " << st.getStop()->getLat() + << ", " << st.getStop()->getLng() + << " for bounding box as the vehicle cannot realistically " + "reach and leave it in the scheduled time"; + } else { + LOG(DEBUG) + << "Skipping station '" << st.getStop()->getName() << "' (" + << st.getStop()->getId() << ") @ " << st.getStop()->getLat() + << ", " << st.getStop()->getLng() + << " for bounding box as the vehicle cannot realistically " + "reach and leave it in the scheduled time"; + } + continue; + } + + cur = extendBox(DPoint(st.getStop()->getLng(), st.getStop()->getLat()), + cur); } - box.add(cur); + box->add(cur); } } - - return box; } // _____________________________________________________________________________ -void ShapeBuilder::buildGraph() { - LOG(INFO) << "Reading " << _cfg.osmPath << " ... 
"; - osm::OsmBuilder osmBuilder; +std::vector ShapeBuilder::getTransTimes(Trip* trip) const { + std::vector ret; - osm::BBoxIdx box = getPaddedGtfsBox(_feed, 2500, _mots, _cfg.shapeTripId); + for (size_t i = 0; i < trip->getStopTimes().size() - 1; i++) { + auto cur = trip->getStopTimes()[i]; + auto next = trip->getStopTimes()[i + 1]; - osmBuilder.read(_cfg.osmPath, _motCfg.osmBuildOpts, &_g, box, _cfg.gridSize, - getFeedStops(), &_restr); + int depTime = cur.getDepartureTime().seconds(); + int arrTime = next.getArrivalTime().seconds(); - for (auto& feedStop : *getFeedStops()) { - if (feedStop.second) { - feedStop.second->pl().getSI()->getGroup()->writePens( - _motCfg.osmBuildOpts.trackNormzer, - _motCfg.routingOpts.platformUnmatchedPen, - _motCfg.routingOpts.stationDistPenFactor, - _motCfg.routingOpts.nonOsmPen); - } - } + int diff = arrTime - depTime; + if (diff < 1) diff = 1; - LOG(INFO) << "Done."; -} - -// _____________________________________________________________________________ -NodeCandRoute ShapeBuilder::getNCR(Trip* trip) const { - router::NodeCandRoute ncr(trip->getStopTimes().size()); - - size_t i = 0; - - for (const auto& st : trip->getStopTimes()) { - ncr[i] = getNodeCands(st.getStop()); - i++; - } - return ncr; -} - -// _____________________________________________________________________________ -double ShapeBuilder::avgHopDist(Trip* trip) const { - size_t i = 0; - double sum = 0; - - const Stop* prev = 0; - - for (const auto& st : trip->getStopTimes()) { - if (!prev) { - prev = st.getStop(); - continue; - } - auto a = util::geo::latLngToWebMerc(prev->getLat(), prev->getLng()); - auto b = util::geo::latLngToWebMerc(st.getStop()->getLat(), - st.getStop()->getLng()); - sum += util::geo::webMercMeterDist(a, b); - - prev = st.getStop(); - i++; - } - return sum / static_cast(i); -} - -// _____________________________________________________________________________ -Clusters ShapeBuilder::clusterTrips(Feed* f, MOTs mots) { - // building an index [start 
station, end station] -> [cluster] - - std::map> clusterIdx; - - size_t j = 0; - - Clusters ret; - for (const auto& trip : f->getTrips()) { - // if (trip.second->getId() != "L5Cvl_T01") continue; - if (trip.second->getShape() && !_cfg.dropShapes) continue; - if (trip.second->getStopTimes().size() < 2) continue; - if (!mots.count(trip.second->getRoute()->getType()) || - !_motCfg.mots.count(trip.second->getRoute()->getType())) - continue; - bool found = false; - auto spair = StopPair(trip.second->getStopTimes().begin()->getStop(), - trip.second->getStopTimes().rbegin()->getStop()); - const auto& c = clusterIdx[spair]; - - for (size_t i = 0; i < c.size(); i++) { - j++; - if (routingEqual(ret[c[i]][0], trip.second)) { - ret[c[i]].push_back(trip.second); - found = true; - break; - } - } - if (!found) { - ret.push_back({trip.second}); - // explicit call to write render attrs to cache - getRAttrs(trip.second); - clusterIdx[spair].push_back(ret.size() - 1); - } + ret.push_back(diff); + assert(ret.back() >= 0); } return ret; } // _____________________________________________________________________________ -bool ShapeBuilder::routingEqual(const Stop* a, const Stop* b) { - if (a == b) return true; // trivial +std::vector ShapeBuilder::getTransDists(Trip* trip) const { + std::vector ret; - auto namea = _motCfg.osmBuildOpts.statNormzer(a->getName()); - auto nameb = _motCfg.osmBuildOpts.statNormzer(b->getName()); - if (namea != nameb) return false; + for (size_t i = 0; i < trip->getStopTimes().size() - 1; i++) { + auto cur = trip->getStopTimes()[i]; + auto next = trip->getStopTimes()[i + 1]; - auto tracka = _motCfg.osmBuildOpts.trackNormzer(a->getPlatformCode()); - auto trackb = _motCfg.osmBuildOpts.trackNormzer(b->getPlatformCode()); - if (tracka != trackb) return false; + double dist = util::geo::haversine( + cur.getStop()->getLat(), cur.getStop()->getLng(), + next.getStop()->getLat(), next.getStop()->getLng()); - FPoint ap = util::geo::latLngToWebMerc(a->getLat(), 
a->getLng()); - FPoint bp = util::geo::latLngToWebMerc(b->getLat(), b->getLng()); - - double d = util::geo::webMercMeterDist(ap, bp); - - if (d > 1) return false; - - return true; -} - -// _____________________________________________________________________________ -bool ShapeBuilder::routingEqual(Trip* a, Trip* b) { - if (a->getStopTimes().size() != b->getStopTimes().size()) return false; - if (getRAttrs(a) != getRAttrs(b)) return false; - - auto stb = b->getStopTimes().begin(); - for (const auto& sta : a->getStopTimes()) { - if (!routingEqual(sta.getStop(), stb->getStop())) { - return false; - } - stb++; + ret.push_back(dist); } - return true; + return ret; } // _____________________________________________________________________________ -const pfaedle::trgraph::Graph* ShapeBuilder::getGraph() const { return &_g; } +EdgeCandMap ShapeBuilder::getECM( + const TripTrie* trie) const { + EdgeCandMap ecm(trie->getNds().size()); + + for (size_t nid = 1; nid < trie->getNds().size(); nid++) { + auto trNd = trie->getNds()[nid]; + auto parentTrNd = trie->getNds()[trNd.parent]; + + if (nid != 1 && !trNd.arr) continue; + + double avgT = 0; + + if (trNd.trips) avgT = trNd.accTime / trNd.trips; + + const auto& cands = getEdgCands(trNd.reprStop); + ecm[nid].reserve(cands.size()); + + for (auto& cand : cands) { + const auto& timeExpCands = timeExpand(cand, avgT); + assert(timeExpCands.size()); + + for (size_t depChildId : trNd.childs) { + if (nid == 1) break; + auto chldTrNd = trie->getNds()[depChildId]; + double avgChildT = 0; + if (chldTrNd.trips) avgChildT = chldTrNd.accTime / chldTrNd.trips; + + double timeDiff = avgChildT - avgT; + if (timeDiff < 0) timeDiff = 0; + + for (size_t candId = 0; candId < timeExpCands.size(); candId++) { + const auto& cand = timeExpCands[candId]; + ecm[depChildId].push_back(cand); + ecm[depChildId].back().time += timeDiff; + + ecm[depChildId].back().pen = timePen(cand.time, avgChildT); + + for (size_t sucCandId = 0; sucCandId < 
timeExpCands.size(); + sucCandId++) { + if (timeExpCands[sucCandId].time <= ecm[depChildId].back().time) { + ecm[depChildId].back().depPrede.push_back(sucCandId + + ecm[nid].size()); + } + } + assert(ecm[depChildId].back().depPrede.size()); + } + } + ecm[nid].insert(ecm[nid].end(), timeExpCands.begin(), timeExpCands.end()); + } + + assert(ecm[nid].size() != 0); + } + + return ecm; +} // _____________________________________________________________________________ -void ShapeBuilder::writeTransitGraph(const Shape& shp, TrGraphEdgs* edgs, - const Cluster& cluster) const { - for (auto hop : shp.hops) { +double ShapeBuilder::timePen(int candTime, int schedTime) const { + // standard deviation of normal distribution + double standarddev = 5 * 60; + + int diff = abs(candTime - schedTime); + + double cNorm = diff / standarddev; + return cNorm * cNorm; +} + +// _____________________________________________________________________________ +EdgeCandGroup ShapeBuilder::timeExpand(const EdgeCand& ec, int time) const { + EdgeCandGroup ret; + // TODO(patrick): heuristic for time expansion variance, currently + // unused + for (int i = 0; i < 1; i++) { + EdgeCand ecNew = ec; + // in 30 sec steps + ecNew.time = time + i * 30; + ecNew.pen = ecNew.pen + timePen(ecNew.time, time); + ret.push_back(ecNew); + } + + return ret; +} + +// _____________________________________________________________________________ +TripForests ShapeBuilder::clusterTrips(Feed* f, MOTs mots) { + TripForests forest; + std::map> trips; + + // warm the stop name normalizer caches so a + // multithreaded access later on will never write to the underlying cache + for (auto& stop : f->getStops()) { + const auto& snormzer = _motCfg.osmBuildOpts.statNormzer; + auto normedName = snormzer.norm(stop.getName()); + } + + // cluster by routing attr for parallization later on + for (auto& trip : f->getTrips()) { + if (!_cfg.dropShapes && !trip.getShape().empty()) continue; + if (trip.getStopTimes().size() < 2) continue; + 
if (!mots.count(trip.getRoute()->getType()) || + !_motCfg.mots.count(trip.getRoute()->getType())) + continue; + + // important: we are building the routing attributes here, so a + // multithreaded access later on will never write to the underlying cache + const auto& rAttrs = getRAttrs(&trip); + + trips[rAttrs].push_back(&trip); + forest[rAttrs] = {}; + } + + size_t numThreads = std::thread::hardware_concurrency(); + std::vector thrds(numThreads); + std::vector> attrs(numThreads); + + size_t i = 0; + for (auto it : trips) { + attrs[i].push_back(it.first); + if (++i == numThreads) i = 0; + } + + i = 0; + for (auto& t : thrds) { + t = std::thread(&ShapeBuilder::clusterWorker, this, &attrs[i], &trips, + &forest); + i++; + } + + for (auto& thr : thrds) thr.join(); + + return forest; +} + +// _____________________________________________________________________________ +void ShapeBuilder::clusterWorker( + const std::vector* rAttrsVec, + const std::map>* trips, + TripForests* forest) { + for (const auto& rAttrs : *rAttrsVec) { + for (auto& trip : trips->at(rAttrs)) { + bool ins = false; + auto& subForest = forest->at(rAttrs); + for (auto& trie : subForest) { + if (trie.addTrip(trip, rAttrs, + _motCfg.routingOpts.transPenMethod == "timenorm", + _cfg.noTrie)) { + ins = true; + break; + } + } + + if (!ins) { + subForest.resize(subForest.size() + 1); + subForest.back().addTrip( + trip, rAttrs, _motCfg.routingOpts.transPenMethod == "timenorm", + false); + } + } + } +} + +// _____________________________________________________________________________ +const pfaedle::trgraph::Graph* ShapeBuilder::getGraph() const { return _g; } + +// _____________________________________________________________________________ +void ShapeBuilder::writeTransitGraph( + const router::EdgeListHops& hops, TrGraphEdgs* edgs, + const std::vector& trips) const { + for (const auto& hop : hops) { for (const auto* e : hop.edges) { - if (e->pl().isRev()) e = _g.getEdg(e->getTo(), e->getFrom()); - 
(*edgs)[e].insert(cluster.begin(), cluster.end()); + if (e->pl().isRev()) e = _g->getEdg(e->getTo(), e->getFrom()); + (*edgs)[e].insert((*edgs)[e].begin(), trips.begin(), trips.end()); } } } // _____________________________________________________________________________ -void ShapeBuilder::buildTrGraph(TrGraphEdgs* edgs, - pfaedle::netgraph::Graph* ng) const { +void ShapeBuilder::buildNetGraph(TrGraphEdgs* edgs, + pfaedle::netgraph::Graph* ng) const { std::unordered_map nodes; for (auto ep : *edgs) { @@ -625,3 +967,308 @@ void ShapeBuilder::buildTrGraph(TrGraphEdgs* edgs, pfaedle::netgraph::EdgePL(*e->pl().getGeom(), ep.second)); } } + +// _____________________________________________________________________________ +std::vector ShapeBuilder::getGeom(const EdgeListHops& hops, + const RoutingAttrs& rAttrs, + std::map* colors, + Trip* t, size_t numOthers) const { + std::vector ret; + + for (size_t i = hops.size(); i > 0; i--) { + const auto& hop = hops[i - 1]; + if (!hop.start || !hop.end) { + // no hop was found, use the fallback geometry + + if (_cfg.verbosity) { + const auto stopFr = t->getStopTimes()[hops.size() - i].getStop(); + const auto stopTo = t->getStopTimes()[hops.size() - i + 1].getStop(); + + LOG(WARN) << "No viable hop found between stops '" << stopFr->getName() + << "' (" << stopFr->getId() << ") and '" << stopTo->getName() + << "' (" << stopTo->getId() << ") for trip " << t->getId() + << " of type '" + << ad::cppgtfs::gtfs::flat::Route::getTypeString( + t->getRoute()->getType()) + << "'" + << (numOthers > 1 ? 
" (and " + std::to_string(numOthers) + + " similar trips)" + : "") + << ", falling back to straight line"; + } + + if (hop.start) { + if (hop.progrStart > 0) { + auto l = getLine(hop.start); + PolyLine pl(l); + const auto& seg = pl.getSegment(hop.progrStart, 1); + ret.push_back({seg.getLine().front(), hop.pointEnd}); + } else { + ret.push_back({*hop.start->getFrom()->pl().getGeom(), hop.pointEnd}); + } + } else if (hop.end) { + if (hop.progrEnd > 0) { + auto l = getLine(hop.end); + PolyLine pl(l); + const auto& seg = pl.getSegment(0, hop.progrEnd); + ret.push_back({hop.pointStart, seg.getLine().back()}); + } else { + ret.push_back({hop.pointStart, *hop.end->getFrom()->pl().getGeom()}); + } + } else { + ret.push_back({hop.pointStart, hop.pointEnd}); + } + } else { + const auto& l = getLine(hop, rAttrs, colors); + ret.push_back(l); + } + } + + return ret; +} + +// _____________________________________________________________________________ +LINE ShapeBuilder::getLine(const EdgeListHop& hop, const RoutingAttrs& rAttrs, + std::map* colors) const { + LINE l; + + const auto& curL = getLine(hop.start); + + if (hop.edges.size() == 0) { + // draw direct line between positions on edges + if (hop.progrStart > 0) { + PolyLine pl(curL); + const auto& seg = pl.getSegment(hop.progrStart, 1); + l.push_back(seg.front()); + } else { + l.push_back(curL.front()); + } + + if (hop.progrEnd > 0) { + PolyLine pl(getLine(hop.end)); + const auto& seg = pl.getSegment(0, hop.progrEnd); + l.push_back(seg.back()); + } else { + l.push_back(*hop.end->getFrom()->pl().getGeom()); + } + + return l; + } + + // special case: start and end are on the same edge! 
+ if (hop.edges.size() == 1 && hop.start == hop.end) { + PolyLine pl(curL); + const auto& seg = pl.getSegment(hop.progrStart, hop.progrEnd); + l.insert(l.end(), seg.getLine().begin(), seg.getLine().end()); + + for (const auto& color : getColorMatch(hop.start, rAttrs)) { + (*colors)[color] += hop.start->pl().getLength(); + } + + return l; + } + + auto from = hop.start->getFrom(); + + if (hop.progrStart > 0) { + PolyLine pl(curL); + const auto& seg = pl.getSegment(hop.progrStart, 1); + l.insert(l.end(), seg.getLine().begin(), seg.getLine().end()); + + double l = hop.start->pl().getLength() * (1 - hop.progrStart); + for (const auto& color : getColorMatch(hop.start, rAttrs)) { + (*colors)[color] += l; + } + } else { + l.insert(l.end(), curL.begin(), curL.end()); + + double l = hop.start->pl().getLength(); + for (const auto& color : getColorMatch(hop.start, rAttrs)) { + (*colors)[color] += l; + } + } + + from = hop.start->getOtherNd(from); + + if (hop.edges.size() > 1) { + for (size_t j = hop.edges.size() - 2; j > 0; j--) { + const auto* e = hop.edges[j]; + const auto& curL = getLine(e); + l.insert(l.end(), curL.begin(), curL.end()); + from = e->getOtherNd(from); + + double l = e->pl().getLength(); + for (const auto& color : getColorMatch(e, rAttrs)) { + (*colors)[color] += l; + } + } + } + + if (hop.progrEnd > 0) { + PolyLine pl(getLine(hop.end)); + const auto& seg = pl.getSegment(0, hop.progrEnd); + l.insert(l.end(), seg.getLine().begin(), seg.getLine().end()); + + double l = hop.end->pl().getLength() * hop.progrEnd; + for (const auto& color : getColorMatch(hop.end, rAttrs)) { + (*colors)[color] += l; + } + } + + if (l.size() > 1) return util::geo::simplify(l, 0.5 / M_PER_DEG); + return l; +} + +// _____________________________________________________________________________ +LINE ShapeBuilder::getLine(const trgraph::Edge* e) const { + LINE l; + if (!e->pl().getGeom() || e->pl().getGeom()->size() == 0) + return {*e->getFrom()->pl().getGeom(), 
*e->getTo()->pl().getGeom()}; + if (e->pl().isRev()) { + l.insert(l.end(), e->pl().getGeom()->rbegin(), e->pl().getGeom()->rend()); + } else { + l.insert(l.end(), e->pl().getGeom()->begin(), e->pl().getGeom()->end()); + } + return l; +} + +// _____________________________________________________________________________ +std::vector ShapeBuilder::getMeasure( + const std::vector& lines) const { + assert(lines.size()); + assert(lines.front().size()); + std::vector ret; + POINT last = lines.front().front(); + + for (const auto& l : lines) { + for (size_t i = 0; i < l.size(); i++) { + if (ret.size() == 0) { + ret.push_back(0); + } else { + float v = ret.back() + util::geo::haversine(last, l[i]); + assert(v >= ret.back()); // required by GTFS standard! + ret.push_back(v); + } + last = l[i]; + } + } + + return ret; +} + +// _____________________________________________________________________________ +void ShapeBuilder::shapeWorker( + const std::vector* tries, std::atomic* at, + std::map* shpUse, + std::map>>* routeColors, + TrGraphEdgs* gtfsGraph) { + while (1) { + size_t j = (*at)++; + if (j >= tries->size()) return; + + int step = tries->size() < 10 ? 
tries->size() : 10; + + if (j % (tries->size() / step) == 0) { + LOG(INFO) << "@ " << (static_cast((j * 1.0) / tries->size() * 100)) + << "%"; + LOG(DEBUG) << "(@ trie forest " << j << "/" << tries->size() << ")"; + } + + const auto& forest = *((*tries)[j]); + + // hop cache per forest, thus per routing attributes + HopCache hopCacheLoc; + HopCache* hopCache = 0; + + if (!_cfg.noHopCache) hopCache = &hopCacheLoc; + + for (size_t i = 0; i < forest.size(); i++) { + const TripTrie* trie = &(forest[i]); + const auto& hops = shapeify(trie, hopCache); + + for (const auto& leaf : trie->getNdTrips()) { + std::vector distances; + const RoutingAttrs& rAttrs = trie->getNd(leaf.first).rAttrs; + + uint32_t color; + + const ad::cppgtfs::gtfs::Shape& shp = + getGtfsShape(hops.at(leaf.first), leaf.second[0], + leaf.second.size(), rAttrs, &distances, &color); + + if (_cfg.buildTransitGraph) { + writeTransitGraph(hops.at(leaf.first), gtfsGraph, leaf.second); + } + + for (auto t : leaf.second) { + if (_cfg.writeColors && color != NO_COLOR && + t->getRoute()->getColor() == NO_COLOR && + t->getRoute()->getTextColor() == NO_COLOR) { + (*routeColors)[t->getRoute()][color].push_back(t); + } else { + // else, use the original route color + (*routeColors)[t->getRoute()][t->getRoute()->getColor()].push_back( + t); + } + + if (!t->getShape().empty() && (*shpUse)[t->getShape()] > 0) { + (*shpUse)[t->getShape()]--; + if ((*shpUse)[t->getShape()] == 0) { + std::lock_guard guard(_shpMutex); + _feed->getShapes().remove(t->getShape()); + } + } + setShape(t, shp, distances); + } + } + } + } +} + +// _____________________________________________________________________________ +void ShapeBuilder::edgCandWorker(std::vector* stops, + GrpCache* cache) { + for (auto stop : *stops) { + (*cache)[stop] = getEdgCands(stop); + } +} + +// _____________________________________________________________________________ +std::set ShapeBuilder::getColorMatch( + const trgraph::Edge* e, const RoutingAttrs& rAttrs) 
const { + std::set ret; + for (const auto* l : e->pl().getLines()) { + auto simi = rAttrs.simi(l); + if (simi.nameSimilar && l->color != NO_COLOR) ret.insert(l->color); + } + + return ret; +} + +// _____________________________________________________________________________ +uint32_t ShapeBuilder::getTextColor(uint32_t c) const { + double r = (c & 0x00FF0000) >> 16; + double g = (c & 0x0000FF00) >> 8; + double b = (c & 0x000000FF); + + // gray value approx + double a = sqrt((r * r + g * g + b * b) / 3); + + // below a certain gray value, use white, else black + if (a < 140) return 0x00FFFFFF; + return 0; +} + +// _____________________________________________________________________________ +double ShapeBuilder::emWeight(double mDist) const { + if (_motCfg.routingOpts.emPenMethod == "exp") { + return mDist * _motCfg.routingOpts.stationDistPenFactor; + } + + if (_motCfg.routingOpts.emPenMethod == "norm") { + double s = mDist * _motCfg.routingOpts.stationDistPenFactor; + return 0.5 * s * s; + } + + return mDist; +} diff --git a/src/pfaedle/router/ShapeBuilder.h b/src/pfaedle/router/ShapeBuilder.h index f782fac..c0309e2 100644 --- a/src/pfaedle/router/ShapeBuilder.h +++ b/src/pfaedle/router/ShapeBuilder.h @@ -5,40 +5,46 @@ #ifndef PFAEDLE_ROUTER_SHAPEBUILDER_H_ #define PFAEDLE_ROUTER_SHAPEBUILDER_H_ +#include #include #include #include #include #include #include + #include "ad/cppgtfs/gtfs/Feed.h" +#include "pfaedle/Def.h" #include "pfaedle/config/MotConfig.h" #include "pfaedle/config/PfaedleConfig.h" -#include "pfaedle/eval/Collector.h" +#include "pfaedle/gtfs/Feed.h" #include "pfaedle/netgraph/Graph.h" #include "pfaedle/osm/Restrictor.h" #include "pfaedle/router/Misc.h" #include "pfaedle/router/Router.h" +#include "pfaedle/router/Stats.h" +#include "pfaedle/router/TripTrie.h" +#include "pfaedle/statsimi-classifier/StatsimiClassifier.h" #include "pfaedle/trgraph/Graph.h" +#include "util/geo/Geo.h" namespace pfaedle { namespace router { -using ad::cppgtfs::gtfs::Stop; 
-using ad::cppgtfs::gtfs::Trip; -using ad::cppgtfs::gtfs::Feed; - -struct Shape { - router::EdgeListHops hops; - double avgHopDist; -}; - -typedef std::vector Cluster; -typedef std::vector Clusters; -typedef std::pair StopPair; -typedef std::unordered_map TripRAttrs; -typedef std::unordered_map> +typedef std::vector> TripForest; +typedef std::map TripForests; +typedef std::pair + StopPair; +typedef std::unordered_map + TripRAttrs; +typedef std::unordered_map> TrGraphEdgs; +typedef std::map>> + RouteRefColors; +typedef std::unordered_map + GrpCache; /* * Layer class for the router. Provides an interface for direct usage with @@ -46,74 +52,121 @@ typedef std::unordered_map> */ class ShapeBuilder { public: - ShapeBuilder(Feed* feed, MOTs mots, const config::MotConfig& motCfg, - eval::Collector* ecoll, const config::Config& cfg); + ShapeBuilder( + pfaedle::gtfs::Feed* feed, MOTs mots, const config::MotConfig& motCfg, + trgraph::Graph* g, router::FeedStops* stops, osm::Restrictor* restr, + const pfaedle::statsimiclassifier::StatsimiClassifier* classifier, + router::Router* router, const config::Config& cfg); - void shape(pfaedle::netgraph::Graph* ng); + Stats shapeify(pfaedle::netgraph::Graph* outNg); router::FeedStops* getFeedStops(); - const NodeCandGroup& getNodeCands(const Stop* s) const; + // shape single trip + std::pair, Stats> shapeL(pfaedle::gtfs::Trip* trip); - util::geo::FLine shapeL(const router::NodeCandRoute& ncr, - const router::RoutingAttrs& rAttrs); - util::geo::FLine shapeL(Trip* trip); - - pfaedle::router::Shape shape(Trip* trip) const; - pfaedle::router::Shape shape(Trip* trip); + std::map shapeify( + const TripTrie* trie, HopCache* hopCache) const; + EdgeListHops shapeify(pfaedle::gtfs::Trip* trip); const trgraph::Graph* getGraph() const; - static osm::BBoxIdx getPaddedGtfsBox(const Feed* feed, double pad, - const MOTs& mots, - const std::string& tid); + static void getGtfsBox(const pfaedle::gtfs::Feed* feed, const MOTs& mots, + const std::string& 
tid, bool dropShapes, + osm::BBoxIdx* box, double maxSpeed, + std::vector* hopDists, uint8_t verbosity); private: - Feed* _feed; + pfaedle::gtfs::Feed* _feed; MOTs _mots; config::MotConfig _motCfg; - eval::Collector* _ecoll; config::Config _cfg; - trgraph::Graph _g; - router::Router _crouter; + trgraph::Graph* _g; + router::FeedStops* _stops; - router::FeedStops _stops; + EdgeCandGroup _emptyNCG; - NodeCandGroup _emptyNCG; - - size_t _curShpCnt, _numThreads; + size_t _curShpCnt; std::mutex _shpMutex; TripRAttrs _rAttrs; - osm::Restrictor _restr; + osm::Restrictor* _restr; + const pfaedle::statsimiclassifier::StatsimiClassifier* _classifier; + GrpCache _grpCache; - void writeMotStops(); - void buildGraph(); + router::Router* _router; - Clusters clusterTrips(Feed* f, MOTs mots); - void writeTransitGraph(const Shape& shp, TrGraphEdgs* edgs, - const Cluster& cluster) const; - void buildTrGraph(TrGraphEdgs* edgs, pfaedle::netgraph::Graph* ng) const; + TripForests clusterTrips(pfaedle::gtfs::Feed* f, MOTs mots); + void buildNetGraph(TrGraphEdgs* edgs, pfaedle::netgraph::Graph* ng) const; - std::string getFreeShapeId(Trip* t); + std::string getFreeShapeId(pfaedle::gtfs::Trip* t); + ad::cppgtfs::gtfs::Shape getGtfsShape(const EdgeListHops& shp, + pfaedle::gtfs::Trip* t, + size_t numOthers, + const RoutingAttrs& rAttrs, + std::vector* hopDists, + uint32_t* bestColor); - ad::cppgtfs::gtfs::Shape* getGtfsShape(const Shape& shp, Trip* t, - std::vector* hopDists); + void setShape(pfaedle::gtfs::Trip* t, const ad::cppgtfs::gtfs::Shape& s, + const std::vector& dists); - void setShape(Trip* t, ad::cppgtfs::gtfs::Shape* s, - const std::vector& dists); + EdgeCandGroup getEdgCands(const ad::cppgtfs::gtfs::Stop* s) const; - router::NodeCandRoute getNCR(Trip* trip) const; - double avgHopDist(Trip* trip) const; - const router::RoutingAttrs& getRAttrs(const Trip* trip) const; - const router::RoutingAttrs& getRAttrs(const Trip* trip); - bool routingEqual(Trip* a, Trip* b); - bool 
routingEqual(const Stop* a, const Stop* b); - router::EdgeListHops route(const router::NodeCandRoute& ncr, - const router::RoutingAttrs& rAttrs) const; + router::EdgeCandMap getECM(const TripTrie* trie) const; + std::vector getTransTimes(pfaedle::gtfs::Trip* trip) const; + std::vector getTransDists(pfaedle::gtfs::Trip* trip) const; + const router::RoutingAttrs& getRAttrs(const pfaedle::gtfs::Trip* trip) const; + const router::RoutingAttrs& getRAttrs(const pfaedle::gtfs::Trip* trip); + std::map route( + const TripTrie* trie, const EdgeCandMap& ecm, + HopCache* hopCache) const; + double emWeight(double mDist) const; + + void buildCandCache(const TripForests& clusters); + void buildIndex(); + + std::vector getGeom(const EdgeListHops& shp, const RoutingAttrs& rAttrs, + std::map* colors, Trip* t, + size_t numOthers) const; + double timePen(int candTime, int schedTime) const; + + LINE getLine(const EdgeListHop& hop, const RoutingAttrs&, + std::map* colMap) const; + LINE getLine(const trgraph::Edge* edg) const; + std::vector getMeasure(const std::vector& lines) const; + + trgraph::Edge* deg2reachable(trgraph::Edge* e, + std::set edgs) const; + + EdgeCandGroup timeExpand(const EdgeCand& ec, int time) const; + + std::set getColorMatch(const trgraph::Edge* e, + const RoutingAttrs& rAttrs) const; + + void updateRouteColors(const RouteRefColors& c); + + uint32_t getTextColor(uint32_t c) const; + + void writeTransitGraph(const router::EdgeListHops& shp, TrGraphEdgs* edgs, + const std::vector& trips) const; + + void shapeWorker( + const std::vector* tries, std::atomic* at, + std::map* shpUsage, + std::map>>*, + TrGraphEdgs* gtfsGraph); + + void edgCandWorker(std::vector* stops, GrpCache* cache); + void clusterWorker(const std::vector* rAttrs, + const std::map>* trips, + TripForests* forest); + + pfaedle::trgraph::EdgeGrid _eGrid; + pfaedle::trgraph::NodeGrid _nGrid; }; + } // namespace router } // namespace pfaedle diff --git a/src/pfaedle/router/Stats.h 
b/src/pfaedle/router/Stats.h new file mode 100644 index 0000000..fa3659a --- /dev/null +++ b/src/pfaedle/router/Stats.h @@ -0,0 +1,48 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#ifndef PFAEDLE_ROUTER_STATS_H_ +#define PFAEDLE_ROUTER_STATS_H_ + +#include +#include +#include +#include "util/String.h" + +namespace pfaedle { +namespace router { + +struct Stats { + Stats() + : totNumTrips(0), + numTries(0), + numTrieLeafs(0), + solveTime(0), + dijkstraIters(0) {} + size_t totNumTrips; + size_t numTries; + size_t numTrieLeafs; + double solveTime; + size_t dijkstraIters; +}; + +inline Stats operator+ (const Stats& c1, const Stats& c2) { + Stats ret = c1; + ret.totNumTrips += c2.totNumTrips; + ret.numTries += c2.numTries; + ret.numTrieLeafs += c2.numTrieLeafs; + ret.solveTime += c2.solveTime; + ret.dijkstraIters += c2.dijkstraIters; + return ret; +} + +inline Stats& operator+= (Stats& c1, const Stats& c2) { + c1 = c1 + c2; + return c1; +} + +} // namespace router +} // namespace pfaedle + +#endif // PFAEDLE_ROUTER_STATS_H_ diff --git a/src/pfaedle/router/TripTrie.h b/src/pfaedle/router/TripTrie.h new file mode 100644 index 0000000..dfcfde5 --- /dev/null +++ b/src/pfaedle/router/TripTrie.h @@ -0,0 +1,67 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifndef PFAEDLE_ROUTER_TRIPTRIE_H_ +#define PFAEDLE_ROUTER_TRIPTRIE_H_ + +#include +#include +#include +#include "ad/cppgtfs/gtfs/Feed.h" +#include "pfaedle/gtfs/Feed.h" +#include "pfaedle/gtfs/StopTime.h" +#include "pfaedle/router/RoutingAttrs.h" + +namespace pfaedle { +namespace router { + +struct TripTrieNd { + const ad::cppgtfs::gtfs::Stop* reprStop; + std::string stopName; // the stop name at this node + std::string platform; // the platform of node + POINT pos; // the position of this node + double lat, lng; + int time; + bool arr; + int accTime; + size_t trips; + size_t parent; + std::vector childs; + RoutingAttrs rAttrs; +}; + +template +class TripTrie { + public: + // init node 0, this is the first decision node + TripTrie() : _nds(1) {} + bool addTrip(TRIP* trip, const RoutingAttrs& rAttrs, + bool timeEx, bool degen); + + const std::vector& getNds() const; + const TripTrieNd& getNd(size_t nid) const; + + void toDot(std::ostream& os, const std::string& rootName, size_t gid) const; + const std::map>& getNdTrips() const; + + private: + std::vector _nds; + std::map _tripNds; + std::map> _ndTrips; + + bool add(TRIP* trip, const RoutingAttrs& rAttrs, bool timeEx); + size_t get(TRIP* trip, bool timeEx); + + size_t getMatchChild(size_t parentNid, const std::string& stopName, + const std::string& platform, POINT pos, int time, + bool timeEx) const; + size_t insert(const ad::cppgtfs::gtfs::Stop* stop, const RoutingAttrs& rAttrs, + const POINT& pos, int time, bool arr, size_t parent); +}; + +#include "pfaedle/router/TripTrie.tpp" +} // namespace router +} // namespace pfaedle + +#endif // PFAEDLE_ROUTER_TRIPTRIE_H_ diff --git a/src/pfaedle/router/TripTrie.tpp b/src/pfaedle/router/TripTrie.tpp new file mode 100644 index 0000000..9f78915 --- /dev/null +++ b/src/pfaedle/router/TripTrie.tpp @@ -0,0 +1,246 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#include +#include +#include + +#include "TripTrie.h" +#include "ad/cppgtfs/gtfs/Feed.h" +#include "pfaedle/gtfs/Feed.h" +#include "pfaedle/gtfs/StopTime.h" + +using pfaedle::gtfs::Trip; +using pfaedle::router::TripTrie; + +// _____________________________________________________________________________ +template +bool TripTrie::addTrip(TRIP* trip, const RoutingAttrs& rAttrs, + bool timeEx, bool degen) { + if (!degen) return add(trip, rAttrs, timeEx); + + // check if trip is already fully and uniquely contained, if not, fail + size_t existing = get(trip, timeEx); + if (existing && _nds[existing].childs.size() == 0) { + _tripNds[trip] = existing; + _ndTrips[existing].push_back(trip); + return true; + } else { + return false; + } +} + +// _____________________________________________________________________________ +template +bool TripTrie::add(TRIP* trip, const RoutingAttrs& rAttrs, bool timeEx) { + if (trip->getStopTimes().size() == 0) return false; + + int startSecs = 0; + + if (!trip->getStopTimes().front().getDepartureTime().empty()) { + startSecs = trip->getStopTimes().front().getDepartureTime().seconds(); + } + + size_t curNdId = 0; + for (size_t stId = 0; stId < trip->getStopTimes().size(); stId++) { + const auto st = trip->getStopTimes()[stId]; + + std::string name = st.getStop()->getName(); + std::string platform = st.getStop()->getPlatformCode(); + POINT pos = util::geo::latLngToWebMerc(st.getStop()->getLat(), + st.getStop()->getLng()); + + if (stId > 0) { + int arrTime = startSecs; + + if (!st.getArrivalTime().empty()) { + arrTime = st.getArrivalTime().seconds() - startSecs; + } + + size_t arrChild = + getMatchChild(curNdId, name, platform, pos, arrTime, timeEx); + + if (arrChild) { + curNdId = arrChild; + + _nds[arrChild].accTime += arrTime; + _nds[arrChild].trips += 1; + + _nds[arrChild].rAttrs.merge(rAttrs); + } else { + curNdId = insert(st.getStop(), rAttrs, pos, arrTime, true, curNdId); + } + } + + if (stId < 
trip->getStopTimes().size() - 1) { + int depTime = startSecs; + + if (!st.getDepartureTime().empty()) { + depTime = st.getDepartureTime().seconds() - startSecs; + } + + size_t depChild = + getMatchChild(curNdId, name, platform, pos, depTime, timeEx); + + if (depChild) { + curNdId = depChild; + + _nds[depChild].accTime += depTime; + _nds[depChild].trips += 1; + + _nds[depChild].rAttrs.merge(rAttrs); + } else { + if (stId == 0 && _tripNds.size() > 0) return false; + curNdId = insert(st.getStop(), rAttrs, pos, depTime, false, curNdId); + } + } + } + + // curNdId is now the last matching node, insert the trip here + _tripNds[trip] = curNdId; + _ndTrips[curNdId].push_back(trip); + + return true; +} + +// _____________________________________________________________________________ +template +size_t TripTrie::get(TRIP* trip, bool timeEx) { + if (trip->getStopTimes().size() == 0) return false; + + int startSecs = trip->getStopTimes().front().getDepartureTime().seconds(); + + size_t curNdId = 0; + for (size_t stId = 0; stId < trip->getStopTimes().size(); stId++) { + const auto st = trip->getStopTimes()[stId]; + + std::string name = st.getStop()->getName(); + std::string platform = st.getStop()->getPlatformCode(); + POINT pos = util::geo::latLngToWebMerc(st.getStop()->getLat(), + st.getStop()->getLng()); + + if (stId > 0) { + int arrTime = startSecs; + + if (!st.getArrivalTime().empty()) { + arrTime = st.getArrivalTime().seconds() - startSecs; + } + + size_t arrChild = + getMatchChild(curNdId, name, platform, pos, arrTime, timeEx); + + if (arrChild) { + curNdId = arrChild; + } else { + return 0; + } + } + + if (stId < trip->getStopTimes().size() - 1) { + int depTime = startSecs; + + if (!st.getDepartureTime().empty()) { + depTime = st.getDepartureTime().seconds() - startSecs; + } + + size_t depChild = + getMatchChild(curNdId, name, platform, pos, depTime, timeEx); + + if (depChild) { + curNdId = depChild; + } else { + return 0; + } + } + } + + return curNdId; +} + +// 
_____________________________________________________________________________ +template +size_t TripTrie::insert(const ad::cppgtfs::gtfs::Stop* stop, + const RoutingAttrs& rAttrs, const POINT& pos, + int time, bool arr, size_t parent) { + _nds.emplace_back(TripTrieNd{stop, + stop->getName(), + stop->getPlatformCode(), + pos, + stop->getLat(), + stop->getLng(), + time, + arr, + time, + 1, + parent, + {}, + rAttrs}); + _nds[parent].childs.push_back(_nds.size() - 1); + return _nds.size() - 1; +} + +// _____________________________________________________________________________ +template +const std::vector& TripTrie::getNds() const { + return _nds; +} + +// _____________________________________________________________________________ +template +size_t TripTrie::getMatchChild(size_t parentNid, + const std::string& stopName, + const std::string& platform, POINT pos, + int time, bool timeEx) const { + for (size_t child : _nds[parentNid].childs) { + if (_nds[child].stopName == stopName && _nds[child].platform == platform && + util::geo::dist(_nds[child].pos, pos) < 1 && + (!timeEx || _nds[child].time == time)) { + return child; + } + } + + return 0; +} + +// _____________________________________________________________________________ +template +void TripTrie::toDot(std::ostream& os, const std::string& rootName, + size_t gid) const { + os << "digraph triptrie" << gid << " {"; + + for (size_t nid = 0; nid < _nds.size(); nid++) { + std::string color = "white"; + if (_ndTrips.count(nid)) color = "red"; + if (nid == 0) { + os << "\"" << gid << ":0\" [label=\"" << rootName << "\"];\n"; + } else { + os << "\"" << gid << ":" << nid + << "\" [shape=\"box\" style=\"filled\" fillcolor=\"" << color + << "\" label=\"#" << nid << ", " << _nds[nid].stopName << "@" + << util::geo::getWKT(_nds[nid].pos) << " t=" << _nds[nid].time + << "\"];\n"; + } + } + + for (size_t nid = 0; nid < _nds.size(); nid++) { + for (size_t child : _nds[nid].childs) { + os << "\"" << gid << ":" << nid << "\" 
-> \"" << gid << ":" << child + << "\";\n"; + } + } + + os << "}"; +} + +// _____________________________________________________________________________ +template +const std::map>& TripTrie::getNdTrips() const { + return _ndTrips; +} + +// _____________________________________________________________________________ +template +const pfaedle::router::TripTrieNd& TripTrie::getNd(size_t nid) const { + return _nds[nid]; +} diff --git a/src/pfaedle/router/Weights.cpp b/src/pfaedle/router/Weights.cpp new file mode 100644 index 0000000..4b96918 --- /dev/null +++ b/src/pfaedle/router/Weights.cpp @@ -0,0 +1,259 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#include +#include "pfaedle/router/Weights.h" + +using pfaedle::router::DistDiffTransWeight; +using pfaedle::router::ExpoTransWeight; +using pfaedle::router::LineSimilarity; +using pfaedle::router::NormDistrTransWeight; +using util::geo::haversine; + +// _____________________________________________________________________________ +ExpoTransWeight::DistHeur::DistHeur(double maxV, const RoutingOpts& rOpts, + const std::set& tos) + : _rOpts(rOpts), _maxV(maxV), _maxCentD(0), _lastE(0) { + size_t c = 0; + double x = 0, y = 0; + + for (const auto to : tos) { + x += to->getFrom()->pl().getGeom()->getX(); + y += to->getFrom()->pl().getGeom()->getY(); + c++; + } + + x /= c; + y /= c; + + _center = POINT{x, y}; + + for (const auto to : tos) { + const double cur = haversine(*to->getFrom()->pl().getGeom(), _center); + if (cur > _maxCentD) _maxCentD = cur; + } + + _maxCentD /= _maxV; +} + +// _____________________________________________________________________________ +uint32_t ExpoTransWeight::DistHeur::operator()( + const trgraph::Edge* a, const std::set& b) const { + UNUSED(b); + + // avoid repeated calculation for the same edge over and over again + if (a == _lastE) return _lastC; + + _lastE = a; + + const double d = 
haversine(*a->getFrom()->pl().getGeom(), _center); + const double heur = fmax(0, (d / _maxV - _maxCentD) * 10); + + // avoid overflow + if (heur > std::numeric_limits::max()) { + _lastC = std::numeric_limits::max(); + ; + return _lastC; + } + + _lastC = heur; + return heur; +} + +// _____________________________________________________________________________ +uint32_t ExpoTransWeight::CostFunc::operator()(const trgraph::Edge* from, + const trgraph::Node* n, + const trgraph::Edge* to) const { + if (!from) return 0; + + uint32_t c = from->pl().getCost(); + + if (c == std::numeric_limits::max()) return c; + + if (from == _lastFrom) { + // the transit line simi calculation is independent of the "to" edge, so if + // the last "from" edge was the same, skip it! + c = _lastC; + } else if (!_noLineSimiPen) { + const auto& simi = transitLineSimi(from); + + if (!simi.nameSimilar) { + if (_rOpts.lineUnmatchedPunishFact < 1) { + c = std::ceil(static_cast(c) * _rOpts.lineUnmatchedPunishFact); + } else if (_rOpts.lineUnmatchedPunishFact > 1) { + double a = + std::round(static_cast(c) * _rOpts.lineUnmatchedPunishFact); + if (a > std::numeric_limits::max()) + return std::numeric_limits::max(); + c = a; + } + } + + if (!simi.fromSimilar) { + if (_rOpts.lineNameFromUnmatchedPunishFact < 1) { + c = std::ceil(static_cast(c) * + _rOpts.lineNameFromUnmatchedPunishFact); + } else if (_rOpts.lineNameFromUnmatchedPunishFact > 1) { + double a = std::round(static_cast(c) * + _rOpts.lineNameFromUnmatchedPunishFact); + if (a > std::numeric_limits::max()) + return std::numeric_limits::max(); + c = a; + } + } + + if (!simi.toSimilar) { + if (_rOpts.lineNameToUnmatchedPunishFact < 1) { + c = std::ceil(static_cast(c) * + _rOpts.lineNameToUnmatchedPunishFact); + } else if (_rOpts.lineNameToUnmatchedPunishFact > 1) { + double a = std::round(static_cast(c) * + _rOpts.lineNameToUnmatchedPunishFact); + if (a > std::numeric_limits::max()) + return std::numeric_limits::max(); + c = a; + } + } + + _lastC 
= c; + _lastFrom = from; + } + + uint32_t overflowCheck = c; + + if (n && !n->pl().isTurnCycle()) { + if (_rOpts.fullTurnPunishFac != 0 && from->getFrom() == to->getTo() && + from->getTo() == to->getFrom()) { + // trivial full turn + c += _rOpts.fullTurnPunishFac; + + if (c <= overflowCheck) return std::numeric_limits::max(); + overflowCheck = c; + } else if (_rOpts.fullTurnPunishFac != 0 && n->getDeg() > 2) { + // otherwise, only intersection angles will be punished + + double ang = util::geo::innerProd( + *n->pl().getGeom(), from->pl().backHop(), to->pl().frontHop()); + + if (ang < _rOpts.fullTurnAngle) { + c += _rOpts.fullTurnPunishFac; + if (c <= overflowCheck) return std::numeric_limits::max(); + overflowCheck = c; + } + } + + // turn restriction cost + if (_rOpts.turnRestrCost > 0 && from->pl().isRestricted() && + !_res.may(from, to, n)) { + c += _rOpts.turnRestrCost; + if (c <= overflowCheck) return std::numeric_limits::max(); + } + } + + return c; +} + +// _____________________________________________________________________________ +LineSimilarity ExpoTransWeight::CostFunc::transitLineSimi( + const trgraph::Edge* e) const { + if (_rAttrs.shortName.empty() && _rAttrs.lineFrom.empty() && + _rAttrs.lineTo.empty()) + return {true, true, true}; + + LineSimilarity best = {false, false, false}; + for (const auto* l : e->pl().getLines()) { + auto simi = _rAttrs.simi(l); + if (simi.nameSimilar && simi.toSimilar && simi.fromSimilar) return simi; + if (best < simi) best = simi; + } + + return best; +} + +// _____________________________________________________________________________ +double ExpoTransWeight::weight(uint32_t c, double d, double t0, double d0, + const RoutingOpts& rOpts) { + UNUSED(t0); + UNUSED(d); + UNUSED(d0); + return rOpts.transitionPen * static_cast(c) / 10.0; +} + +// _____________________________________________________________________________ +uint32_t ExpoTransWeight::invWeight(double c, const RoutingOpts& rOpts) { + return std::round((c / 
rOpts.transitionPen) * 10.0); +} + +// _____________________________________________________________________________ +uint32_t ExpoTransWeight::maxCost(double tTime, const RoutingOpts& rOpts) { + // abort after 3 times the scheduled time, but assume a min time of + // 1 minute! + return std::ceil(fmax(tTime, 60) * 3.0 * rOpts.lineUnmatchedPunishFact * + rOpts.lineNameToUnmatchedPunishFact * + rOpts.lineNameFromUnmatchedPunishFact * 10); +} + +// _____________________________________________________________________________ + +// _____________________________________________________________________________ +double NormDistrTransWeight::weight(uint32_t cs, double d, double t0, double d0, + const RoutingOpts& rOpts) { + UNUSED(d); + UNUSED(d0); + UNUSED(rOpts); + + double t = static_cast(cs) / 10.0; + + // standard deviation of normal distribution + double standarddev = 1; + + // no backwards time travel! + if (t0 < 0) return std::numeric_limits::infinity(); + + // always assume it takes at least 10 seconds to travel + t0 = fmax(10, t0); + + double cNorm = (t / t0 - 1) / standarddev; + double normWeight = cNorm * cNorm; + + double expWeight = ExpoTransWeight::weight(cs, d, t0, d0, rOpts); + + return normWeight + expWeight; +} + +// _____________________________________________________________________________ +uint32_t NormDistrTransWeight::invWeight(double c, const RoutingOpts& rOpts) { + UNUSED(rOpts); + UNUSED(c); + + throw(std::runtime_error("Cannot apply inv weight to DistDiffTransWeight")); +} + +// _____________________________________________________________________________ + +// _____________________________________________________________________________ +double DistDiffTransWeight::weight(uint32_t c, double d, double t0, double d0, + const RoutingOpts& rOpts) { + UNUSED(t0); + UNUSED(c); + + double w = fabs(d - d0); + + return rOpts.transitionPen * w; +} + +// _____________________________________________________________________________ +uint32_t 
DistDiffTransWeight::invWeight(double c, const RoutingOpts& rOpts) { + UNUSED(rOpts); + UNUSED(c); + + throw(std::runtime_error("Cannot apply inv weight to DistDiffTransWeight")); +} + +// _____________________________________________________________________________ +uint32_t DistDiffTransWeight::maxCost(double tTime, const RoutingOpts& rOpts) { + UNUSED(tTime); + UNUSED(rOpts); + return std::numeric_limits::max(); +} diff --git a/src/pfaedle/router/Weights.h b/src/pfaedle/router/Weights.h new file mode 100644 index 0000000..1a616f5 --- /dev/null +++ b/src/pfaedle/router/Weights.h @@ -0,0 +1,161 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#ifndef PFAEDLE_ROUTER_WEIGHTS_H_ +#define PFAEDLE_ROUTER_WEIGHTS_H_ + +#include +#include "pfaedle/osm/Restrictor.h" +#include "pfaedle/router/Misc.h" +#include "pfaedle/router/RoutingAttrs.h" +#include "pfaedle/trgraph/Graph.h" +#include "util/graph/EDijkstra.h" + +namespace pfaedle { +namespace router { + +typedef util::graph::EDijkstra::CostFunc + RCostFunc; +typedef util::graph::EDijkstra::HeurFunc + RHeurFunc; + +class ExpoTransWeight { + public: + struct CostFunc : public RCostFunc { + CostFunc(const RoutingAttrs& rAttrs, const RoutingOpts& rOpts, + const osm::Restrictor& res, uint32_t max) + : _rAttrs(rAttrs), + _rOpts(rOpts), + _res(res), + _inf(max), + _noLineSimiPen(false), + _lastFrom(0) { + if (_rAttrs.lineFrom.empty() && _rAttrs.lineTo.empty() && + _rAttrs.shortName.empty()) { + _noLineSimiPen = true; + } + if (_rOpts.lineUnmatchedPunishFact == 1) { + _noLineSimiPen = true; + } + } + + const RoutingAttrs& _rAttrs; + const RoutingOpts& _rOpts; + const osm::Restrictor& _res; + uint32_t _inf; + bool _noLineSimiPen; + mutable const trgraph::Edge* _lastFrom; + mutable uint32_t _lastC = 0; + + uint32_t operator()(const trgraph::Edge* from, const trgraph::Node* n, + const trgraph::Edge* to) const; + uint32_t inf() const { return _inf; } + + 
LineSimilarity transitLineSimi(const trgraph::Edge* e) const; + }; + + struct DistHeur : RHeurFunc { + DistHeur(double maxV, const RoutingOpts& rOpts, + const std::set& tos); + + const RoutingOpts& _rOpts; + double _maxV; + POINT _center; + double _maxCentD; + uint32_t operator()(const trgraph::Edge* a, + const std::set& b) const; + mutable const trgraph::Edge* _lastE; + mutable uint32_t _lastC = 0; + }; + + static uint32_t maxCost(double tTime, const RoutingOpts& rOpts); + static double weight(uint32_t c, double d, double t0, double d0, + const RoutingOpts& rOpts); + static uint32_t invWeight(double cost, const RoutingOpts& rOpts); + static const bool ALLOWS_FAST_ROUTE = true; + static const bool NEED_DIST = false; +}; + +class ExpoTransWeightNoHeur : public ExpoTransWeight { + public: + struct DistHeur : RHeurFunc { + DistHeur(double maxV, const RoutingOpts& rOpts, + const std::set& tos) { + UNUSED(maxV); + UNUSED(rOpts); + UNUSED(tos); + } + + uint32_t operator()(const trgraph::Edge* a, + const std::set& b) const { + UNUSED(a); + UNUSED(b); + return 0; + } + }; +}; + +class NormDistrTransWeight : public ExpoTransWeight { + public: + static double weight(uint32_t c, double d, double t0, double d0, + const RoutingOpts& rOpts); + static uint32_t invWeight(double cost, const RoutingOpts& rOpts); + static const bool ALLOWS_FAST_ROUTE = false; + static const bool NEED_DIST = false; +}; + +class NormDistrTransWeightNoHeur : public NormDistrTransWeight { + public: + struct DistHeur : RHeurFunc { + DistHeur(double maxV, const RoutingOpts& rOpts, + const std::set& tos) { + UNUSED(maxV); + UNUSED(rOpts); + UNUSED(tos); + } + + uint32_t operator()(const trgraph::Edge* a, + const std::set& b) const { + UNUSED(a); + UNUSED(b); + return 0; + } + }; +}; + +class DistDiffTransWeight : public ExpoTransWeight { + public: + static uint32_t maxCost(double tTime, const RoutingOpts& rOpts); + static double weight(uint32_t c, double d, double t0, double d0, + const RoutingOpts& rOpts); 
+ static uint32_t invWeight(double cost, const RoutingOpts& rOpts); + static const bool ALLOWS_FAST_ROUTE = false; + static const bool NEED_DIST = true; +}; + +class DistDiffTransWeightNoHeur : public DistDiffTransWeight { + public: + struct DistHeur : RHeurFunc { + DistHeur(double maxV, const RoutingOpts& rOpts, + const std::set& tos) { + UNUSED(maxV); + UNUSED(rOpts); + UNUSED(tos); + } + + uint32_t operator()(const trgraph::Edge* a, + const std::set& b) const { + UNUSED(a); + UNUSED(b); + return 0; + } + }; +}; + +} // namespace router +} // namespace pfaedle + +#endif // PFAEDLE_ROUTER_WEIGHTS_H_ diff --git a/src/pfaedle/statsimi-classifier/StatsimiClassifier.cpp b/src/pfaedle/statsimi-classifier/StatsimiClassifier.cpp new file mode 100644 index 0000000..ca90e39 --- /dev/null +++ b/src/pfaedle/statsimi-classifier/StatsimiClassifier.cpp @@ -0,0 +1,104 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#include +#include +#include +#include "pfaedle/Def.h" +#include "pfaedle/statsimi-classifier/StatsimiClassifier.h" +#include "util/geo/Geo.h" + +using pfaedle::statsimiclassifier::BTSClassifier; +using pfaedle::statsimiclassifier::EDClassifier; +using pfaedle::statsimiclassifier::JaccardClassifier; +using pfaedle::statsimiclassifier::JaccardGeodistClassifier; +using pfaedle::statsimiclassifier::PEDClassifier; + +// _____________________________________________________________________________ +bool JaccardGeodistClassifier::similar(const std::string& nameA, + const POINT& posA, + const std::string& nameB, + const POINT& posB) const { + const double THRES_M = + 0.00815467271246994481; // ln 2/85 from statsimi evaluation + const double THRES_JACC = .5; // from statsimi evaluation + + const double m = exp(-THRES_M * util::geo::haversine(posA, posB)); + double jacc = util::jaccardSimi(nameA, nameB); + + if (jacc > THRES_JACC) + jacc = .5 + (jacc - THRES_JACC) / (2.0 * (1.0 - THRES_JACC)); + else + 
jacc = jacc / (2.0 * THRES_JACC); + + return ((m + jacc) / 2.0) > 0.5; +} + +// _____________________________________________________________________________ +bool JaccardGeodistClassifier::similar(const std::string& nameA, + const std::string& nameB) const { + return util::jaccardSimi(nameA, nameB) > 0.45; // 0.45 from statsimi paper +} + +// _____________________________________________________________________________ +bool JaccardClassifier::similar(const std::string& nameA, const POINT& posA, + const std::string& nameB, + const POINT& posB) const { + UNUSED(posA); + UNUSED(posB); + return similar(nameA, nameB); +} + +// _____________________________________________________________________________ +bool JaccardClassifier::similar(const std::string& nameA, + const std::string& nameB) const { + return util::jaccardSimi(nameA, nameB) > 0.45; // 0.45 from statsimi paper +} + +// _____________________________________________________________________________ +bool BTSClassifier::similar(const std::string& nameA, const POINT& posA, + const std::string& nameB, const POINT& posB) const { + UNUSED(posA); + UNUSED(posB); + return similar(nameA, nameB); +} + +// _____________________________________________________________________________ +bool BTSClassifier::similar(const std::string& nameA, + const std::string& nameB) const { + return util::btsSimi(nameA, nameB) > 0.85; // 0.85 from statsimi paper +} + +// _____________________________________________________________________________ +bool EDClassifier::similar(const std::string& nameA, const POINT& posA, + const std::string& nameB, const POINT& posB) const { + UNUSED(posA); + UNUSED(posB); + return similar(nameA, nameB); +} + +// _____________________________________________________________________________ +bool EDClassifier::similar(const std::string& nameA, + const std::string& nameB) const { + double edSimi = 1.0 - ((util::editDist(nameA, nameB) * 1.0) / + fmax(nameA.size(), nameB.size())); + return edSimi > 0.85; // 
0.85 from statsimi paper +} + +// _____________________________________________________________________________ +bool PEDClassifier::similar(const std::string& nameA, const POINT& posA, + const std::string& nameB, const POINT& posB) const { + UNUSED(posA); + UNUSED(posB); + return similar(nameA, nameB); +} + +// _____________________________________________________________________________ +bool PEDClassifier::similar(const std::string& nameA, + const std::string& nameB) const { + double a = (util::prefixEditDist(nameA, nameB) * 1.0) / (nameA.size() * 1.0); + double b = (util::prefixEditDist(nameB, nameA) * 1.0) / (nameB.size() * 1.0); + double pedSimi = 1.0 - fmin(a, b); + return pedSimi > 0.875; // 0.875 average of values from statsimi paper +} diff --git a/src/pfaedle/statsimi-classifier/StatsimiClassifier.h b/src/pfaedle/statsimi-classifier/StatsimiClassifier.h new file mode 100644 index 0000000..cdde205 --- /dev/null +++ b/src/pfaedle/statsimi-classifier/StatsimiClassifier.h @@ -0,0 +1,68 @@ +// Copyright 2020, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#ifndef PFAEDLE_STATSIMI_CLASSIFIER_STATSIMICLASSIFIER_H_ +#define PFAEDLE_STATSIMI_CLASSIFIER_STATSIMICLASSIFIER_H_ + +#include +#include "pfaedle/Def.h" +#include "util/geo/Geo.h" + +namespace pfaedle { +namespace statsimiclassifier { + +class StatsimiClassifier { + public: + virtual ~StatsimiClassifier() {} + virtual bool similar(const std::string& nameA, const POINT& posA, + const std::string& nameB, const POINT& posB) const = 0; + + virtual bool similar(const std::string& nameA, + const std::string& nameB) const = 0; +}; + +class JaccardClassifier : public StatsimiClassifier { + public: + virtual bool similar(const std::string& nameA, const POINT& posA, + const std::string& nameB, const POINT& posB) const; + virtual bool similar(const std::string& nameA, + const std::string& nameB) const; +}; + +class JaccardGeodistClassifier : public StatsimiClassifier { + public: + virtual bool similar(const std::string& nameA, const POINT& posA, + const std::string& nameB, const POINT& posB) const; + virtual bool similar(const std::string& nameA, + const std::string& nameB) const; +}; + +class BTSClassifier : public StatsimiClassifier { + public: + virtual bool similar(const std::string& nameA, const POINT& posA, + const std::string& nameB, const POINT& posB) const; + virtual bool similar(const std::string& nameA, + const std::string& nameB) const; +}; + +class EDClassifier : public StatsimiClassifier { + public: + virtual bool similar(const std::string& nameA, const POINT& posA, + const std::string& nameB, const POINT& posB) const; + virtual bool similar(const std::string& nameA, + const std::string& nameB) const; +}; + +class PEDClassifier : public StatsimiClassifier { + public: + virtual bool similar(const std::string& nameA, const POINT& posA, + const std::string& nameB, const POINT& posB) const; + virtual bool similar(const std::string& nameA, + const std::string& nameB) const; +}; + +} // namespace statsimiclassifier +} // namespace 
pfaedle + +#endif // PFAEDLE_STATSIMI_CLASSIFIER_STATSIMICLASSIFIER_H_ diff --git a/src/pfaedle/tests/CMakeLists.txt b/src/pfaedle/tests/CMakeLists.txt new file mode 100644 index 0000000..deb3632 --- /dev/null +++ b/src/pfaedle/tests/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable(pfaedleTest TestMain.cpp) +target_link_libraries(pfaedleTest pfaedle_dep util) diff --git a/src/pfaedle/tests/TestMain.cpp b/src/pfaedle/tests/TestMain.cpp new file mode 100644 index 0000000..4187a9b --- /dev/null +++ b/src/pfaedle/tests/TestMain.cpp @@ -0,0 +1,329 @@ +// Copyright 2020 +// Author: Patrick Brosi + +#include "pfaedle/osm/Restrictor.h" + +#define private public +#include "pfaedle/router/Router.h" +#undef private +#define private private + +using pfaedle::osm::Restrictor; +using pfaedle::router::CostMatrix; +using pfaedle::router::EdgeCandGroup; +using pfaedle::router::ExpoTransWeight; +using pfaedle::router::LayerCostsDAG; +using pfaedle::router::RouterImpl; +using pfaedle::router::RoutingAttrs; +using pfaedle::router::RoutingOpts; +using util::approx; + +// _____________________________________________________________________________ +uint32_t cmGet(const CostMatrix& m, size_t i, size_t j) { + for (const auto& e : m) { + if (e.first.first == i && e.first.second == j) return e.second; + } + + return -1; +} + +// _____________________________________________________________________________ +int main(int argc, char** argv) { + UNUSED(argc); + UNUSED(argv); + RouterImpl router; + + RoutingAttrs rAttrs; + RoutingOpts rOpts; + Restrictor restr; + LayerCostsDAG initCosts; + + // to make sure we always underestimate the cost in the heuristic for testing + pfaedle::trgraph::NodePL::comps.emplace_back( + pfaedle::trgraph::Component{9999999}); + + // build transit graph + pfaedle::trgraph::Graph g; + auto a = g.addNd(POINT{0, 0}); + auto b = g.addNd(POINT{0, 10}); + auto c = g.addNd(POINT{10, 0}); + auto d = g.addNd(POINT{20, 0}); + + a->pl().setComp(1); + b->pl().setComp(1); + 
c->pl().setComp(1); + d->pl().setComp(1); + + auto eA = g.addEdg(a, c); + auto eB = g.addEdg(b, c); + auto eC = g.addEdg(c, d); + + eA->pl().setCost(10); + eB->pl().setCost(6); + eC->pl().setCost(100); + + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + froms.push_back({eA, 0, 0, {}, 0, {}}); + froms.push_back({eB, 0, 0, {}, 0, {}}); + tos.push_back({eC, 0, 0, {}, 0, {}}); + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hops(froms, tos, &costM, &dists, rAttrs, rOpts, restr, &c, maxTime); + + TEST(cmGet(costM, 0, 0), ==, approx(10)); + TEST(cmGet(costM, 1, 0), ==, approx(6)); + } + + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + froms.push_back({eA, 0, 0, {}, 0, {}}); + froms.push_back({eB, 0, 0, {}, 0, {}}); + tos.push_back({eC, 0, 0.5, {}, 0, {}}); + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hops(froms, tos, &costM, &dists, rAttrs, rOpts, restr, &c, maxTime); + + TEST(cmGet(costM, 0, 0), ==, approx(50 + 10)); + TEST(cmGet(costM, 1, 0), ==, approx(50 + 6)); + } + + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + froms.push_back({eA, 0, 0.5, {}, 0, {}}); + froms.push_back({eB, 0, 2.0 / 3.0, {}, 0, {}}); + tos.push_back({eC, 0, 0, {}, 0, {}}); + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hops(froms, tos, &costM, &dists, rAttrs, rOpts, restr, &c, maxTime); + + TEST(cmGet(costM, 0, 0), ==, approx(5)); + TEST(cmGet(costM, 1, 0), ==, approx(2)); + } + + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + froms.push_back({eA, 0, 0.5, {}, 0, {}}); + froms.push_back({eB, 0, 2.0 / 3.0, {}, 0, {}}); + tos.push_back({eC, 0, 0.9, {}, 0, {}}); + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hops(froms, tos, &costM, &dists, rAttrs, rOpts, restr, &c, maxTime); + + TEST(cmGet(costM, 0, 0), ==, approx(90 + 5)); + TEST(cmGet(costM, 1, 0), ==, approx(90 + 2)); + } + + // with hopsfast + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + 
froms.push_back({eA, 0, 0, {}, 0, {}}); + froms.push_back({eB, 0, 0, {}, 0, {}}); + tos.push_back({eC, 0, 0, {}, 0, {}}); + + LayerCostsDAG initCost{0, 0}; + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, + maxTime); + + TEST(cmGet(costM, 0, 0), >=, maxTime); + TEST(cmGet(costM, 1, 0), ==, approx(6)); + } + + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + froms.push_back({eA, 0, 0, {}, 0, {}}); + froms.push_back({eB, 0, 0, {}, 0, {}}); + tos.push_back({eC, 0, 0.5, {}, 0, {}}); + + LayerCostsDAG initCost{0, 0}; + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, + maxTime); + + TEST(cmGet(costM, 0, 0), >=, maxTime); + TEST(cmGet(costM, 1, 0), ==, approx(50 + 6)); + } + + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + froms.push_back({eA, 0, 0.5, {}, 0, {}}); + froms.push_back({eB, 0, 2.0 / 3.0, {}, 0, {}}); + tos.push_back({eC, 0, 0, {}, 0, {}}); + + LayerCostsDAG initCost{0, 0}; + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, + maxTime); + + TEST(cmGet(costM, 0, 0), >=, maxTime); + TEST(cmGet(costM, 1, 0), ==, approx(2)); + } + + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + froms.push_back({eA, 0, 0.5, {}, 0, {}}); + froms.push_back({eB, 0, 2.0 / 3.0, {}, 0, {}}); + tos.push_back({eC, 0, 0.9, {}, 0, {}}); + + LayerCostsDAG initCost{0, 0}; + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, + maxTime); + + TEST(cmGet(costM, 0, 0), >=, maxTime); + TEST(cmGet(costM, 1, 0), ==, approx(90 + 2)); + } + + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + froms.push_back({eA, 0, 0.5, {}, 0, {}}); + froms.push_back({eB, 0, 0, {}, 0, {}}); + tos.push_back({eC, 0, 0, {}, 0, {}}); + + 
LayerCostsDAG initCost{0, 0}; + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, + maxTime); + + TEST(cmGet(costM, 0, 0), ==, approx(5)); + TEST(cmGet(costM, 1, 0), >=, maxTime); + } + + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + froms.push_back({eA, 0, 0.5, {}, 0, {}}); + froms.push_back({eB, 0, 0, {}, 0, {}}); + tos.push_back({eC, 0, 0, {}, 0, {}}); + + LayerCostsDAG initCost{9999, 0}; + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, + maxTime); + + TEST(cmGet(costM, 0, 0), ==, approx(5)); + TEST(cmGet(costM, 1, 0), >=, maxTime); + } + + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + froms.push_back({eA, 0, 0.5, {}, 0, {}}); + froms.push_back({eA, 0, 0, {}, 0, {}}); + froms.push_back({eB, 0, 0, {}, 0, {}}); + tos.push_back({eC, 0, 0, {}, 0, {}}); + + LayerCostsDAG initCost{6, 0, 20}; + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, + maxTime); + + // we also get this, because the edge is the same! + TEST(cmGet(costM, 0, 0), ==, approx(5)); + TEST(cmGet(costM, 1, 0), ==, approx(10)); + TEST(cmGet(costM, 2, 0), >=, maxTime); + } + + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + froms.push_back({eA, 0, 0.5, {}, 0, {}}); + froms.push_back({eA, 0, 0, {}, 0, {}}); + froms.push_back({eB, 0, 0, {}, 0, {}}); + tos.push_back({eC, 0, 1, {}, 0, {}}); + + LayerCostsDAG initCost{6, 0, 20}; + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, + maxTime); + + // we also get this, because the edge is the same! 
+ TEST(cmGet(costM, 0, 0), ==, approx(5 + 100)); + TEST(cmGet(costM, 1, 0), ==, approx(10 + 100)); + TEST(cmGet(costM, 2, 0), >=, maxTime); + } + + { + EdgeCandGroup froms, tos; + CostMatrix costM, dists; + froms.push_back({eA, 0, 0.5, {}, 0, {}}); + froms.push_back({eA, 0, 0, {}, 0, {}}); + froms.push_back({eB, 0, 0, {}, 0, {}}); + + tos.push_back({eC, 0, 1, {}, 0, {}}); + tos.push_back({eC, 0, 0.5, {}, 0, {}}); + + LayerCostsDAG initCost{6, 0, 20}; + + double maxTime = 9999; + + pfaedle::router::HopCache c; + + router.hopsFast(froms, tos, initCost, &costM, rAttrs, rOpts, restr, &c, + maxTime); + + // we also get this, because the edge is the same! + TEST(cmGet(costM, 0, 0), ==, approx(5 + 100)); + TEST(cmGet(costM, 1, 0), ==, approx(10 + 100)); + TEST(cmGet(costM, 0, 1), ==, approx(5 + 50)); + TEST(cmGet(costM, 1, 1), ==, approx(10 + 50)); + TEST(cmGet(costM, 2, 0), >=, maxTime); + TEST(cmGet(costM, 2, 1), >=, maxTime); + } + + exit(0); +} diff --git a/src/pfaedle/trgraph/EdgePL.cpp b/src/pfaedle/trgraph/EdgePL.cpp index 9612db7..7c5b613 100644 --- a/src/pfaedle/trgraph/EdgePL.cpp +++ b/src/pfaedle/trgraph/EdgePL.cpp @@ -6,18 +6,17 @@ #include #include #include "pfaedle/trgraph/EdgePL.h" +#include "util/geo/Geo.h" using pfaedle::trgraph::EdgePL; using pfaedle::trgraph::TransitEdgeLine; -std::map EdgePL::_flines; +std::map EdgePL::_flines; std::map EdgePL::_tlines; // _____________________________________________________________________________ EdgePL::EdgePL() - : _length(0), _oneWay(0), _hasRestr(false), _rev(false), _lvl(0) { - _l = new util::geo::FLine(); - _flines[_l] = 1; + : _oneWay(0), _hasRestr(false), _rev(false), _lvl(0), _cost(0), _l(0) { } // _____________________________________________________________________________ @@ -25,19 +24,22 @@ EdgePL::EdgePL(const EdgePL& pl) : EdgePL(pl, false) {} // _____________________________________________________________________________ EdgePL::EdgePL(const EdgePL& pl, bool geoflat) - : _length(pl._length), - 
_oneWay(pl._oneWay), + : _oneWay(pl._oneWay), _hasRestr(pl._hasRestr), _rev(pl._rev), - _lvl(pl._lvl) { - if (geoflat) { - _l = pl._l; - } else { - _l = new util::geo::FLine(*pl._l); + _lvl(pl._lvl), + _cost(pl._cost), + _l(0) { + if (pl._l) { + if (geoflat) { + _l = pl._l; + } else { + _l = new LINE(*pl._l); + } + _flines[_l]++; } - _flines[_l]++; - for (auto l : _lines) addLine(l); + for (auto l : pl._lines) addLine(l); } // _____________________________________________________________________________ @@ -73,14 +75,23 @@ EdgePL EdgePL::revCopy() const { } // _____________________________________________________________________________ -void EdgePL::setLength(double d) { _length = d; } +double EdgePL::getLength() const { + double len = 0; -// _____________________________________________________________________________ -double EdgePL::getLength() const { return _length; } + for (size_t i = 1; i < _l->size(); i++) { + len += haversine((*_l)[i-1], (*_l)[i]); + } + + return len; +} // _____________________________________________________________________________ void EdgePL::addLine(const TransitEdgeLine* l) { - if (_lines.insert(l).second) { + auto lb = std::lower_bound(_lines.begin(), _lines.end(), l); + if (lb == _lines.end() || *lb != l) { + _lines.reserve(_lines.size() + 1); + lb = std::lower_bound(_lines.begin(), _lines.end(), l); + _lines.insert(lb, l); if (_tlines.count(l)) _tlines[l]++; else @@ -94,25 +105,33 @@ void EdgePL::addLines(const std::vector& l) { } // _____________________________________________________________________________ -const std::set& EdgePL::getLines() const { +const std::vector& EdgePL::getLines() const { return _lines; } // _____________________________________________________________________________ -void EdgePL::addPoint(const util::geo::FPoint& p) { _l->push_back(p); } +void EdgePL::addPoint(const POINT& p) { + if (!_l) { + _l = new LINE(); + _flines[_l] = 1; + } + _l->push_back(p); +} // 
_____________________________________________________________________________ -const util::geo::FLine* EdgePL::getGeom() const { return _l; } +const LINE* EdgePL::getGeom() const { return _l; } // _____________________________________________________________________________ -util::geo::FLine* EdgePL::getGeom() { return _l; } +LINE* EdgePL::getGeom() { return _l; } // _____________________________________________________________________________ -void EdgePL::getAttrs(std::map* obj) const { - (*obj)["m_length"] = std::to_string(_length); - (*obj)["oneway"] = std::to_string(static_cast(_oneWay)); - (*obj)["level"] = std::to_string(_lvl); - (*obj)["restriction"] = isRestricted() ? "yes" : "no"; +util::json::Dict EdgePL::getAttrs() const { + util::json::Dict obj; + obj["m_length"] = std::to_string(getLength()); + obj["oneway"] = std::to_string(static_cast(_oneWay)); + obj["cost"] = std::to_string(static_cast(_cost) / 10.0); + obj["level"] = std::to_string(_lvl); + obj["restriction"] = isRestricted() ? 
"yes" : "no"; std::stringstream ss; bool first = false; @@ -127,7 +146,8 @@ void EdgePL::getAttrs(std::map* obj) const { first = true; } - (*obj)["lines"] = ss.str(); + obj["lines"] = ss.str(); + return obj; } // _____________________________________________________________________________ @@ -146,10 +166,10 @@ void EdgePL::setOneWay(uint8_t dir) { _oneWay = dir; } void EdgePL::setOneWay() { _oneWay = 1; } // _____________________________________________________________________________ -void EdgePL::setLvl(uint8_t lvl) { _lvl = lvl; } +uint32_t EdgePL::getCost() const { return _cost; } // _____________________________________________________________________________ -uint8_t EdgePL::lvl() const { return _lvl; } +void EdgePL::setCost(uint32_t c) { _cost = c; } // _____________________________________________________________________________ void EdgePL::setRev() { _rev = true; } @@ -158,7 +178,7 @@ void EdgePL::setRev() { _rev = true; } bool EdgePL::isRev() const { return _rev; } // _____________________________________________________________________________ -const util::geo::FPoint& EdgePL::backHop() const { +const POINT& EdgePL::backHop() const { if (isRev()) { return *(++(getGeom()->cbegin())); } @@ -166,7 +186,7 @@ const util::geo::FPoint& EdgePL::backHop() const { } // _____________________________________________________________________________ -const util::geo::FPoint& EdgePL::frontHop() const { +const POINT& EdgePL::frontHop() const { if (isRev()) { return *(++(getGeom()->crbegin())); } diff --git a/src/pfaedle/trgraph/EdgePL.h b/src/pfaedle/trgraph/EdgePL.h index b467f44..3c71fea 100644 --- a/src/pfaedle/trgraph/EdgePL.h +++ b/src/pfaedle/trgraph/EdgePL.h @@ -9,7 +9,9 @@ #include #include #include +#include "pfaedle/Def.h" #include "pfaedle/router/Comp.h" +#include "util/geo/Geo.h" #include "util/geo/GeoGraph.h" using util::geograph::GeoEdgePL; @@ -17,7 +19,6 @@ using util::geograph::GeoEdgePL; namespace pfaedle { namespace trgraph { - /* * A line occuring 
on an edge */ @@ -25,14 +26,17 @@ struct TransitEdgeLine { std::string fromStr; std::string toStr; std::string shortName; + uint32_t color; }; inline bool operator==(const TransitEdgeLine& a, const TransitEdgeLine& b) { + // ignoring color here! return a.fromStr == b.fromStr && a.toStr == b.toStr && a.shortName == b.shortName; } inline bool operator<(const TransitEdgeLine& a, const TransitEdgeLine& b) { + // ignoring color here! return a.fromStr < b.fromStr || (a.fromStr == b.fromStr && a.toStr < b.toStr) || (a.fromStr == b.fromStr && a.toStr == b.toStr && @@ -42,7 +46,7 @@ inline bool operator<(const TransitEdgeLine& a, const TransitEdgeLine& b) { /* * An edge payload class for the transit graph. */ -class EdgePL : public GeoEdgePL { +class EdgePL { public: EdgePL(); ~EdgePL(); @@ -50,26 +54,32 @@ class EdgePL : public GeoEdgePL { EdgePL(const EdgePL& pl, bool geoFlat); // Return the geometry of this edge. - const util::geo::FLine* getGeom() const; - util::geo::FLine* getGeom(); + const LINE* getGeom() const; + LINE* getGeom(); // Extends this edge payload's geometry by Point p - void addPoint(const util::geo::FPoint& p); + void addPoint(const POINT& p); // Fill obj with k/v pairs describing the parameters of this payload. 
- void getAttrs(std::map* obj) const; + util::json::Dict getAttrs() const; // Return the length in meters stored for this edge payload double getLength() const; - // Set the length in meters for this edge payload - void setLength(double d); - // Set this edge as a one way node, either in the default direction of // the edge (no arg), or the direction specified in dir void setOneWay(); void setOneWay(uint8_t dir); + void setLvl(uint8_t lvl) { assert(lvl < 9); _lvl = lvl; } + uint8_t lvl() const { return _lvl; } + + // Return the cost for this edge payload + uint32_t getCost() const; + + // Set the cost for this edge payload + void setCost(uint32_t d); + // Mark this payload' edge as having some restrictions void setRestricted(); @@ -82,12 +92,6 @@ class EdgePL : public GeoEdgePL { // True if this edge is restricted bool isRestricted() const; - // Set the level of this edge. - void setLvl(uint8_t lvl); - - // Return the level of this edge. - uint8_t lvl() const; - // Return the one-way code stored for this edge. uint8_t oneWay() const; @@ -98,33 +102,33 @@ class EdgePL : public GeoEdgePL { void addLines(const std::vector& l); // Return the TransitEdgeLines stored for this payload - const std::set& getLines() const; + const std::vector& getLines() const; // Returns the last hop of the payload - this is the (n-2)th point in // the payload geometry of length n > 1 - const util::geo::FPoint& backHop() const; + const POINT& backHop() const; // Returns the first hop of the payload - this is the 2nd point in // the payload geometry of length n > 1 - const util::geo::FPoint& frontHop() const; + const POINT& frontHop() const; // Obtain an exact copy of this edge, but in reverse. 
EdgePL revCopy() const; private: - float _length; uint8_t _oneWay : 2; bool _hasRestr : 1; bool _rev : 1; - uint8_t _lvl : 3; + uint8_t _lvl: 4; + uint32_t _cost; // costs in 1/10th seconds - util::geo::FLine* _l; + LINE* _l; - std::set _lines; + std::vector _lines; static void unRefTLine(const TransitEdgeLine* l); - static std::map _flines; + static std::map _flines; static std::map _tlines; }; } // namespace trgraph diff --git a/src/pfaedle/trgraph/Graph.h b/src/pfaedle/trgraph/Graph.h index 82602e7..7f7d50a 100644 --- a/src/pfaedle/trgraph/Graph.h +++ b/src/pfaedle/trgraph/Graph.h @@ -14,8 +14,6 @@ using util::geo::Grid; using util::geo::Point; using util::geo::Line; -using util::geo::FPoint; -using util::geo::FLine; namespace pfaedle { namespace trgraph { @@ -26,8 +24,8 @@ namespace trgraph { typedef util::graph::Edge Edge; typedef util::graph::Node Node; typedef util::graph::DirGraph Graph; -typedef Grid NodeGrid; -typedef Grid EdgeGrid; +typedef Grid NodeGrid; +typedef Grid EdgeGrid; } // namespace trgraph } // namespace pfaedle diff --git a/src/pfaedle/trgraph/NodePL.cpp b/src/pfaedle/trgraph/NodePL.cpp index d3917c3..b4bba8b 100644 --- a/src/pfaedle/trgraph/NodePL.cpp +++ b/src/pfaedle/trgraph/NodePL.cpp @@ -3,122 +3,140 @@ // Authors: Patrick Brosi #include +#include +#include #include #include "pfaedle/trgraph/NodePL.h" -#include "pfaedle/trgraph/StatGroup.h" #include "pfaedle/trgraph/StatInfo.h" #include "util/String.h" -using pfaedle::trgraph::StatInfo; -using pfaedle::trgraph::NodePL; using pfaedle::trgraph::Component; +using pfaedle::trgraph::NodePL; +using pfaedle::trgraph::StatInfo; -// we use the adress of this dummy station info as a special value -// of this node, meaning "is a station block". 
Re-using the _si field here -// saves some memory -StatInfo NodePL::_blockerSI = StatInfo(); - -std::unordered_map NodePL::_comps; +std::vector NodePL::comps; +std::vector NodePL::_statInfos; // _____________________________________________________________________________ -NodePL::NodePL() : _geom(0, 0), _si(0), _component(0), _vis(0) {} - -// _____________________________________________________________________________ -NodePL::NodePL(const NodePL& pl) - : _geom(pl._geom), _si(0), _component(pl._component), _vis(pl._vis) { - if (pl._si) setSI(*(pl._si)); +NodePL::NodePL() + : _geom(0, 0), + _si(0), + _component(0) +#ifdef PFAEDLE_DBG + , + _vis(0) +#endif +{ } // _____________________________________________________________________________ -NodePL::NodePL(const util::geo::FPoint& geom) - : _geom(geom), _si(0), _component(0), _vis(0) {} +NodePL::NodePL(const POINT& geom) + : _geom(geom), + _si(0), + _component(0) +#ifdef PFAEDLE_DBG + , + _vis(0) +#endif +{ +} // _____________________________________________________________________________ -NodePL::NodePL(const util::geo::FPoint& geom, const StatInfo& si) - : _geom(geom), _si(0), _component(0), _vis(0) { +NodePL::NodePL(const POINT& geom, const StatInfo& si) + : _geom(geom), + _si(0), + _component(0) +#ifdef PFAEDLE_DBG + , + _vis(0) +#endif +{ setSI(si); } // _____________________________________________________________________________ -NodePL::~NodePL() { - if (getSI()) delete _si; - if (_component) { - _comps[_component]--; - if (_comps[_component] == 0) { - delete _component; - _comps.erase(_comps.find(_component)); - } - } +void NodePL::setVisited() const { +#ifdef PFAEDLE_DBG + _vis = true; +#endif } -// _____________________________________________________________________________ -void NodePL::setVisited() const { _vis = true; } - // _____________________________________________________________________________ void NodePL::setNoStat() { _si = 0; } // 
_____________________________________________________________________________ -const Component* NodePL::getComp() const { return _component; } +const Component& NodePL::getComp() const { return comps[_component - 1]; } // _____________________________________________________________________________ -void NodePL::setComp(const Component* c) { - if (_component == c) return; - _component = c; +uint32_t NodePL::getCompId() const { return _component; } - // NOT thread safe! - if (!_comps.count(c)) - _comps[c] = 1; - else - _comps[c]++; +// _____________________________________________________________________________ +void NodePL::setComp(uint32_t id) { + _component = id; } // _____________________________________________________________________________ -const util::geo::FPoint* NodePL::getGeom() const { return &_geom; } +const POINT* NodePL::getGeom() const { return &_geom; } // _____________________________________________________________________________ -void NodePL::setGeom(const util::geo::FPoint& geom) { _geom = geom; } +void NodePL::setGeom(const POINT& geom) { _geom = geom; } // _____________________________________________________________________________ -void NodePL::getAttrs(std::map* obj) const { - (*obj)["component"] = std::to_string(reinterpret_cast(_component)); - (*obj)["dijkstra_vis"] = _vis ? "yes" : "no"; +util::json::Dict NodePL::getAttrs() const { + util::json::Dict obj; + obj["component"] = std::to_string(_component); +#ifdef PFAEDLE_DBG + obj["dijkstra_vis"] = _vis ? "yes" : "no"; +#endif if (getSI()) { - (*obj)["station_info_ptr"] = util::toString(_si); - (*obj)["station_name"] = _si->getName(); - (*obj)["station_alt_names"] = util::implode(_si->getAltNames(), ","); - (*obj)["from_osm"] = _si->isFromOsm() ? 
"yes" : "no"; - (*obj)["station_platform"] = _si->getTrack(); - (*obj)["station_group"] = - std::to_string(reinterpret_cast(_si->getGroup())); + obj["station_info_ptr"] = util::toString(_si); + obj["station_name"] = getSI()->getName(); + obj["station_alt_names"] = + util::implode(getSI()->getAltNames(), ","); + obj["station_platform"] = getSI()->getTrack(); - std::stringstream gtfsIds; - if (_si->getGroup()) { - for (auto* s : _si->getGroup()->getStops()) { - gtfsIds << s->getId() << " (" << s->getName() << "),"; - } - } - - (*obj)["station_group_stops"] = gtfsIds.str(); +#ifdef PFAEDLE_STATION_IDS + // only print this in debug mode + obj["station_id"] = getSI()->getId(); +#endif } + return obj; } // _____________________________________________________________________________ -void NodePL::setSI(const StatInfo& si) { _si = new StatInfo(si); } +void NodePL::setSI(const StatInfo& si) { + _statInfos.emplace_back(si); + _si = _statInfos.size(); +} // _____________________________________________________________________________ const StatInfo* NodePL::getSI() const { if (isBlocker()) return 0; - return _si; + if (isTurnCycle()) return 0; + if (_si == 0) return 0; + return &_statInfos[_si - 1]; } // _____________________________________________________________________________ StatInfo* NodePL::getSI() { if (isBlocker()) return 0; - return _si; + if (isTurnCycle()) return 0; + if (_si == 0) return 0; + return &_statInfos[_si - 1]; } // _____________________________________________________________________________ -void NodePL::setBlocker() { _si = &_blockerSI; } +void NodePL::setTurnCycle() { _si = std::numeric_limits::max() - 1; } // _____________________________________________________________________________ -bool NodePL::isBlocker() const { return _si == &_blockerSI; } +bool NodePL::isTurnCycle() const { + return _si == (std::numeric_limits::max() - 1); +} + +// _____________________________________________________________________________ +void NodePL::setBlocker() { 
_si = std::numeric_limits::max(); } + +// _____________________________________________________________________________ +bool NodePL::isBlocker() const { + return _si == std::numeric_limits::max(); +} diff --git a/src/pfaedle/trgraph/NodePL.h b/src/pfaedle/trgraph/NodePL.h index 054129f..8d4efa9 100644 --- a/src/pfaedle/trgraph/NodePL.h +++ b/src/pfaedle/trgraph/NodePL.h @@ -8,8 +8,11 @@ #include #include #include +#include #include "ad/cppgtfs/gtfs/Feed.h" +#include "pfaedle/Def.h" #include "pfaedle/trgraph/StatInfo.h" +#include "util/geo/Geo.h" #include "util/geo/GeoGraph.h" using util::geograph::GeoNodePL; @@ -18,26 +21,24 @@ namespace pfaedle { namespace trgraph { struct Component { - uint8_t minEdgeLvl : 3; + float maxSpeed; }; /* * A node payload class for the transit graph. */ -class NodePL : public GeoNodePL { +class NodePL { public: NodePL(); - NodePL(const NodePL& pl); // NOLINT - NodePL(const util::geo::FPoint& geom); // NOLINT - NodePL(const util::geo::FPoint& geom, const StatInfo& si); - ~NodePL(); + NodePL(const POINT& geom); // NOLINT + NodePL(const POINT& geom, const StatInfo& si); // Return the geometry of this node. - const util::geo::FPoint* getGeom() const; - void setGeom(const util::geo::FPoint& geom); + const POINT* getGeom() const; + void setGeom(const POINT& geom); // Fill obj with k/v pairs describing the parameters of this payload. 
- void getAttrs(std::map* attrs) const; + util::json::Dict getAttrs() const; // Set the station info for this node void setSI(const StatInfo& si); @@ -50,10 +51,13 @@ class NodePL : public GeoNodePL { void setNoStat(); // Get the component of this node - const Component* getComp() const; + const Component& getComp() const; + + // Get the component of this node + uint32_t getCompId() const; // Set the component of this node - void setComp(const Component* c); + void setComp(uint32_t c); // Make this node a blocker void setBlocker(); @@ -61,21 +65,27 @@ class NodePL : public GeoNodePL { // Check if this node is a blocker bool isBlocker() const; + // Make this node a turning cycle + void setTurnCycle(); + + // Check if this node is a blocker + bool isTurnCycle() const; + // Mark this node as visited (usefull for counting search space in Dijkstra) + // (only works for DEBUG build type) void setVisited() const; + static std::vector comps; + private: - std::string _b; - // 32bit floats are enough here - util::geo::FPoint _geom; - StatInfo* _si; - const Component* _component; - - static StatInfo _blockerSI; + POINT _geom; + uint32_t _si; + uint32_t _component; +#ifdef PFAEDLE_DBG mutable bool _vis; - - static std::unordered_map _comps; +#endif + static std::vector _statInfos; }; } // namespace trgraph } // namespace pfaedle diff --git a/src/pfaedle/trgraph/Normalizer.cpp b/src/pfaedle/trgraph/Normalizer.cpp index 8f540b4..5daaa80 100644 --- a/src/pfaedle/trgraph/Normalizer.cpp +++ b/src/pfaedle/trgraph/Normalizer.cpp @@ -3,22 +3,41 @@ // Authors: Patrick Brosi #include +#include #include #include -#include +#include +#include #include +#include #include #include "pfaedle/trgraph/Normalizer.h" using pfaedle::trgraph::Normalizer; // _____________________________________________________________________________ -Normalizer::Normalizer(const ReplRules& rules) : _rulesOrig(rules) { +Normalizer::Normalizer(const ReplRules& rules) + : _rulesOrig(rules) { buildRules(rules); } // 
_____________________________________________________________________________ -std::string Normalizer::operator()(std::string sn) const { +Normalizer::Normalizer(const Normalizer& other) + : _rules(other._rules), + _rulesOrig(other._rulesOrig), + _cache(other._cache) {} + +// _____________________________________________________________________________ +Normalizer& Normalizer::operator=(Normalizer other) { + std::swap(this->_rules, other._rules); + std::swap(this->_rulesOrig, other._rulesOrig); + std::swap(this->_cache, other._cache); + + return *this; +} + +// _____________________________________________________________________________ +std::string Normalizer::norm(const std::string& sn) const { auto i = _cache.find(sn); if (i != _cache.end()) return i->second; diff --git a/src/pfaedle/trgraph/Normalizer.h b/src/pfaedle/trgraph/Normalizer.h index d85aac2..ae02d5a 100644 --- a/src/pfaedle/trgraph/Normalizer.h +++ b/src/pfaedle/trgraph/Normalizer.h @@ -28,8 +28,15 @@ class Normalizer { Normalizer() {} explicit Normalizer(const ReplRules& rules); - // Normalize sn based on the rules of this normalizer - std::string operator()(std::string sn) const; + // copy constructor + Normalizer(const Normalizer& other); + + // assignment op + Normalizer& operator=(Normalizer other); + + // Normalize sn, not thread safe + std::string norm(const std::string& sn) const; + bool operator==(const Normalizer& b) const; private: diff --git a/src/pfaedle/trgraph/StatGroup.cpp b/src/pfaedle/trgraph/StatGroup.cpp deleted file mode 100644 index c656a3c..0000000 --- a/src/pfaedle/trgraph/StatGroup.cpp +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Authors: Patrick Brosi - -#include -#include "pfaedle/trgraph/StatGroup.h" -#include "util/geo/Geo.h" - -using pfaedle::trgraph::StatGroup; -using pfaedle::trgraph::Node; -using pfaedle::router::NodeCandGroup; -using ad::cppgtfs::gtfs::Stop; - -// _____________________________________________________________________________ -StatGroup::StatGroup() {} - -// _____________________________________________________________________________ -void StatGroup::addStop(const Stop* s) { _stops.insert(s); } - -// _____________________________________________________________________________ -void StatGroup::addNode(trgraph::Node* n) { _nodes.insert(n); } - -// _____________________________________________________________________________ -void StatGroup::merge(StatGroup* other) { - if (other == this) return; - - std::set nds = other->getNodes(); - std::set stops = other->getStops(); - - for (auto on : nds) { - on->pl().getSI()->setGroup(this); - addNode(on); - } - - for (auto* os : stops) { - addStop(os); - } -} - -// _____________________________________________________________________________ -const NodeCandGroup& StatGroup::getNodeCands(const Stop* s) const { - return _stopNodePens.at(s); -} - -// _____________________________________________________________________________ -const std::set& StatGroup::getNodes() const { - return _nodes; -} - -// _____________________________________________________________________________ -void StatGroup::remNode(trgraph::Node* n) { - auto it = _nodes.find(n); - if (it != _nodes.end()) _nodes.erase(it); -} - -// _____________________________________________________________________________ -std::set& StatGroup::getNodes() { return _nodes; } - -// _____________________________________________________________________________ -const std::set& StatGroup::getStops() const { return _stops; } - -// _____________________________________________________________________________ -double StatGroup::getPen(const Stop* s, trgraph::Node* n, - const 
trgraph::Normalizer& platformNorm, - double trackPen, double distPenFac, - double nonOsmPen) const { - FPoint p = util::geo::latLngToWebMerc(s->getLat(), s->getLng()); - - double distPen = util::geo::webMercMeterDist(p, *n->pl().getGeom()); - distPen *= distPenFac; - - std::string platform = platformNorm(s->getPlatformCode()); - - if (!platform.empty() && !n->pl().getSI()->getTrack().empty() && - n->pl().getSI()->getTrack() == platform) { - trackPen = 0; - } - - if (n->pl().getSI()->isFromOsm()) nonOsmPen = 0; - - return distPen + trackPen + nonOsmPen; -} - -// _____________________________________________________________________________ -void StatGroup::writePens(const trgraph::Normalizer& platformNorm, - double trackPen, double distPenFac, - double nonOsmPen) { - if (_stopNodePens.size()) return; // already written - for (auto* s : _stops) { - for (auto* n : _nodes) { - _stopNodePens[s].push_back(router::NodeCand{ - n, getPen(s, n, platformNorm, trackPen, distPenFac, nonOsmPen)}); - } - } -} diff --git a/src/pfaedle/trgraph/StatGroup.h b/src/pfaedle/trgraph/StatGroup.h deleted file mode 100644 index a3341af..0000000 --- a/src/pfaedle/trgraph/StatGroup.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Authors: Patrick Brosi - -#ifndef PFAEDLE_TRGRAPH_STATGROUP_H_ -#define PFAEDLE_TRGRAPH_STATGROUP_H_ - -#include -#include -#include -#include "ad/cppgtfs/gtfs/Feed.h" -#include "pfaedle/router/Router.h" -#include "pfaedle/trgraph/Graph.h" -#include "pfaedle/trgraph/Normalizer.h" - -namespace pfaedle { -namespace trgraph { - -using ad::cppgtfs::gtfs::Stop; - -/* - * A group of stations that belong together semantically (for example, multiple - * stop points of a larger bus station) - */ -class StatGroup { - public: - StatGroup(); - StatGroup(const StatGroup& a) = delete; - - // Add a stop s to this station group - void addStop(const Stop* s); - - // Add a node n to this station group - void addNode(trgraph::Node* n); - - // Return all nodes contained in this group - const std::set& getNodes() const; - std::set& getNodes(); - - // Return all stops contained in this group - const std::set& getStops() const; - - // Remove a node from this group - void remNode(trgraph::Node* n); - - // All nodes in other will be in this group, their SI's updated, and the - // "other" group deleted. - void merge(StatGroup* other); - - // Return node candidates for stop s from this group - const router::NodeCandGroup& getNodeCands(const Stop* s) const; - - // Write the penalties for all stops contained in this group so far. 
- void writePens(const trgraph::Normalizer& platformNorm, double trackPen, - double distPenFac, double nonOsmPen); - - private: - std::set _nodes; - std::set _stops; - - // for each stop in this group, a penalty for each of the nodes here, based on - // its distance and optionally the track number - std::unordered_map _stopNodePens; - - double getPen(const Stop* s, trgraph::Node* n, - const trgraph::Normalizer& norm, double trackPen, - double distPenFac, double nonOsmPen) const; -}; -} // namespace trgraph -} // namespace pfaedle - -#endif // PFAEDLE_TRGRAPH_STATGROUP_H_ diff --git a/src/pfaedle/trgraph/StatInfo.cpp b/src/pfaedle/trgraph/StatInfo.cpp index 1893244..132c985 100644 --- a/src/pfaedle/trgraph/StatInfo.cpp +++ b/src/pfaedle/trgraph/StatInfo.cpp @@ -3,63 +3,24 @@ // Authors: Patrick Brosi #include "pfaedle/router/Comp.h" -#include "pfaedle/trgraph/StatGroup.h" #include "pfaedle/trgraph/StatInfo.h" using pfaedle::trgraph::StatInfo; -using pfaedle::trgraph::StatGroup; - -std::unordered_map StatInfo::_groups; // _____________________________________________________________________________ -StatInfo::StatInfo() : _name(""), _track(""), _fromOsm(false), _group(0) {} +StatInfo::StatInfo() : _name(""), _track("") {} // _____________________________________________________________________________ StatInfo::StatInfo(const StatInfo& si) - : _name(si._name), - _altNames(si._altNames), - _track(si._track), - _fromOsm(si._fromOsm), - _group(0) { - setGroup(si._group); + : _name(si._name), _altNames(si._altNames), _track(si._track) { +#ifdef PFAEDLE_STATION_IDS + _id = si._id; +#endif } // _____________________________________________________________________________ -StatInfo::StatInfo(const std::string& name, const std::string& track, - bool fromOsm) - : _name(name), _track(track), _fromOsm(fromOsm), _group(0) {} - -// _____________________________________________________________________________ -StatInfo::~StatInfo() { unRefGroup(_group); } - -// 
_____________________________________________________________________________ -void StatInfo::unRefGroup(StatGroup* g) { - if (g) { - _groups[g]--; - if (_groups[g] == 0) { - // std::cout << "Deleting " << g << std::endl; - delete g; - _groups.erase(_groups.find(g)); - } - } -} - -// _____________________________________________________________________________ -void StatInfo::setGroup(StatGroup* g) { - if (_group == g) return; - unRefGroup(_group); - - _group = g; - - // NOT thread safe! - if (!_groups.count(g)) - _groups[g] = 1; - else - _groups[g]++; -} - -// _____________________________________________________________________________ -StatGroup* StatInfo::getGroup() const { return _group; } +StatInfo::StatInfo(const std::string& name, const std::string& track) + : _name(name), _track(track) {} // _____________________________________________________________________________ const std::string& StatInfo::getName() const { return _name; } @@ -67,12 +28,6 @@ const std::string& StatInfo::getName() const { return _name; } // _____________________________________________________________________________ const std::string& StatInfo::getTrack() const { return _track; } -// _____________________________________________________________________________ -bool StatInfo::isFromOsm() const { return _fromOsm; } - -// _____________________________________________________________________________ -void StatInfo::setIsFromOsm(bool is) { _fromOsm = is; } - // _____________________________________________________________________________ double StatInfo::simi(const StatInfo* other) const { if (!other) return 0; diff --git a/src/pfaedle/trgraph/StatInfo.h b/src/pfaedle/trgraph/StatInfo.h index eedc378..dcd5ba7 100644 --- a/src/pfaedle/trgraph/StatInfo.h +++ b/src/pfaedle/trgraph/StatInfo.h @@ -6,24 +6,20 @@ #define PFAEDLE_TRGRAPH_STATINFO_H_ #include -#include #include +#include namespace pfaedle { namespace trgraph { -// forward declaration -class StatGroup; - /* - * Meta information 
(name, alternative names, track, group...) of a single stop + * Meta information (name, alternative names, track, ...) of a single stop */ class StatInfo { public: StatInfo(); StatInfo(const StatInfo& si); - StatInfo(const std::string& name, const std::string& track, bool _fromOsm); - ~StatInfo(); + StatInfo(const std::string& name, const std::string& track); // Return this stops names. const std::string& getName() const; @@ -43,27 +39,21 @@ class StatInfo { // Return the similarity between this stop and other double simi(const StatInfo* other) const; - // Set this stations group. - void setGroup(StatGroup* g); - - // Return this stations group. - StatGroup* getGroup() const; - - // True if this stop was from osm - bool isFromOsm() const; - - // Set this stop as coming from osm - void setIsFromOsm(bool is); +#ifdef PFAEDLE_STATION_IDS + const std::string& getId() const { return _id; } + void setId(const std::string& id) { _id = id; } +#endif private: std::string _name; std::vector _altNames; std::string _track; - bool _fromOsm; - StatGroup* _group; - static std::unordered_map _groups; - static void unRefGroup(StatGroup* g); +#ifdef PFAEDLE_STATION_IDS + // debug feature to store station ids from both OSM + // and GTFS + std::string _id; +#endif }; } // namespace trgraph } // namespace pfaedle diff --git a/src/shapevl/CMakeLists.txt b/src/shapevl/CMakeLists.txt new file mode 100644 index 0000000..bb55963 --- /dev/null +++ b/src/shapevl/CMakeLists.txt @@ -0,0 +1,17 @@ +file(GLOB_RECURSE shapevl_SRC *.cpp) + +set(shapevl_main ShapevlMain.cpp) + +list(REMOVE_ITEM shapevl_SRC ${shapevl_main}) + +include_directories( + ${PFAEDLE_INCLUDE_DIR} + SYSTEM ${LIBZIP_INCLUDE_DIR} + SYSTEM ${LIBZIP_CONF_INCLUDE_DIR} +) + +add_executable(shapevl ${shapevl_main}) +add_library(shapevl_dep ${shapevl_SRC}) + +include_directories(shapevl_dep PUBLIC ${PROJECT_SOURCE_DIR}/src/cppgtfs/src) +target_link_libraries(shapevl shapevl_dep util ad_cppgtfs -lpthread ${LIBZIP_LIBRARY}) diff --git 
a/src/shapevl/Collector.cpp b/src/shapevl/Collector.cpp new file mode 100644 index 0000000..f75e940 --- /dev/null +++ b/src/shapevl/Collector.cpp @@ -0,0 +1,485 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#include +#include +#include +#include +#include "ad/cppgtfs/gtfs/Feed.h" +#include "pfaedle/Def.h" +#include "shapevl/Collector.h" +#include "shapevl/Result.h" +#include "util/geo/Geo.h" +#include "util/geo/PolyLine.h" +#include "util/geo/output/GeoJsonOutput.h" +#include "util/log/Log.h" + +using util::geo::PolyLine; + +using ad::cppgtfs::gtfs::Shape; +using ad::cppgtfs::gtfs::Trip; +using pfaedle::eval::Collector; +using pfaedle::eval::Result; +using util::geo::output::GeoJsonOutput; + +// _____________________________________________________________________________ +double Collector::add(const Trip* oldT, const Shape* oldS, const Trip* newT, + const Shape* newS) { + // This adds a new trip with a new shape to our evaluation. + _trips++; + + if (!oldS) { + // If there is no original shape, we cannot compare them - abort! + _noOrigShp++; + return 0; + } + + for (auto st : oldT->getStopTimes()) { + if (st.getShapeDistanceTravelled() < 0) { + // we cannot safely compare trips without shape dist travelled + // information - abort! + _noOrigShp++; + return 0; + } + } + + for (auto st : newT->getStopTimes()) { + if (st.getShapeDistanceTravelled() < 0) { + // we cannot safely compare trips without shape dist travelled + // information - abort! + _noOrigShp++; + return 0; + } + } + + double fd = 0; + + // A "segment" is a path from station s_i to station s_{i+1} + + size_t unmatchedSegments; // number of unmatched segments + double unmatchedSegmentsLength; // total _an. 
length of unmatched segments + + std::vector oldDists; + LINE oldL = getLine(oldS, &oldDists); + + std::vector newDists; + LINE newL = getLine(newS, &newDists); + + // check dist between anchor points + + if ((util::geo::latLngLen(oldL) * 1.0) / (oldL.size() * 1.0) > 1000) { + // most likely input with a degenerated shape - dont compare + _noOrigShp++; + return 0; + } + + if ((util::geo::latLngLen(newL) * 1.0) / (newL.size() * 1.0) > 1000) { + // most likely input with a degenerated shape - dont compare + _noOrigShp++; + return 0; + } + + std::vector> newLenDists; + std::vector> oldLenDists; + + auto oldSegs = segmentize(oldT, oldL, oldDists, newLenDists); + auto newSegs = segmentize(newT, newL, newDists, oldLenDists); + + for (const auto& p : oldLenDists) { + _distDiffs.push_back(fabs(p.first - p.second)); + _hopDists.push_back(p.first); + } + + // new lines build from cleaned-up shapes + LINE oldLCut; + LINE newLCut; + + for (auto oldL : oldSegs) + oldLCut.insert(oldLCut.end(), oldL.begin(), oldL.end()); + + for (auto newL : newSegs) { + newLCut.insert(newLCut.end(), newL.begin(), newL.end()); + } + + // convert (roughly) to degrees + double SEGL = 25.0 / util::geo::M_PER_DEG; + + double f = util::geo::webMercDistFactor(oldLCut.front()); + + // roughly half a meter + auto oldLCutS = + util::geo::simplify(oldLCut, f * (0.5 / util::geo::M_PER_DEG)); + auto newLCutS = + util::geo::simplify(newLCut, f * (0.5 / util::geo::M_PER_DEG)); + + auto old = _dCache.find(oldLCutS); + if (old != _dCache.end()) { + auto match = old->second.find(newLCutS); + if (match != old->second.end()) { + fd = match->second; + } else { + fd = util::geo::accFrechetDistCHav(oldLCutS, newLCutS, SEGL); + _dCache[oldLCutS][newLCutS] = fd; + } + } else { + fd = util::geo::accFrechetDistCHav(oldLCutS, newLCutS, SEGL); + _dCache[oldLCutS][newLCutS] = fd; + } + + auto dA = getDa(oldSegs, newSegs); + unmatchedSegments = dA.first; + unmatchedSegmentsLength = dA.second; + + double totL = 0; + for (auto 
l : oldSegs) totL += util::geo::latLngLen(l); + + // filter out shapes with a length of under 5 meters - they are most likely + // artifacts + if (totL < 5) { + _noOrigShp++; + return 0; + } + + _fdSum += fd / totL; + _unmatchedSegSum += unmatchedSegments; + _unmatchedSegLengthSum += unmatchedSegmentsLength; + + double avgFd = fd / totL; + double AN = static_cast(unmatchedSegments) / + static_cast(oldSegs.size()); + double AL = unmatchedSegmentsLength / totL; + + _results.insert(Result(oldT, avgFd)); + + if (AN <= 0.0001) _an0++; + if (AN <= 0.05) _an5++; + if (AN <= 0.1) _an10++; + if (AN <= 0.2) _an20++; + if (AN <= 0.3) _an30++; + if (AN <= 0.5) _an50++; + if (AN <= 0.7) _an70++; + if (AN <= 0.9) _an90++; + + LOG(VDEBUG) << "This result (" << oldT->getId() + << "): A_N/N = " << unmatchedSegments << "/" << oldSegs.size() + << " = " << AN << " A_L/L = " << unmatchedSegmentsLength << "/" + << totL << " = " << AL << " d_f = " << avgFd; + + if (_reportOut) { + (*_reportOut) << std::fixed << std::setprecision(6); + (*_reportOut) << oldT->getId() << "\t" << AN << "\t" << AL << "\t" << avgFd + << "\t" << util::geo::getWKT(oldSegs) << "\t" + << util::geo::getWKT(newSegs) << "\t" << oldT->getRoute()->getShortName() << "\t"; + + for (const auto& st : oldT->getStopTimes()) { + (*_reportOut) << st.getStop()->getName() << "\t" + << st.getStop()->getLat() << "\t" + << st.getStop()->getLng() << "\t"; + } +(*_reportOut) << "\n"; + } + + return avgFd; +} + +// _____________________________________________________________________________ +std::vector Collector::segmentize( + const Trip* t, const LINE& shape, const std::vector& dists, + std::vector>& lenDist) { + std::vector ret; + + if (t->getStopTimes().size() < 2) return ret; + + POLYLINE pl(shape); + std::vector cuts; + + size_t i = 0; + for (const auto& st : t->getStopTimes()) { + cuts.push_back(st.getShapeDistanceTravelled()); + i++; + } + + + size_t to = std::upper_bound(dists.begin(), dists.end(), cuts[0]) - + 
dists.begin(); + + POINT lastP; + if (to >= dists.size()) { + lastP = shape.back(); + } else if (to == 0) { + lastP = shape.front(); + } else { + double progr = (cuts[0] - dists[to - 1]) / (dists[to] - dists[to - 1]); + lastP = shape[to - 1]; + lastP.setX(lastP.getX() + progr * (shape[to].getX() - shape[to-1].getX())); + lastP.setY(lastP.getY() + progr * (shape[to].getY() - shape[to-1].getY())); + } + + for (size_t i = 1; i < cuts.size(); i++) { + size_t to = std::upper_bound(dists.begin(), dists.end(), cuts[i]) - + dists.begin(); + + POINT curP; + if (to >= dists.size()) { + curP = shape.back(); + } else if (to == 0) { + curP = shape.front(); + } else { + curP = shape[to - 1]; + double progr = (cuts[i] - dists[to - 1]) / (dists[to] - dists[to - 1]); + curP.setX(curP.getX() + progr * (shape[to].getX() - shape[to-1].getX())); + curP.setY(curP.getY() + progr * (shape[to].getY() - shape[to-1].getY())); + } + + auto curL = pl.getSegment(lastP, curP).getLine(); + + double dist = + util::geo::haversine(t->getStopTimes()[i - 1].getStop()->getLat(), + t->getStopTimes()[i - 1].getStop()->getLng(), + t->getStopTimes()[i].getStop()->getLat(), + t->getStopTimes()[i].getStop()->getLng()); + double len = util::geo::latLngLen(curL); + lenDist.push_back({dist, len}); + + ret.push_back(curL); + lastP = curP; + } + + return ret; +} + +// _____________________________________________________________________________ +LINE Collector::getLine(const Shape* s, std::vector* dists) { + LINE ret; + + for (size_t i = 0; i < s->getPoints().size(); i++) { + ret.push_back({s->getPoints()[i].lng, s->getPoints()[i].lat}); + (*dists).push_back(s->getPoints()[i].travelDist); + } + return ret; +} + +// _____________________________________________________________________________ +const std::set& Collector::getResults() const { return _results; } + +// _____________________________________________________________________________ +double Collector::getAvgDist() const { return _fdSum / _results.size(); 
} + +// _____________________________________________________________________________ +void Collector::printCsv(std::ostream* os, + const std::set& result) const { + for (auto r : result) (*os) << r.getDist() << "\n"; +} + +// _____________________________________________________________________________ +double Collector::getAcc() const { + return static_cast(_an0) / static_cast(_results.size()); +} + +// _____________________________________________________________________________ +void Collector::printShortStats(std::ostream* os) const { + if (_results.size()) { + (*os) << (static_cast(_an0) / + static_cast(_results.size())) * + 100 + << ","; + (*os) << (static_cast(_an5) / + static_cast(_results.size())) * + 100 + << ","; + (*os) << (static_cast(_an10) / + static_cast(_results.size())) * + 100 + << ","; + (*os) << (static_cast(_an20) / + static_cast(_results.size())) * + 100 + << ","; + (*os) << (static_cast(_an30) / + static_cast(_results.size())) * + 100 + << ","; + (*os) << (static_cast(_an50) / + static_cast(_results.size())) * + 100 + << ","; + (*os) << (static_cast(_an70) / + static_cast(_results.size())) * + 100 + << ","; + (*os) << (static_cast(_an90) / + static_cast(_results.size())) * + 100; + } +} + +// _____________________________________________________________________________ +void Collector::printStats(std::ostream* os) const { + (*os) << std::setfill(' ') << std::setw(50) << " # of trips: " << _trips + << "\n"; + (*os) << std::setfill(' ') << std::setw(50) + << " # of trips new shapes were matched for: " << _results.size() + << "\n"; + (*os) << std::setw(50) << " # of trips without input shapes: " << _noOrigShp + << "\n"; + + if (_results.size()) { + (*os) << std::setw(50) << " highest avg frechet distance to input shapes: " + << (--_results.end())->getDist() << " (on trip #" + << (--_results.end())->getTrip()->getId() << ")\n"; + (*os) << std::setw(50) << " lowest distance to input shapes: " + << (_results.begin())->getDist() << " (on trip #" + 
<< (_results.begin())->getTrip()->getId() << ")\n"; + (*os) << std::setw(50) + << " averaged avg frechet distance: " << getAvgDist() << "\n"; + + (*os) << "\n"; + (*os) << " an-0: " + << (static_cast(_an0) / + static_cast(_results.size())) * + 100 + << " %" + << "\n"; + (*os) << " an-5: " + << (static_cast(_an5) / + static_cast(_results.size())) * + 100 + << " %" + << "\n"; + (*os) << " an-10: " + << (static_cast(_an10) / + static_cast(_results.size())) * + 100 + << " %" + << "\n"; + (*os) << " an-20: " + << (static_cast(_an20) / + static_cast(_results.size())) * + 100 + << " %" + << "\n"; + (*os) << " acc-30: " + << (static_cast(_an30) / + static_cast(_results.size())) * + 100 + << " %" + << "\n"; + (*os) << " acc-50: " + << (static_cast(_an50) / + static_cast(_results.size())) * + 100 + << " %" + << "\n"; + (*os) << " acc-70: " + << (static_cast(_an70) / + static_cast(_results.size())) * + 100 + << " %" + << "\n"; + (*os) << " acc-90: " + << (static_cast(_an90) / + static_cast(_results.size())) * + 100 + << " %" + << "\n"; + } + + (*os) << std::endl; +} + +// _____________________________________________________________________________ +std::map Collector::getStats() { + std::map stats; + + if (_distDiffs.size()) { + auto i = _distDiffs.begin() + _distDiffs.size() / 2; + + // std::nth_element makes a partial sort of the first n elements + std::nth_element(_distDiffs.begin(), i, _distDiffs.end()); + + stats["median-dist-diff"] = *i; + } else { + stats["median-dist-diff"] = -1; + } + + if (_hopDists.size()) { + double s = 0; + for (auto d : _hopDists) s += d; + + stats["avg-hop-dist"] = s / (_hopDists.size() * 1.0); + } else { + stats["avg-hop-dist"] = -1; + } + + stats["num-trips"] = _trips; + stats["num-trips-matched"] = _results.size(); + stats["num-trips-wo-shapes"] = _noOrigShp; + stats["avg-fr"] = getAvgDist(); + if (_results.size()) { + stats["max-avg-frech-dist"] = (--_results.end())->getDist(); + } else { + stats["max-avg-frech-dist"] = -1; + } + 
stats["an-0"] = + (static_cast(_an0) / static_cast(_results.size())) * 100; + stats["an-5"] = + (static_cast(_an5) / static_cast(_results.size())) * 100; + stats["an-10"] = + (static_cast(_an10) / static_cast(_results.size())) * 100; + stats["an-20"] = + (static_cast(_an20) / static_cast(_results.size())) * 100; + stats["an-30"] = + (static_cast(_an30) / static_cast(_results.size())) * 100; + stats["an-50"] = + (static_cast(_an50) / static_cast(_results.size())) * 100; + stats["an-70"] = + (static_cast(_an70) / static_cast(_results.size())) * 100; + stats["an-90"] = + (static_cast(_an90) / static_cast(_results.size())) * 100; + + return stats; +} + +// _____________________________________________________________________________ +std::pair Collector::getDa(const std::vector& a, + const std::vector& b) { + assert(a.size() == b.size()); + std::pair ret{0, 0}; + + // convert (roughly) to degrees + double SEGL = 25 / util::geo::M_PER_DEG; + + double MAX = 100; + + for (size_t i = 0; i < a.size(); i++) { + double fdMeter = 0; + + double f = util::geo::webMercDistFactor(a[i].front()); + + // roughly half a meter + auto aSimpl = util::geo::simplify(a[i], f * (0.5 / util::geo::M_PER_DEG)); + auto bSimpl = util::geo::simplify(b[i], f * (0.5 / util::geo::M_PER_DEG)); + + auto old = _dACache.find(aSimpl); + if (old != _dACache.end()) { + auto match = old->second.find(bSimpl); + if (match != old->second.end()) { + fdMeter = match->second; + } else { + fdMeter = util::geo::frechetDistHav(aSimpl, bSimpl, SEGL); + _dACache[aSimpl][bSimpl] = fdMeter; + } + } else { + fdMeter = util::geo::frechetDistHav(aSimpl, bSimpl, SEGL); + _dACache[aSimpl][bSimpl] = fdMeter; + } + + if (fdMeter >= MAX) { + ret.first++; + ret.second += util::geo::latLngLen(aSimpl); + } + } + + return ret; +} diff --git a/src/shapevl/Collector.h b/src/shapevl/Collector.h new file mode 100644 index 0000000..724f00d --- /dev/null +++ b/src/shapevl/Collector.h @@ -0,0 +1,129 @@ +// Copyright 2018, University of 
Freiburg, +// Chair of Algorithms and Data Structures. +// Authors: Patrick Brosi + +#ifndef PFAEDLE_EVAL_COLLECTOR_H_ +#define PFAEDLE_EVAL_COLLECTOR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "ad/cppgtfs/gtfs/Feed.h" +#include "pfaedle/Def.h" +#include "shapevl/Result.h" +#include "util/geo/Geo.h" +#include "util/json/Writer.h" + +using ad::cppgtfs::gtfs::Shape; +using ad::cppgtfs::gtfs::Trip; + +namespace pfaedle { +namespace eval { + +struct lineCmp { + bool operator()(const LINE& a, const LINE& b) const { + if (a.size() != b.size()) { + return a.size() < b.size(); + } + + for (size_t i = 0; i < a.size(); i++) { + if (util::geo::dist(a[i], b[i]) > .00001) { + return (a[i].getX() < b[i].getX()) || + (a[i].getX() == b[i].getX() && a[i].getY() < b[i].getY()); + ; + } + } + + return false; + } +}; + +/* + * Collects routing results for evaluation + */ +class Collector { + public: + Collector(std::ostream* reportOut) + : _trips(0), + _noOrigShp(0), + _fdSum(0), + _unmatchedSegSum(0), + _unmatchedSegLengthSum(0), + _an0(0), + _an5(0), + _an10(0), + _an30(0), + _an50(0), + _an70(0), + _an90(0), + _reportOut(reportOut) {} + + // Add a shape found by our tool newS for a trip t with newly calculated + // station dist values with the old shape oldS + double add(const Trip* oldT, const Shape* oldS, const Trip* newT, + const Shape* newS); + + // Return the set of all Result objects + const std::set& getResults() const; + + // Print general stats to os + void printStats(std::ostream* os) const; + + // Print general stats to os + void printShortStats(std::ostream* os) const; + + // Get JSON stats + std::map getStats(); + + // Print a CSV for the results to os + void printCsv(std::ostream* os, const std::set& result) const; + + // Return the averaged average frechet distance + double getAvgDist() const; + + static LINE getLine(const Shape* s, std::vector* dists); + + double getAcc() const; + + private: + std::set _results; + std::map, 
lineCmp> _dCache; + std::map, lineCmp> _dACache; + + size_t _trips; + size_t _noOrigShp; + + std::vector _distDiffs; + std::vector _hopDists; + + double _fdSum; + size_t _unmatchedSegSum; + double _unmatchedSegLengthSum; + + size_t _an0; + size_t _an5; + size_t _an10; + size_t _an20; + size_t _an30; + size_t _an50; + size_t _an70; + size_t _an90; + + std::ostream* _reportOut; + + std::pair getDa(const std::vector& a, + const std::vector& b); + + static std::vector segmentize( + const Trip* t, const LINE& shape, const std::vector& dists, + std::vector>& lenDist); +}; + +} // namespace eval +} // namespace pfaedle + +#endif // PFAEDLE_EVAL_COLLECTOR_H_ diff --git a/src/pfaedle/eval/Result.h b/src/shapevl/Result.h similarity index 100% rename from src/pfaedle/eval/Result.h rename to src/shapevl/Result.h diff --git a/src/shapevl/ShapevlMain.cpp b/src/shapevl/ShapevlMain.cpp new file mode 100644 index 0000000..bade8d9 --- /dev/null +++ b/src/shapevl/ShapevlMain.cpp @@ -0,0 +1,276 @@ +// Copyright 2020, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Authors: Patrick Brosi + +#include +#include +#include +#include +#include +#include +#include +#include "ad/cppgtfs/Parser.h" +#include "pfaedle/router/TripTrie.h" +#include "shapevl/Collector.h" +#include "util/Misc.h" +#include "util/json/Writer.h" +#include "util/log/Log.h" + +using pfaedle::router::TripTrie; + +std::atomic count(0); + +// _____________________________________________________________________________ +void printHelp(int argc, char** argv) { + UNUSED(argc); + std::cout << "Usage: " << argv[0] + << " [-f ] -g [-s] " + << "\n"; + std::cout + << "\nAllowed arguments:\n -g Ground truth GTFS file\n"; + std::cout << " -s Only output summary\n"; + std::cout << " --json Output JSON\n"; + std::cout << " --avg Take avg of all inputs (only for --json)\n"; + std::cout << " -f Output full reports (per feed) to \n"; + std::cout + << " -m MOTs to match (GTFS MOT or string, default: all)\n"; +} + +// _____________________________________________________________________________ +void eval(const std::vector* paths, + std::vector* colls, + const std::set* mots, + const ad::cppgtfs::gtfs::Feed* evalFeed, bool unique) { + while (1) { + int myFeed = count-- - 1; + if (myFeed < 0) return; + std::string path = (*paths)[myFeed]; + LOG(DEBUG) << "Reading eval feed " << path << " ..."; + ad::cppgtfs::gtfs::Feed feed; + + try { + ad::cppgtfs::Parser p(path); + p.parse(&feed); + } catch (const ad::cppgtfs::ParserException& ex) { + LOG(ERROR) << "Could not parse GTFS feed " << path << ", reason was:"; + std::cerr << ex.what() << std::endl; + exit(1); + } + + std::vector trips; + + if (unique) { + std::map>> + forest; + for (auto t : evalFeed->getTrips()) { + auto& subForest = forest[t.second->getRoute()]; + bool ins = false; + for (auto& trie : subForest) { + if (trie.addTrip(t.second, + pfaedle::router::RoutingAttrs{ + t.second->getRoute()->getId(), "", ""}, + false, false)) { + ins = true; + break; + } + } + + if (!ins) { + subForest.resize(subForest.size() + 1); + 
subForest.back().addTrip(t.second, + pfaedle::router::RoutingAttrs{ + t.second->getRoute()->getId(), "", ""}, + false, false); + } + } + for (auto f : forest) { + for (auto sf : f.second) { + for (auto leaf : sf.getNdTrips()) { + // only one reference node + trips.push_back(leaf.second.front()); + } + } + } + } else { + for (auto t : evalFeed->getTrips()) { + trips.push_back(t.second); + } + } + + LOG(DEBUG) << "Evaluating " << path << "..."; + size_t i = 0; + for (const auto& oldTrip : trips) { + LOG(DEBUG) << "@ " << ++i << "/" << trips.size(); + if (!mots->count(oldTrip->getRoute()->getType())) continue; + auto newTrip = feed.getTrips().get(oldTrip->getId()); + if (!newTrip) { + LOG(ERROR) << "Trip #" << oldTrip->getId() << " not present in " << path + << ", skipping..."; + continue; + } + (*colls)[myFeed].add(oldTrip, oldTrip->getShape(), newTrip, + newTrip->getShape()); + } + } +} + +// _____________________________________________________________________________ +int main(int argc, char** argv) { + // disable output buffering for standard output + setbuf(stdout, NULL); + + // initialize randomness + srand(time(NULL) + rand()); // NOLINT + + std::string groundTruthFeedPath, motStr; + motStr = "all"; + ad::cppgtfs::gtfs::Feed groundTruthFeed; + std::string fullReportPath = ""; + std::vector evlFeedPaths; + std::set evlFeedPathsUniq; + std::vector evalColls; + std::vector reportStreams; + bool summarize = false; + bool json = false; + bool avg = false; + bool unique = false; + + for (int i = 1; i < argc; i++) { + std::string cur = argv[i]; + if (cur == "-h" || cur == "--help") { + printHelp(argc, argv); + exit(0); + } else if (cur == "-g") { + if (++i >= argc) { + LOG(ERROR) << "Missing argument for ground truth (-g)."; + exit(1); + } + groundTruthFeedPath = argv[i]; + } else if (cur == "-s") { + summarize = true; + } else if (cur == "--json") { + json = true; + } else if (cur == "--unique") { + unique = true; + } else if (cur == "--avg") { + avg = true; + } 
else if (cur == "-f") { + if (++i >= argc) { + LOG(ERROR) << "Missing argument for full reports (-f)."; + exit(1); + } + fullReportPath = argv[i]; + } else if (cur == "-m") { + if (++i >= argc) { + LOG(ERROR) << "Missing argument for mot (-m)."; + exit(1); + } + motStr = argv[i]; + } else { + char fullPath[PATH_MAX + 1]; + if (!realpath(cur.c_str(), fullPath)) { + LOG(ERROR) << "Error while reading " << fullPath; + exit(1); + } + evlFeedPathsUniq.insert(fullPath); + } + } + + for (const auto& feedPath : evlFeedPathsUniq) { + evlFeedPaths.push_back(feedPath); + if (fullReportPath.size()) { + reportStreams.emplace_back(); + reportStreams.back().exceptions(std::ios::failbit | std::ios::badbit); + reportStreams.back().open(fullReportPath + "/" + + util::split(feedPath, '/').back() + + ".fullreport.tsv"); + evalColls.push_back({&reportStreams.back()}); + } else { + evalColls.push_back({0}); + } + count++; + } + + if (groundTruthFeedPath.size() == 0) { + LOG(ERROR) << "No ground truth feed path given (-g)."; + exit(1); + } + + std::set mots = + ad::cppgtfs::gtfs::flat::Route::getTypesFromString(util::trim(motStr)); + + std::vector evlFeeds(evlFeedPaths.size()); + + try { + LOG(DEBUG) << "Reading ground truth feed" << groundTruthFeedPath << " ..."; + ad::cppgtfs::Parser p(groundTruthFeedPath); + p.parse(&groundTruthFeed); + } catch (const ad::cppgtfs::ParserException& ex) { + LOG(ERROR) << "Could not parse input GTFS feed, reason was:"; + std::cerr << ex.what() << std::endl; + exit(1); + } + + size_t THREADS = std::thread::hardware_concurrency(); + + std::vector thrds(THREADS); + for (auto& thr : thrds) + thr = std::thread(&eval, &evlFeedPaths, &evalColls, &mots, &groundTruthFeed, + unique); + + for (auto& thr : thrds) thr.join(); + + if (json) { + util::json::Dict stats = {}; + + for (size_t i = 0; i < evalColls.size(); i++) { + util::json::Dict locStats = {}; + for (const auto& kv : evalColls[i].getStats()) { + locStats[kv.first] = kv.second; + } + 
stats[evlFeedPaths[i]] = locStats; + } + + util::json::Dict jsonStats; + + if (evalColls.size() == 1) { + jsonStats = {{"statistics", stats[evlFeedPaths[0]]}}; + } else { + if (avg) { + double count = evalColls.size(); + std::vector keys; + for (const auto& a : evalColls[0].getStats()) { + keys.push_back(a.first); + } + util::json::Dict avgStats; + for (const auto& k : keys) { + double sum = 0; + for (size_t i = 0; i < evalColls.size(); i++) { + sum += evalColls[i].getStats()[k]; + } + avgStats[k] = sum / count; + } + jsonStats = {{"statistics", avgStats}}; + } else { + jsonStats = {{"statistics", stats}}; + } + } + + util::json::Writer wr(&std::cout, 10, true); + wr.val(jsonStats); + wr.closeAll(); + } else { + for (size_t i = 0; i < evalColls.size(); i++) { + if (summarize) { + std::cout << evlFeedPaths[i] << ": "; + evalColls[i].printShortStats(&std::cout); + std::cout << std::endl; + } else { + std::cout << " == Evaluation results for " << evlFeedPaths[i] + << " ===" << std::endl; + evalColls[i].printStats(&std::cout); + } + } + } +} diff --git a/src/util b/src/util new file mode 160000 index 0000000..d1c30e9 --- /dev/null +++ b/src/util @@ -0,0 +1 @@ +Subproject commit d1c30e9ec4cb68803be073d35beb6af2b860bda4 diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt deleted file mode 100644 index 1c3f62f..0000000 --- a/src/util/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -file(GLOB_RECURSE util_SRC *.cpp) -list(REMOVE_ITEM util_SRC TestMain.cpp) -add_library(util ${util_SRC}) - -add_subdirectory(tests) diff --git a/src/util/Misc.h b/src/util/Misc.h deleted file mode 100644 index eecde29..0000000 --- a/src/util/Misc.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2017, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Authors: Patrick Brosi - -#ifndef UTIL_MISC_H_ -#define UTIL_MISC_H_ - -#include -#include - -#define UNUSED(expr) do { (void)(expr); } while (0) -#define TIME() std::chrono::high_resolution_clock::now() -#define TOOK(t1, t2) (std::chrono::duration_cast(t2 - t1).count() / 1000.0) - -namespace util { - -// cached first 10 powers of 10 -static int pow10[10] = { - 1, 10, 100, 1000, 10000, - 100000, 1000000, 10000000, 100000000, 1000000000}; - -// _____________________________________________________________________________ -inline uint64_t factorial(uint64_t n) { - if (n == 1) return n; - return n * factorial(n - 1); -} - -// _____________________________________________________________________________ -inline uint64_t atoul(const char* p) { - uint64_t ret = 0; - - while (*p) { - ret = ret * 10 + (*p++ - '0'); - } - - return ret; -} - -// _____________________________________________________________________________ -inline float atof(const char* p, uint8_t mn) { - // this atof implementation works only on "normal" float strings like - // 56.445 or -345.00, but should be faster than std::atof - float ret = 0.0; - bool neg = false; - if (*p == '-') { - neg = true; - p++; - } - - while (*p >= '0' && *p <= '9') { - ret = ret * 10.0 + (*p - '0'); - p++; - } - - if (*p == '.') { - p++; - float f = 0; - uint8_t n = 0; - - for (; n < mn && *p >= '0' && *p <= '9'; n++, p++) { - f = f * 10.0 + (*p - '0'); - } - - if (n < 11) - ret += f / pow10[n]; - else - ret += f / std::pow(10, n); - } - - if (neg) return -ret; - return ret; -} - -// _____________________________________________________________________________ -inline double atof(const char* p) { return atof(p, 38); } - -} // namespace util - -#endif // UTIL_MISC_H_ diff --git a/src/util/Nullable.h b/src/util/Nullable.h deleted file mode 100644 index 892fd8c..0000000 --- a/src/util/Nullable.h +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright 2017, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Authors: Patrick Brosi - -#ifndef UTIL_NULLABLE_H_ -#define UTIL_NULLABLE_H_ - -namespace util { - -template -class Nullable { - public: - Nullable() - : val(), null(true) {} - Nullable(T* valPointer) - : val(), null(true) { - if (valPointer) { - assign(*valPointer); - } - } - Nullable(const T& value) - : val(value), null(false) {} - Nullable(const Nullable& other) - : val(other.val), null(other.isNull()) {} - - Nullable& operator=(const Nullable& other) { - val = other.get(); - null = other.isNull(); - return *this; - } - - T operator=(const T& other) { - assign(other); - return val; - } - - /** - * Passing through comparision operators - */ - - bool operator==(const Nullable& other) const { - return (other.isNull() && isNull()) || other.get() == get(); - } - - bool operator!=(const Nullable& other) const { - return !(*this == other); - } - - bool operator<(const Nullable& other) const { - return !other.isNull() && !isNull() && get() < other.get(); - } - - bool operator>(const Nullable& other) const { - return !(*this < other || *this == other); - } - - bool operator<=(const Nullable& other) const { - return *this < other || *this == other; - } - - bool operator>=(const Nullable& other) const { - return *this > other || *this == other; - } - - bool operator==(const T& other) const { - return !isNull() && other == get(); - } - - bool operator!=(const T& other) const { - return !(*this == other); - } - - bool operator<(const T& other) const { - return !isNull() && get() < other; - } - - bool operator>(const T& other) const { - return !(*this < other || *this == other); - } - - bool operator<=(const T& other) const { - return *this < other || *this == other; - } - - bool operator>=(const T& other) const { - return *this > other || *this == other; - } - - operator T() const { - return get(); - } - - bool isNull() const { - return null; - } - - T get() const { - if (!isNull()) return val; - else throw std::runtime_error("Trying to retrieve value of NULL object."); 
- } - -private: - void assign(T v) { - val = v; - null = false; - } - - T val; - bool null; -}; - -} - -#endif // UTIL_NULLABLE_H_ diff --git a/src/util/String.h b/src/util/String.h deleted file mode 100644 index 8f8071a..0000000 --- a/src/util/String.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2017, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#ifndef UTIL_STRING_H_ -#define UTIL_STRING_H_ - -#include -#include -#include -#include -#include - -namespace util { - -// _____________________________________________________________________________ -inline std::string urlDecode(const std::string& encoded) { - std::string decoded; - for (size_t i = 0; i < encoded.size(); ++i) { - char c = encoded[i]; - if (c == '%') { - std::string ah = encoded.substr(i + 1, 2); - char* nonProced = 0; - char hexVal = strtol(ah.c_str(), &nonProced, 16); - - if (ah.find_first_of("+-") > 1 && ah.size() - strlen(nonProced) == 2) { - c = hexVal; - i += 2; - } - } else if (c == '+') { - c = ' '; - } - decoded += c; - } - return decoded; -} - -// _____________________________________________________________________________ -inline std::string jsonStringEscape(const std::string& unescaped) { - std::string escaped; - for (size_t i = 0; i < unescaped.size(); ++i) { - if (unescaped[i] == '"' || unescaped[i] == '\\') { - escaped += "\\"; - } - if (iscntrl(unescaped[i])) { - escaped += " "; - } - escaped += unescaped[i]; - } - return escaped; -} - -// _____________________________________________________________________________ -inline bool replace(std::string& subj, const std::string& from, - const std::string& to) { - if (from.empty()) return false; - size_t start_pos = subj.find(from); - if (start_pos != std::string::npos) { - subj.replace(start_pos, from.length(), to); - return true; - } - - return false; -} - -// _____________________________________________________________________________ -inline bool replaceAll(std::string& subj, const 
std::string& from, - const std::string& to) { - if (from.empty()) return false; - bool found = false; - size_t s = subj.find(from, 0); - for (; s != std::string::npos; s = subj.find(from, s + to.length())) { - found = true; - subj.replace(s, from.length(), to); - } - - return found; -} - -// _____________________________________________________________________________ -inline std::string unixBasename(const std::string& pathname) { - return {std::find_if(pathname.rbegin(), pathname.rend(), - [](char c) { return c == '/'; }) - .base(), - pathname.end()}; -} - -// _____________________________________________________________________________ -template -inline std::string toString(T obj) { - std::stringstream ss; - ss << obj; - return ss.str(); -} - -// _____________________________________________________________________________ -inline std::vector split(std::string in, char sep) { - std::stringstream ss(in); - std::vector ret(1); - while (std::getline(ss, ret.back(), sep)) { - ret.push_back(""); - } - ret.pop_back(); - return ret; -} - -// _____________________________________________________________________________ -inline std::string ltrim(std::string str) { - str.erase(0, str.find_first_not_of(" \t\n\v\f\r")); - return str; -} - -// _____________________________________________________________________________ -inline std::string rtrim(std::string str) { - str.erase(str.find_last_not_of(" \t\n\v\f\r") + 1); - return str; -} - -// _____________________________________________________________________________ -inline std::string trim(std::string str) { return ltrim(rtrim(str)); } - -// _____________________________________________________________________________ -inline size_t editDist(const std::string& s1, const std::string& s2) { - // https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C++ - size_t len1 = s1.size(); - size_t len2 = s2.size(); - std::vector cur(len2 + 1); - std::vector prev(len2 + 1); - - for (size_t i = 0; i < 
prev.size(); i++) prev[i] = i; - - for (size_t i = 0; i < len1; i++) { - cur[0] = i + 1; - for (size_t j = 0; j < len2; j++) { - cur[j + 1] = - std::min(prev[1 + j] + 1, - std::min(cur[j] + 1, prev[j] + (s1[i] == s2[j] ? 0 : 1))); - } - std::swap(cur, prev); - } - - return prev[len2]; -} - -// _____________________________________________________________________________ -template -inline std::string implode(const std::vector& vec, const char* del) { - std::stringstream ss; - for (size_t i = 0; i < vec.size(); i++) { - if (i != 0) ss << del; - ss << vec[i]; - } - - return ss.str(); -} -} - -#endif // UTIL_STRING_H_ diff --git a/src/util/geo/BezierCurve.h b/src/util/geo/BezierCurve.h deleted file mode 100644 index f3d25c5..0000000 --- a/src/util/geo/BezierCurve.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2016, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#ifndef UTIL_GEO_BEZIERCURVE_H_ -#define UTIL_GEO_BEZIERCURVE_H_ - -#include -#include "util/geo/Geo.h" -#include "util/geo/PolyLine.h" - -namespace util { -namespace geo { - -struct CubicPolynom { - CubicPolynom(double a, double b, double c, double d, double x) - : a(a), b(b), c(c), d(d), x(x) {} - CubicPolynom() : a(0), b(0), c(0), d(0), x(0) {} - double a, b, c, d, x; - - double valueAt(double x) const; -}; - -/** - * Bezier curve - */ -template -class BezierCurve { - public: - BezierCurve(const Point& a, const Point& b, const Point& c, const Point& d); - - const PolyLine& render(double d); - - private: - double _d; - - // the x and y polynoms for this spline - CubicPolynom _xp, _yp; - - // store the rendered polyline for quicker access - PolyLine _rendered; - bool _didRender; - - void recalcPolynoms(const Point& x, const Point& b, const Point& c, - const Point& d); - - Point valueAt(double t) const; -}; - -#include "util/geo/BezierCurve.tpp" -} -} - -#endif // UTIL_GEO_BEZIERCURVE_H_ diff --git a/src/util/geo/BezierCurve.tpp b/src/util/geo/BezierCurve.tpp 
deleted file mode 100644 index fb7e6ca..0000000 --- a/src/util/geo/BezierCurve.tpp +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2016, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -// _____________________________________________________________________________ -template -BezierCurve::BezierCurve(const Point& a, const Point& b, - const Point& c, const Point& d) - : _d(dist(a, d)) { - assert(_d > 0); - recalcPolynoms(a, b, c, d); -} - -// _____________________________________________________________________________ -template -void BezierCurve::recalcPolynoms(const Point& a, const Point& b, - const Point& c, const Point& d) { - _xp.a = a.getX(); - _xp.b = 3.0 * (b.getX() - a.getX()); - _xp.c = 3.0 * (c.getX() - b.getX()) - _xp.b; - _xp.d = d.getX() - a.getX() - _xp.c - _xp.b; - - _yp.a = a.getY(); - _yp.b = 3.0 * (b.getY() - a.getY()); - _yp.c = 3.0 * (c.getY() - b.getY()) - _yp.b; - _yp.d = d.getY() - a.getY() - _yp.c - _yp.b; - - _didRender = false; -} - -// _____________________________________________________________________________ -template -Point BezierCurve::valueAt(double t) const { - return Point(_xp.valueAt(t), _yp.valueAt(t)); -} - -// _____________________________________________________________________________ -template -const PolyLine& BezierCurve::render(double d) { - assert(d > 0); - if (_didRender) return _rendered; - - if (_d == 0) { - _rendered << Point(_xp.a, _yp.a) << Point(_xp.a, _yp.a); - return _rendered; - } - - _rendered.empty(); - double n = _d / d, dt = 1 / n, t = 0; - - bool cancel = false; - while (true) { - _rendered << valueAt(t); - t += dt; - if (cancel) break; - if (t > 1) { - t = 1; - cancel = true; - } - } - - _didRender = true; - return _rendered; -} - -// _____________________________________________________________________________ -double CubicPolynom::valueAt(double atx) const { - double dx = atx - x; - return a + b * dx + c * dx * dx + d * dx * dx * dx; -} diff --git 
a/src/util/geo/Box.h b/src/util/geo/Box.h deleted file mode 100644 index 43f05e9..0000000 --- a/src/util/geo/Box.h +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2016, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Patrick Brosi - -#ifndef UTIL_GEO_BOX_H_ -#define UTIL_GEO_BOX_H_ - -#include "./Point.h" - -namespace util { -namespace geo { - -template -class Box { - public: - // maximum inverse box as default value of box - Box() - : _ll(std::numeric_limits::max(), std::numeric_limits::max()), - _ur(std::numeric_limits::min(), std::numeric_limits::min()) {} - Box(const Point& ll, const Point& ur) : _ll(ll), _ur(ur) {} - const Point& getLowerLeft() const { return _ll; } - const Point& getUpperRight() const { return _ur; } - - Point& getLowerLeft() { return _ll; } - Point& getUpperRight() { return _ur; } - - void setLowerLeft(const Point& ll) { _ll = ll; } - void setUpperRight(const Point& ur) { _ur = ur; } - - bool operator==(const Box& b) const { - return getLowerLeft() == b.getLowerLeft() && - getUpperRight() == b.getUpperRight(); - } - - bool operator!=(const Box& p) const { return !(*this == p); } - - private: - Point _ll, _ur; -}; - -template -class RotatedBox { - public: - RotatedBox() : _box(), _deg(0), _center() {} - RotatedBox(const Box& box) - : _box(box), - _deg(0), - _center(Point( - (box.getUpperRight().getX() - box.getLowerLeft().getX()) / T(2), - (box.getUpperRight().getY() - box.getLowerLeft().getY()) / T(2))) {} - RotatedBox(const Point& ll, const Point& ur) - : _box(ll, ur), - _deg(0), - _center(Point((ur.getX() - ll.getX()) / T(2), - (ur.getY() - ll.getY()) / T(2))) {} - RotatedBox(const Box& box, double deg) - : _box(box), - _deg(deg), - _center(Point( - (box.getUpperRight().getX() - box.getLowerLeft().getX()) / T(2), - (box.getUpperRight().getY() - box.getLowerLeft().getY()) / T(2))) {} - RotatedBox(const Point& ll, const Point& ur, double deg) - : _box(ll, ur), - _deg(deg), - _center(Point((ur.getX() - ll.getX()) 
/ T(2), - (ur.getY() - ll.getY()) / T(2))) {} - RotatedBox(const Box& box, double deg, const Point& center) - : _box(box), _deg(deg), _center(center) {} - RotatedBox(const Point& ll, const Point& ur, double deg, - const Point& center) - : _box(ll, ur), _deg(deg), _center(center) {} - - const Box& getBox() const { return _box; } - Box& getBox() { return _box; } - - double getDegree() const { return _deg; } - const Point& getCenter() const { return _center; } - Point& getCenter() { return _center; } - - private: - Box _box; - double _deg; - Point _center; -}; - -} // namespace geo -} // namespace util - -#endif // UTIL_GEO_BOX_H_ diff --git a/src/util/geo/Geo.h b/src/util/geo/Geo.h deleted file mode 100644 index c63e3b1..0000000 --- a/src/util/geo/Geo.h +++ /dev/null @@ -1,1375 +0,0 @@ -// Copyright 2016, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Authors: Patrick Brosi - -#ifndef UTIL_GEO_GEO_H_ -#define UTIL_GEO_GEO_H_ - -#define _USE_MATH_DEFINES - -#include -#include -#include -#include -#include -#include "util/Misc.h" -#include "util/geo/Box.h" -#include "util/geo/Line.h" -#include "util/geo/Point.h" -#include "util/geo/Polygon.h" - -// ------------------- -// Geometry stuff -// ------------------ - -namespace util { -namespace geo { - -// convenience aliases - -typedef Point DPoint; -typedef Point FPoint; -typedef Point IPoint; - -typedef LineSegment DLineSegment; -typedef LineSegment FLineSegment; -typedef LineSegment ILineSegment; - -typedef Line DLine; -typedef Line FLine; -typedef Line ILine; - -typedef Box DBox; -typedef Box FBox; -typedef Box IBox; - -typedef Polygon DPolygon; -typedef Polygon FPolygon; -typedef Polygon IPolygon; - -const static double EPSILON = 0.00000000001; -const static double RAD = 0.017453292519943295; // PI/180 - -// _____________________________________________________________________________ -template -inline Box pad(const Box& box, double padding) { - return Box(Point(box.getLowerLeft().getX() - 
padding, - box.getLowerLeft().getY() - padding), - Point(box.getUpperRight().getX() + padding, - box.getUpperRight().getY() + padding)); -} - -// _____________________________________________________________________________ -template -inline Point centroid(const Point p) { - return p; -} - -// _____________________________________________________________________________ -template -inline Point centroid(const LineSegment ls) { - return Point((ls.first.getX() + ls.second.getX()) / T(2), - (ls.first.getY() + ls.second.getY()) / T(2)); -} - -// _____________________________________________________________________________ -template -inline Point centroid(const Line ls) { - double x = 0, y = 0; - for (const auto& p : ls) { - x += p.getX(); - y += p.getY(); - } - return Point(x / T(ls.size()), y / T(ls.size())); -} - -// _____________________________________________________________________________ -template -inline Point centroid(const Polygon ls) { - return centroid(ls.getOuter()); -} - -// _____________________________________________________________________________ -template -inline Point centroid(const Box box) { - return centroid(LineSegment(box.getLowerLeft(), box.getUpperRight())); -} - -// _____________________________________________________________________________ -template class Geometry> -inline Point centroid(std::vector> multigeo) { - Line a; - for (const auto& g : multigeo) a.push_back(centroid(g)); - return centroid(a); -} - -// _____________________________________________________________________________ -template -inline Point rotate(const Point& p, double deg) { - UNUSED(deg); - return p; -} - -// _____________________________________________________________________________ -template -inline Point rotate(Point p, double deg, const Point& c) { - deg *= -RAD; - double si = sin(deg); - double co = cos(deg); - p = p - c; - - return Point(p.getX() * co - p.getY() * si, - p.getX() * si + p.getY() * co) + - c; -} - -// 
_____________________________________________________________________________ -template -inline LineSegment rotate(LineSegment geo, double deg, - const Point& c) { - geo.first = rotate(geo.first, deg, c); - geo.second = rotate(geo.second, deg, c); - return geo; -} - -// _____________________________________________________________________________ -template -inline LineSegment rotate(LineSegment geo, double deg) { - return (geo, deg, centroid(geo)); -} - -// _____________________________________________________________________________ -template -inline Line rotate(Line geo, double deg, const Point& c) { - for (size_t i = 0; i < geo.size(); i++) geo[i] = rotate(geo[i], deg, c); - return geo; -} - -// _____________________________________________________________________________ -template -inline Polygon rotate(Polygon geo, double deg, const Point& c) { - for (size_t i = 0; i < geo.getOuter().size(); i++) - geo.getOuter()[i] = rotate(geo.getOuter()[i], deg, c); - return geo; -} - -// _____________________________________________________________________________ -template