generate-shapes/src/shapevl/Collector.cpp
Patrick Brosi e15b90af9f comment
2022-02-07 09:35:28 +01:00

485 lines
15 KiB
C++

// Copyright 2018, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Authors: Patrick Brosi <brosi@informatik.uni-freiburg.de>
#include <fstream>
#include <set>
#include <string>
#include <utility>
#include "ad/cppgtfs/gtfs/Feed.h"
#include "pfaedle/Def.h"
#include "shapevl/Collector.h"
#include "shapevl/Result.h"
#include "util/geo/Geo.h"
#include "util/geo/PolyLine.h"
#include "util/geo/output/GeoJsonOutput.h"
#include "util/log/Log.h"
using util::geo::PolyLine;
using ad::cppgtfs::gtfs::Shape;
using ad::cppgtfs::gtfs::Trip;
using pfaedle::eval::Collector;
using pfaedle::eval::Result;
using util::geo::output::GeoJsonOutput;
// _____________________________________________________________________________
double Collector::add(const Trip* oldT, const Shape* oldS, const Trip* newT,
const Shape* newS) {
// This adds a new trip with a new shape to our evaluation.
_trips++;
if (!oldS) {
// If there is no original shape, we cannot compare them - abort!
_noOrigShp++;
return 0;
}
for (auto st : oldT->getStopTimes()) {
if (st.getShapeDistanceTravelled() < 0) {
// we cannot safely compare trips without shape dist travelled
// information - abort!
_noOrigShp++;
return 0;
}
}
for (auto st : newT->getStopTimes()) {
if (st.getShapeDistanceTravelled() < 0) {
// we cannot safely compare trips without shape dist travelled
// information - abort!
_noOrigShp++;
return 0;
}
}
double fd = 0;
// A "segment" is a path from station s_i to station s_{i+1}
size_t unmatchedSegments; // number of unmatched segments
double unmatchedSegmentsLength; // total _an. length of unmatched segments
std::vector<double> oldDists;
LINE oldL = getLine(oldS, &oldDists);
std::vector<double> newDists;
LINE newL = getLine(newS, &newDists);
// check dist between anchor points
if ((util::geo::latLngLen(oldL) * 1.0) / (oldL.size() * 1.0) > 1000) {
// most likely input with a degenerated shape - dont compare
_noOrigShp++;
return 0;
}
if ((util::geo::latLngLen(newL) * 1.0) / (newL.size() * 1.0) > 1000) {
// most likely input with a degenerated shape - dont compare
_noOrigShp++;
return 0;
}
std::vector<std::pair<double, double>> newLenDists;
std::vector<std::pair<double, double>> oldLenDists;
auto oldSegs = segmentize(oldT, oldL, oldDists, newLenDists);
auto newSegs = segmentize(newT, newL, newDists, oldLenDists);
for (const auto& p : oldLenDists) {
_distDiffs.push_back(fabs(p.first - p.second));
_hopDists.push_back(p.first);
}
// new lines build from cleaned-up shapes
LINE oldLCut;
LINE newLCut;
for (auto oldL : oldSegs)
oldLCut.insert(oldLCut.end(), oldL.begin(), oldL.end());
for (auto newL : newSegs) {
newLCut.insert(newLCut.end(), newL.begin(), newL.end());
}
// convert (roughly) to degrees
double SEGL = 25.0 / util::geo::M_PER_DEG;
double f = util::geo::webMercDistFactor(oldLCut.front());
// roughly half a meter
auto oldLCutS =
util::geo::simplify(oldLCut, f * (0.5 / util::geo::M_PER_DEG));
auto newLCutS =
util::geo::simplify(newLCut, f * (0.5 / util::geo::M_PER_DEG));
auto old = _dCache.find(oldLCutS);
if (old != _dCache.end()) {
auto match = old->second.find(newLCutS);
if (match != old->second.end()) {
fd = match->second;
} else {
fd = util::geo::accFrechetDistCHav(oldLCutS, newLCutS, SEGL);
_dCache[oldLCutS][newLCutS] = fd;
}
} else {
fd = util::geo::accFrechetDistCHav(oldLCutS, newLCutS, SEGL);
_dCache[oldLCutS][newLCutS] = fd;
}
auto dA = getDa(oldSegs, newSegs);
unmatchedSegments = dA.first;
unmatchedSegmentsLength = dA.second;
double totL = 0;
for (auto l : oldSegs) totL += util::geo::latLngLen(l);
// filter out shapes with a length of under 5 meters - they are most likely
// artifacts
if (totL < 5) {
_noOrigShp++;
return 0;
}
_fdSum += fd / totL;
_unmatchedSegSum += unmatchedSegments;
_unmatchedSegLengthSum += unmatchedSegmentsLength;
double avgFd = fd / totL;
double AN = static_cast<double>(unmatchedSegments) /
static_cast<double>(oldSegs.size());
double AL = unmatchedSegmentsLength / totL;
_results.insert(Result(oldT, avgFd));
if (AN <= 0.0001) _an0++;
if (AN <= 0.05) _an5++;
if (AN <= 0.1) _an10++;
if (AN <= 0.2) _an20++;
if (AN <= 0.3) _an30++;
if (AN <= 0.5) _an50++;
if (AN <= 0.7) _an70++;
if (AN <= 0.9) _an90++;
LOG(VDEBUG) << "This result (" << oldT->getId()
<< "): A_N/N = " << unmatchedSegments << "/" << oldSegs.size()
<< " = " << AN << " A_L/L = " << unmatchedSegmentsLength << "/"
<< totL << " = " << AL << " d_f = " << avgFd;
if (_reportOut) {
(*_reportOut) << std::fixed << std::setprecision(6);
(*_reportOut) << oldT->getId() << "\t" << AN << "\t" << AL << "\t" << avgFd
<< "\t" << util::geo::getWKT(oldSegs) << "\t"
<< util::geo::getWKT(newSegs) << "\t" << oldT->getRoute()->getShortName() << "\t";
for (const auto& st : oldT->getStopTimes()) {
(*_reportOut) << st.getStop()->getName() << "\t"
<< st.getStop()->getLat() << "\t"
<< st.getStop()->getLng() << "\t";
}
(*_reportOut) << "\n";
}
return avgFd;
}
// _____________________________________________________________________________
std::vector<LINE> Collector::segmentize(
const Trip* t, const LINE& shape, const std::vector<double>& dists,
std::vector<std::pair<double, double>>& lenDist) {
std::vector<LINE> ret;
if (t->getStopTimes().size() < 2) return ret;
POLYLINE pl(shape);
std::vector<double> cuts;
size_t i = 0;
for (const auto& st : t->getStopTimes()) {
cuts.push_back(st.getShapeDistanceTravelled());
i++;
}
size_t to = std::upper_bound(dists.begin(), dists.end(), cuts[0]) -
dists.begin();
POINT lastP;
if (to >= dists.size()) {
lastP = shape.back();
} else if (to == 0) {
lastP = shape.front();
} else {
double progr = (cuts[0] - dists[to - 1]) / (dists[to] - dists[to - 1]);
lastP = shape[to - 1];
lastP.setX(lastP.getX() + progr * (shape[to].getX() - shape[to-1].getX()));
lastP.setY(lastP.getY() + progr * (shape[to].getY() - shape[to-1].getY()));
}
for (size_t i = 1; i < cuts.size(); i++) {
size_t to = std::upper_bound(dists.begin(), dists.end(), cuts[i]) -
dists.begin();
POINT curP;
if (to >= dists.size()) {
curP = shape.back();
} else if (to == 0) {
curP = shape.front();
} else {
curP = shape[to - 1];
double progr = (cuts[i] - dists[to - 1]) / (dists[to] - dists[to - 1]);
curP.setX(curP.getX() + progr * (shape[to].getX() - shape[to-1].getX()));
curP.setY(curP.getY() + progr * (shape[to].getY() - shape[to-1].getY()));
}
auto curL = pl.getSegment(lastP, curP).getLine();
double dist =
util::geo::haversine(t->getStopTimes()[i - 1].getStop()->getLat(),
t->getStopTimes()[i - 1].getStop()->getLng(),
t->getStopTimes()[i].getStop()->getLat(),
t->getStopTimes()[i].getStop()->getLng());
double len = util::geo::latLngLen(curL);
lenDist.push_back({dist, len});
ret.push_back(curL);
lastP = curP;
}
return ret;
}
// _____________________________________________________________________________
LINE Collector::getLine(const Shape* s, std::vector<double>* dists) {
LINE ret;
for (size_t i = 0; i < s->getPoints().size(); i++) {
ret.push_back({s->getPoints()[i].lng, s->getPoints()[i].lat});
(*dists).push_back(s->getPoints()[i].travelDist);
}
return ret;
}
// _____________________________________________________________________________
const std::set<Result>& Collector::getResults() const { return _results; }
// _____________________________________________________________________________
double Collector::getAvgDist() const { return _fdSum / _results.size(); }
// _____________________________________________________________________________
void Collector::printCsv(std::ostream* os,
const std::set<Result>& result) const {
for (auto r : result) (*os) << r.getDist() << "\n";
}
// _____________________________________________________________________________
double Collector::getAcc() const {
return static_cast<double>(_an0) / static_cast<double>(_results.size());
}
// _____________________________________________________________________________
void Collector::printShortStats(std::ostream* os) const {
if (_results.size()) {
(*os) << (static_cast<double>(_an0) /
static_cast<double>(_results.size())) *
100
<< ",";
(*os) << (static_cast<double>(_an5) /
static_cast<double>(_results.size())) *
100
<< ",";
(*os) << (static_cast<double>(_an10) /
static_cast<double>(_results.size())) *
100
<< ",";
(*os) << (static_cast<double>(_an20) /
static_cast<double>(_results.size())) *
100
<< ",";
(*os) << (static_cast<double>(_an30) /
static_cast<double>(_results.size())) *
100
<< ",";
(*os) << (static_cast<double>(_an50) /
static_cast<double>(_results.size())) *
100
<< ",";
(*os) << (static_cast<double>(_an70) /
static_cast<double>(_results.size())) *
100
<< ",";
(*os) << (static_cast<double>(_an90) /
static_cast<double>(_results.size())) *
100;
}
}
// _____________________________________________________________________________
void Collector::printStats(std::ostream* os) const {
(*os) << std::setfill(' ') << std::setw(50) << " # of trips: " << _trips
<< "\n";
(*os) << std::setfill(' ') << std::setw(50)
<< " # of trips new shapes were matched for: " << _results.size()
<< "\n";
(*os) << std::setw(50) << " # of trips without input shapes: " << _noOrigShp
<< "\n";
if (_results.size()) {
(*os) << std::setw(50) << " highest avg frechet distance to input shapes: "
<< (--_results.end())->getDist() << " (on trip #"
<< (--_results.end())->getTrip()->getId() << ")\n";
(*os) << std::setw(50) << " lowest distance to input shapes: "
<< (_results.begin())->getDist() << " (on trip #"
<< (_results.begin())->getTrip()->getId() << ")\n";
(*os) << std::setw(50)
<< " averaged avg frechet distance: " << getAvgDist() << "\n";
(*os) << "\n";
(*os) << " an-0: "
<< (static_cast<double>(_an0) /
static_cast<double>(_results.size())) *
100
<< " %"
<< "\n";
(*os) << " an-5: "
<< (static_cast<double>(_an5) /
static_cast<double>(_results.size())) *
100
<< " %"
<< "\n";
(*os) << " an-10: "
<< (static_cast<double>(_an10) /
static_cast<double>(_results.size())) *
100
<< " %"
<< "\n";
(*os) << " an-20: "
<< (static_cast<double>(_an20) /
static_cast<double>(_results.size())) *
100
<< " %"
<< "\n";
(*os) << " acc-30: "
<< (static_cast<double>(_an30) /
static_cast<double>(_results.size())) *
100
<< " %"
<< "\n";
(*os) << " acc-50: "
<< (static_cast<double>(_an50) /
static_cast<double>(_results.size())) *
100
<< " %"
<< "\n";
(*os) << " acc-70: "
<< (static_cast<double>(_an70) /
static_cast<double>(_results.size())) *
100
<< " %"
<< "\n";
(*os) << " acc-90: "
<< (static_cast<double>(_an90) /
static_cast<double>(_results.size())) *
100
<< " %"
<< "\n";
}
(*os) << std::endl;
}
// _____________________________________________________________________________
std::map<string, double> Collector::getStats() {
std::map<string, double> stats;
if (_distDiffs.size()) {
auto i = _distDiffs.begin() + _distDiffs.size() / 2;
// std::nth_element makes a partial sort of the first n elements
std::nth_element(_distDiffs.begin(), i, _distDiffs.end());
stats["median-dist-diff"] = *i;
} else {
stats["median-dist-diff"] = -1;
}
if (_hopDists.size()) {
double s = 0;
for (auto d : _hopDists) s += d;
stats["avg-hop-dist"] = s / (_hopDists.size() * 1.0);
} else {
stats["avg-hop-dist"] = -1;
}
stats["num-trips"] = _trips;
stats["num-trips-matched"] = _results.size();
stats["num-trips-wo-shapes"] = _noOrigShp;
stats["avg-fr"] = getAvgDist();
if (_results.size()) {
stats["max-avg-frech-dist"] = (--_results.end())->getDist();
} else {
stats["max-avg-frech-dist"] = -1;
}
stats["an-0"] =
(static_cast<double>(_an0) / static_cast<double>(_results.size())) * 100;
stats["an-5"] =
(static_cast<double>(_an5) / static_cast<double>(_results.size())) * 100;
stats["an-10"] =
(static_cast<double>(_an10) / static_cast<double>(_results.size())) * 100;
stats["an-20"] =
(static_cast<double>(_an20) / static_cast<double>(_results.size())) * 100;
stats["an-30"] =
(static_cast<double>(_an30) / static_cast<double>(_results.size())) * 100;
stats["an-50"] =
(static_cast<double>(_an50) / static_cast<double>(_results.size())) * 100;
stats["an-70"] =
(static_cast<double>(_an70) / static_cast<double>(_results.size())) * 100;
stats["an-90"] =
(static_cast<double>(_an90) / static_cast<double>(_results.size())) * 100;
return stats;
}
// _____________________________________________________________________________
std::pair<size_t, double> Collector::getDa(const std::vector<LINE>& a,
const std::vector<LINE>& b) {
assert(a.size() == b.size());
std::pair<size_t, double> ret{0, 0};
// convert (roughly) to degrees
double SEGL = 25 / util::geo::M_PER_DEG;
double MAX = 100;
for (size_t i = 0; i < a.size(); i++) {
double fdMeter = 0;
double f = util::geo::webMercDistFactor(a[i].front());
// roughly half a meter
auto aSimpl = util::geo::simplify(a[i], f * (0.5 / util::geo::M_PER_DEG));
auto bSimpl = util::geo::simplify(b[i], f * (0.5 / util::geo::M_PER_DEG));
auto old = _dACache.find(aSimpl);
if (old != _dACache.end()) {
auto match = old->second.find(bSimpl);
if (match != old->second.end()) {
fdMeter = match->second;
} else {
fdMeter = util::geo::frechetDistHav(aSimpl, bSimpl, SEGL);
_dACache[aSimpl][bSimpl] = fdMeter;
}
} else {
fdMeter = util::geo::frechetDistHav(aSimpl, bSimpl, SEGL);
_dACache[aSimpl][bSimpl] = fdMeter;
}
if (fdMeter >= MAX) {
ret.first++;
ret.second += util::geo::latLngLen(aSimpl);
}
}
return ret;
}