Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • debian-gis-team/osm2pgsql
1 result
Show changes
Commits on Source (14)
language: cpp
sudo: false
addons:
apt:
sources:
- boost-latest
- ubuntu-toolchain-r-test
packages:
- g++-4.8
- libexpat1-dev
- libpq-dev
- libbz2-dev
- libproj-dev
- lua5.2
- liblua5.2-dev
- libluajit-5.1-dev
- libboost1.55-dev
- libboost-system1.55-dev
- libboost-filesystem1.55-dev
language: generic
sudo: required
git:
depth: 1
services:
- postgresql
addons_shortcuts:
addons_clang38_pg92: &clang38_pg92
postgresql: '9.2'
apt:
sources: ['ubuntu-toolchain-r-test', 'llvm-toolchain-trusty-3.8']
packages: ['clang-3.8', 'postgresql-9.2-postgis-2.3',
'python3-psycopg2', 'libexpat1-dev', 'libpq-dev', 'libbz2-dev', 'libproj-dev',
'lua5.2', 'liblua5.2-dev', 'libluajit-5.1-dev',
'libboost1.55-dev', 'libboost-system1.55-dev', 'libboost-filesystem1.55-dev']
addons_clang7_pg96: &clang7_pg96
postgresql: '9.6'
apt:
update: true
sources:
- sourceline: 'deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty-7 main'
key_url: https://apt.llvm.org/llvm-snapshot.gpg.key
- ubuntu-toolchain-r-test
packages: ['clang-7','postgresql-9.6-postgis-2.3',
'python3-psycopg2', 'libexpat1-dev', 'libpq-dev', 'libbz2-dev', 'libproj-dev',
'lua5.2', 'liblua5.2-dev', 'libluajit-5.1-dev',
'libboost1.55-dev', 'libboost-system1.55-dev', 'libboost-filesystem1.55-dev']
addons_gcc48_pg96: &gcc48_pg96
postgresql: '9.6'
apt:
sources: ["ubuntu-toolchain-r-test"]
packages: ['g++-4.8','postgresql-9.6-postgis-2.3',
'python3-psycopg2', 'libexpat1-dev', 'libpq-dev', 'libbz2-dev', 'libproj-dev',
'lua5.2', 'liblua5.2-dev', 'libluajit-5.1-dev',
'libboost1.55-dev', 'libboost-system1.55-dev', 'libboost-filesystem1.55-dev']
addons_gcc8_pg96: &gcc8_pg96
postgresql: '9.6'
apt:
sources: ["ubuntu-toolchain-r-test"]
packages: ['g++-8','postgresql-9.6-postgis-2.3',
'python3-psycopg2', 'libexpat1-dev', 'libpq-dev', 'libbz2-dev', 'libproj-dev',
'lua5.2', 'liblua5.2-dev', 'libluajit-5.1-dev',
'libboost1.55-dev', 'libboost-system1.55-dev', 'libboost-filesystem1.55-dev']
# env: T="...." // please set a unique test id (T="..")
matrix:
include:
# ---- Linux + CLANG ---------------------------
- os: linux
compiler: clang
env: CXXFLAGS="-pedantic -Werror" LUAJIT_OPTION="OFF"
- os: linux
compiler: gcc
env: RUNTEST="-L NoDB" CXXFLAGS="-pedantic -Werror -fsanitize=address" LUAJIT_OPTION="OFF"
dist: trusty
compiler: "clang-3.8"
env: T="clang38_pg92_dbtest" LUAJIT_OPTION="OFF"
CXXFLAGS="-pedantic -Wextra -Werror"
CC=clang-3.8 CXX=clang++-3.8
addons: *clang38_pg92
- os: linux
compiler: clang
env: CXXFLAGS="-pedantic -Werror" LUAJIT_OPTION="ON"
- os: linux
compiler: gcc
env: RUNTEST="-L NoDB" CXXFLAGS="-pedantic -Werror -fsanitize=address" LUAJIT_OPTION="ON"
dist: trusty
compiler: "clang-7"
env: T="clang7_pg96_dbtest_luajit" LUAJIT_OPTION="ON"
CXXFLAGS="-pedantic -Wextra -Werror"
CC=clang-7 CXX=clang++-7
addons: *clang7_pg96
# ---- OSX + CLANG ---------------------------
- os: osx
compiler: clang
env: RUNTEST="-L NoDB" CXXFLAGS="-pedantic -Werror -fsanitize=address" LUAJIT_OPTION="OFF"
env: T="osx_clang_NoDB" LUAJIT_OPTION="OFF" TEST_NODB=1
CXXFLAGS="-pedantic -Wextra -Werror"
before_install:
- brew install lua; brew install lua
before_script:
- xml2-config --version
- proj | head -n1
- lua -v
# ---- Linux + GCC ---------------------------
- os: linux
dist: trusty
compiler: "gcc-4.8"
env: T="gcc48_pg96_dbtest" LUAJIT_OPTION="OFF"
CXXFLAGS="-pedantic -Wextra -Werror"
CC=gcc-4.8 CXX=g++-4.8
addons: *gcc48_pg96
- os: linux
dist: trusty
compiler: gcc-8
env: T="gcc8_pg96_dbtest_luajit" LUAJIT_OPTION="ON"
CXXFLAGS="-pedantic -Wextra -Werror"
CC=gcc-8 CXX=g++-8
addons: *gcc8_pg96
before_install:
- if [[ $TRAVIS_OS_NAME == 'osx' ]]; then
brew install lua;
fi
# update versions
install:
- if [[ $CC == 'gcc' ]]; then
export CC=gcc-4.8;
fi
- if [[ $CXX == 'g++' ]]; then
export CXX=g++-4.8;
fi
- dpkg -l | grep -E 'lua|proj|xml|bz2|postgis|zlib|boost|expat' # checking available versions
before_script:
- psql -U postgres -c "SELECT version()"
- psql -U postgres -c "CREATE EXTENSION postgis"
- psql -U postgres -c "CREATE EXTENSION hstore"
- psql -U postgres -c "SELECT PostGIS_Full_Version()"
- $CXX --version
- xml2-config --version
- proj | head -n1
......@@ -55,8 +115,15 @@ script:
- mkdir build && cd build
- cmake .. -DBUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Debug -DWITH_LUAJIT=$LUAJIT_OPTION
- make -j2
- echo "Running tests that does not require PostgreSQL server"
- if [[ $RUNTEST ]]; then ctest -VV $RUNTEST; fi
- echo "Running tests ... "
- if [[ $TEST_NODB ]]; then
ctest -VV -L NoDB;
else
PG_VERSION=`psql -U postgres -t -c "SELECT version()" | head -n 1 | cut -d ' ' -f 3 | cut -d . -f 1-2`;
pg_virtualenv -v $PG_VERSION ctest -VV;
fi
after_failure:
- # rerun make, but verbosely
make VERBOSE=1
# end of .travis
set(PACKAGE osm2pgsql)
set(PACKAGE_NAME osm2pgsql)
set(PACKAGE_VERSION 0.96.0)
set(PACKAGE_VERSION 1.0.0)
cmake_minimum_required(VERSION 2.8.7)
......@@ -168,17 +168,18 @@ if (NOT HAVE_UNISTD_H AND NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/unistd.h)
endif()
set(osm2pgsql_lib_SOURCES
db-copy.cpp
expire-tiles.cpp
geometry-processor.cpp
id-tracker.cpp
middle-pgsql.cpp
middle-ram.cpp
middle.cpp
node-persistent-cache.cpp
node-ram-cache.cpp
options.cpp
osmdata.cpp
osmium-builder.cpp
gazetteer-style.cpp
output-gazetteer.cpp
output-multi.cpp
output-null.cpp
......@@ -197,6 +198,7 @@ set(osm2pgsql_lib_SOURCES
tagtransform-c.cpp
util.cpp
wildcmp.cpp
db-copy.hpp
expire-tiles.hpp
geometry-processor.hpp
id-tracker.hpp
......@@ -209,6 +211,7 @@ set(osm2pgsql_lib_SOURCES
osmdata.hpp
osmium-builder.hpp
osmtypes.hpp
gazetteer-style.hpp
output-gazetteer.hpp
output-multi.hpp
output-null.hpp
......
......@@ -73,12 +73,24 @@ executing ``ctest``.
Regression tests require python and psycopg to be installed. On Ubuntu run:
```sh
sudo apt-get install python-psycopg2
sudo apt-get install python3-psycopg2
```
Most of these tests depend on being able to set up a database and run osm2pgsql
against it. You need to ensure that PostgreSQL is running and that your user is
a superuser of that system. To do that, run:
against it. This is most easily done using ``pg_virtualenv``. Just run
```sh
pg_virtualenv ctest
```
``pg_virtualenv`` creates a separate postgres server instance. The test databases
are created in this instance and the complete server is destroyed after the
tests are finished. ctest also calls appropriate fixtures that create the
separate tablespace required for some tests.
When running without ``pg_virtualenv``, you need to ensure that PostgreSQL is
running and that your user is a superuser of that system. You also need to
create an appropriate test tablespace manually. To do that, run:
```sh
sudo -u postgres createuser -s $USER
......@@ -94,9 +106,6 @@ to be a bug, please check to see if it is a known issue at
https://github.com/openstreetmap/osm2pgsql/issues and, if it's not
already known, report it there.
If running the tests in a virtual machine, allocate sufficient disk space for a
20GB flat nodes file.
### Performance Testing
If performance testing with a full planet import is required, indicate what
......
......@@ -21,8 +21,7 @@ Nominatim, or general analysis.
Most Linux distributions include osm2pgsql. It is also available on macOS with [Homebrew](http://brew.sh/).
Unofficial builds for Windows are available from [AppVeyor](https://ci.appveyor.com/project/openstreetmap/osm2pgsql/history) but you need to find the right build artifacts.
For the latest release 0.96.0, you may download a
[32bit version](https://ci.appveyor.com/api/projects/openstreetmap/osm2pgsql/artifacts/osm2pgsql_Release_x86.zip?tag=0.96.0&job=Environment%3A%20arch%3Dx86) or [64bit version](https://ci.appveyor.com/api/projects/openstreetmap/osm2pgsql/artifacts/osm2pgsql_Release_x64.zip?tag=0.96.0&job=Environment%3A%20arch%3Dx64).
Builds for releases may also be downloaded from the [OpenStreetMap Dev server](https://lonvia.dev.openstreetmap.org/osm2pgsql-winbuild/releases/).
## Building ##
......@@ -172,6 +171,36 @@ null backend for testing. For flexibility a new [multi](docs/multi.md)
backend is also available which allows the configuration of custom
PostgreSQL tables instead of those provided in the pgsql backend.
## LuaJIT support ##
To speed up Lua tag transformations, [LuaJIT](http://luajit.org/) can be optionally
enabled on supported platforms. Performance measurements have shown about 25%
runtime reduction for a planet import, with about 40% reduction on parsing time.
On a Debian or Ubuntu system, this can be done with:
```sh
sudo apt install libluajit-5.1-dev
```
Configuration parameter `WITH_LUAJIT=ON` needs to be added to enable LuaJIT.
Otherwise make and installation steps are identical to the description above.
```sh
cmake -D WITH_LUAJIT=ON ..
```
Use `osm2pgsql --version` to verify that the build includes LuaJIT support:
```sh
./osm2pgsql --version
osm2pgsql version 0.96.0 (64 bit id space)
Compiled using the following library versions:
Libosmium 2.15.0
Lua 5.1.4 (LuaJIT 2.1.0-beta3)
```
## Contributing ##
We welcome contributions to osm2pgsql. If you would like to report an issue,
......
......@@ -10,7 +10,7 @@
find_path(LUAJIT_INCLUDE_DIR luajit.h
HINTS
ENV LUA_DIR
PATH_SUFFIXES include/luajit-2.0 include
PATH_SUFFIXES include/luajit-2.0 include/luajit-2.1 include
PATHS
~/Library/Frameworks
/Library/Frameworks
......
......@@ -71,6 +71,9 @@ find_path(OSMIUM_INCLUDE_DIR osmium/version.hpp
# Check libosmium version number
if(Osmium_FIND_VERSION)
if(NOT EXISTS "${OSMIUM_INCLUDE_DIR}/osmium/version.hpp")
message(FATAL_ERROR "Missing ${OSMIUM_INCLUDE_DIR}/osmium/version.hpp. Either your libosmium version is too old, or libosmium wasn't found in the place you said.")
endif()
file(STRINGS "${OSMIUM_INCLUDE_DIR}/osmium/version.hpp" _libosmium_version_define REGEX "#define LIBOSMIUM_VERSION_STRING")
if("${_libosmium_version_define}" MATCHES "#define LIBOSMIUM_VERSION_STRING \"([0-9.]+)\"")
set(_libosmium_version "${CMAKE_MATCH_1}")
......@@ -111,7 +114,7 @@ endif()
if(Osmium_USE_PBF)
find_package(ZLIB)
find_package(Threads)
find_package(Protozero 1.5.1)
find_package(Protozero 1.6.3)
list(APPEND OSMIUM_EXTRA_FIND_VARS ZLIB_FOUND Threads_FOUND PROTOZERO_INCLUDE_DIR)
if(ZLIB_FOUND AND Threads_FOUND AND PROTOZERO_FOUND)
......@@ -324,7 +327,7 @@ if(MSVC)
add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN -D_CRT_SECURE_NO_WARNINGS)
endif()
if(APPLE)
if(APPLE AND "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
# following only available from cmake 2.8.12:
# add_compile_options(-stdlib=libc++)
# so using this instead:
......
#cmakedefine HAVE_LSEEK64 1
#cmakedefine HAVE_LUA 1
#cmakedefine HAVE_LUAJIT 1
#cmakedefine HAVE_POSIX_FADVISE 1
#cmakedefine HAVE_POSIX_FALLOCATE 1
#cmakedefine HAVE_SYNC_FILE_RANGE 1
......
#include <boost/format.hpp>
#include <cassert>
#include <cstdio>
#include <future>
#include <thread>
#include "db-copy.hpp"
#include "pgsql.hpp"
using fmt = boost::format;
/**
 * Create the copy thread object and immediately start its worker.
 *
 * The worker executes worker_thread(). If an exception escapes from it,
 * the message is printed to stderr and the whole process is terminated
 * with exit code 2.
 *
 * @param conninfo Connection string later handed to PQconnectdb().
 */
db_copy_thread_t::db_copy_thread_t(std::string const &conninfo)
: m_conninfo(conninfo), m_conn(nullptr)
{
    m_worker = std::thread([this]() {
        try {
            worker_thread();
        } catch (std::exception const &e) {
            // Catch the common base class, not only std::runtime_error:
            // any exception leaving a thread function ends in
            // std::terminate() without a usable diagnostic.
            fprintf(stderr, "DB writer thread failed due to ERROR: %s\n",
                    e.what());
            exit(2);
        }
    });
}
/// Shut the worker down (see finish()) before the object goes away.
db_copy_thread_t::~db_copy_thread_t()
{
    finish();
}
/**
 * Append a command to the worker queue and wake the worker.
 *
 * Must not be called after the worker thread has been joined.
 */
void db_copy_thread_t::add_buffer(std::unique_ptr<db_cmd_t> &&buffer)
{
    // The worker must still be around to pick the command up.
    assert(m_worker.joinable());

    std::lock_guard<std::mutex> guard(m_queue_mutex);
    m_worker_queue.emplace_back(std::move(buffer));
    m_queue_cond.notify_one();
}
/**
 * Queue a sync command and block until the worker has processed it.
 */
void db_copy_thread_t::sync_and_wait()
{
    std::promise<void> signal;
    auto reached = signal.get_future();

    // The promise travels with the command; the worker fulfils it.
    std::unique_ptr<db_cmd_t> cmd(new db_cmd_sync_t(std::move(signal)));
    add_buffer(std::move(cmd));

    reached.wait();
}
/**
 * Ask the worker to finish and wait until it has terminated.
 *
 * Does nothing when the worker has already been joined.
 */
void db_copy_thread_t::finish()
{
    if (!m_worker.joinable()) {
        return;
    }

    // Do not call finish_copy() from here: m_conn and m_inflight are used by
    // the worker without locking, so touching them from the calling thread
    // would race with commands that are still being processed. The worker
    // ends a running COPY itself once it has seen the finish command.
    add_buffer(std::unique_ptr<db_cmd_t>(new db_cmd_finish_t()));
    m_worker.join();
}
void db_copy_thread_t::worker_thread()
{
connect();
bool done = false;
while (!done) {
std::unique_ptr<db_cmd_t> item;
{
std::unique_lock<std::mutex> lock(m_queue_mutex);
if (m_worker_queue.empty()) {
m_queue_cond.wait(lock);
continue;
}
item = std::move(m_worker_queue.front());
m_worker_queue.pop_front();
}
switch (item->type) {
case db_cmd_t::Cmd_copy:
write_to_db(static_cast<db_cmd_copy_t *>(item.get()));
break;
case db_cmd_t::Cmd_sync:
finish_copy();
static_cast<db_cmd_sync_t *>(item.get())->barrier.set_value();
break;
case db_cmd_t::Cmd_finish:
done = true;
break;
}
}
finish_copy();
disconnect();
}
void db_copy_thread_t::connect()
{
assert(!m_conn);
PGconn *conn = PQconnectdb(m_conninfo.c_str());
if (PQstatus(conn) != CONNECTION_OK)
throw std::runtime_error(
(fmt("Connection to database failed: %1%\n") % PQerrorMessage(conn))
.str());
m_conn = conn;
// Let commits happen faster by delaying when they actually occur.
pgsql_exec_simple(m_conn, PGRES_COMMAND_OK,
"SET synchronous_commit TO off;");
}
/// Close the database connection if one is open.
void db_copy_thread_t::disconnect()
{
    if (m_conn) {
        PQfinish(m_conn);
        m_conn = nullptr;
    }
}
/**
 * Execute a copy command: run pending deletes, make sure a COPY for the
 * command's target is running and send the buffer content.
 */
void db_copy_thread_t::write_to_db(db_cmd_copy_t *buffer)
{
    bool const has_deletes = !buffer->deletables.empty();
    bool const other_target =
        m_inflight && !buffer->target->same_copy_target(*m_inflight);

    // A running COPY has to end before rows can be deleted or before data
    // for a different target can be sent.
    if (has_deletes || other_target) {
        finish_copy();
    }

    if (has_deletes) {
        delete_rows(buffer);
    }

    if (!m_inflight) {
        start_copy(buffer->target);
    }

    pgsql_CopyData(buffer->target->name.c_str(), m_conn, buffer->buffer);
}
/**
 * Delete all ids listed in the command's deletables from the target table
 * with a single DELETE ... IN (...) statement.
 *
 * Expects that no COPY is running and that the list is not empty.
 */
void db_copy_thread_t::delete_rows(db_cmd_copy_t *buffer)
{
    assert(!m_inflight);

    auto const &table = *buffer->target;
    auto const &ids = buffer->deletables;

    std::string query = "DELETE FROM ";
    query.reserve(table.name.size() + ids.size() * 15 + 30);
    query.append(table.name);
    query.append(" WHERE ");
    query.append(table.id);
    query.append(" IN (");

    for (auto const id : ids) {
        query.append(std::to_string(id));
        query.push_back(',');
    }
    // Turn the comma after the last id into the closing bracket.
    query.back() = ')';

    pgsql_exec_simple(m_conn, PGRES_COMMAND_OK, query);
}
/**
 * Start a COPY ... FROM STDIN for the given target and remember the target
 * as the one currently in flight.
 *
 * @param target Table (and optional column list) to copy into.
 */
void db_copy_thread_t::start_copy(std::shared_ptr<db_target_descr_t> const &target)
{
    std::string copystr = "COPY ";
    // "COPY " + name + "(" + columns + ")" + " FROM STDIN"
    copystr.reserve(target->name.size() + target->rows.size() + 18);
    copystr += target->name;
    if (!target->rows.empty()) {
        copystr += '(';
        copystr += target->rows;
        copystr += ')';
    }
    copystr += " FROM STDIN";
    pgsql_exec_simple(m_conn, PGRES_COPY_IN, copystr);

    // Only mark the copy as running once the server has accepted it.
    m_inflight = target;
}
void db_copy_thread_t::finish_copy()
{
if (!m_inflight)
return;
if (PQputCopyEnd(m_conn, nullptr) != 1)
throw std::runtime_error((fmt("stop COPY_END for %1% failed: %2%\n") %
m_inflight->name %
PQerrorMessage(m_conn))
.str());
pg_result_t res(PQgetResult(m_conn));
if (PQresultStatus(res.get()) != PGRES_COMMAND_OK)
throw std::runtime_error((fmt("result COPY_END for %1% failed: %2%\n") %
m_inflight->name %
PQerrorMessage(m_conn))
.str());
m_inflight.reset();
}
/// Create a manager that forwards its buffers to the given processor.
db_copy_mgr_t::db_copy_mgr_t(
    std::shared_ptr<db_copy_thread_t> const &processor)
: m_processor(processor)
{
}
void db_copy_mgr_t::new_line(std::shared_ptr<db_target_descr_t> const &table)
{
if (!m_current || !m_current->target->same_copy_target(*table.get())) {
if (m_current) {
m_processor->add_buffer(std::move(m_current));
}
m_current.reset(new db_cmd_copy_t(table));
}
}
/// Remember an id to be deleted from the target of the current buffer.
void db_copy_mgr_t::delete_id(osmid_t osm_id)
{
    assert(m_current);

    auto &pending_deletes = m_current->deletables;
    pending_deletes.push_back(osm_id);
}
/**
 * Hand over a pending buffer, if any, and wait until the processor has
 * worked through everything queued so far.
 */
void db_copy_mgr_t::sync()
{
    bool const has_pending = static_cast<bool>(m_current);
    if (has_pending) {
        // finish any ongoing copy operations
        m_processor->add_buffer(std::move(m_current));
    }

    m_processor->sync_and_wait();
}
/**
 * Terminate the current row and forward the buffer to the processor once
 * it is nearly full.
 */
void db_copy_mgr_t::finish_line()
{
    assert(m_current);

    std::string &row_data = m_current->buffer;
    assert(!row_data.empty());

    // The column written last left a trailing '\t' behind; it becomes the
    // row delimiter '\n'.
    assert(row_data.back() == '\t');
    row_data.back() = '\n';

    // Leave some headroom below the maximum buffer size.
    if (row_data.size() > db_cmd_copy_t::Max_buf_size - 100) {
        m_processor->add_buffer(std::move(m_current));
    }
}
#ifndef DB_COPY_HPP
#define DB_COPY_HPP
#include <cassert>
#include <condition_variable>
#include <cstdio>
#include <deque>
#include <future>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <utility>
#include <vector>
#include "osmtypes.hpp"
struct pg_conn;
/**
 * Table information necessary for building SQL queries.
 */
struct db_target_descr_t
{
    /// Name of the target table for the copy operation.
    std::string name;
    /// Comma-separated column list for the COPY statement (when empty: no
    /// column list is given).
    std::string rows;
    /// Name of id column used when deleting objects.
    std::string id;

    db_target_descr_t() = default;

    /**
     * @param n Table name.
     * @param i Name of the id column.
     * @param r Column list for COPY, empty by default.
     */
    db_target_descr_t(char const *n, char const *i, char const *r = "")
    : name(n), rows(r), id(i)
    {
    }

    /**
     * Check if the buffer would use exactly the same copy operation.
     *
     * Only the table name and the column list are compared, the id column
     * plays no role.
     */
    bool same_copy_target(db_target_descr_t const &other) const noexcept
    {
        if (this == &other) {
            return true;
        }
        return name == other.name && rows == other.rows;
    }
};
/**
 * A command for the copy thread to execute.
 *
 * Base class only: the constructor is protected, concrete commands derive
 * from it and pass their kind along.
 */
class db_cmd_t
{
public:
    enum cmd_t
    {
        Cmd_copy,  ///< Copy buffer content into given target.
        Cmd_sync,  ///< Synchronize with parent.
        Cmd_finish ///< Finish the worker.
    };

    /// Kind of command, used by the worker to select the action.
    cmd_t type;

    virtual ~db_cmd_t() = default;

protected:
    explicit db_cmd_t(cmd_t t) : type(t) {}
};
/// Command carrying data to be copied into a table, optionally preceded by
/// deletions from the same table.
struct db_cmd_copy_t : public db_cmd_t
{
/// Capacity reserved for the copy buffer on construction (10 MiB).
enum { Max_buf_size = 10 * 1024 * 1024 };
/// Description of the target table for the copy operation
std::shared_ptr<db_target_descr_t> target;
/// Ids of the objects to delete before copying
std::vector<osmid_t> deletables;
/// actual copy buffer
std::string buffer;
/// Create an empty copy command for target t.
explicit db_cmd_copy_t(std::shared_ptr<db_target_descr_t> const &t)
: db_cmd_t(db_cmd_t::Cmd_copy), target(t)
{
// Reserve the full capacity once up front.
buffer.reserve(Max_buf_size);
}
};
/// Command used to synchronize the caller with the worker.
struct db_cmd_sync_t : public db_cmd_t
{
/// Promise the worker fulfils (set_value) when it processes this command.
std::promise<void> barrier;
/// Takes over the promise b; the caller keeps the matching future.
explicit db_cmd_sync_t(std::promise<void> &&b)
: db_cmd_t(db_cmd_t::Cmd_sync), barrier(std::move(b))
{
}
};
/// Command telling the worker to leave its processing loop. Carries no data.
struct db_cmd_finish_t : public db_cmd_t
{
db_cmd_finish_t() : db_cmd_t(db_cmd_t::Cmd_finish) {}
};
/**
* The worker thread that streams copy data into the database.
*
* Commands are queued with add_buffer() and executed in order by a
* thread that is started in the constructor.
*/
class db_copy_thread_t
{
public:
/**
* Start the worker thread.
*
* conninfo is the connection string the worker uses to connect.
*/
db_copy_thread_t(std::string const &conninfo);
/// Calls finish().
~db_copy_thread_t();
/**
* Add another command for the worker.
*/
void add_buffer(std::unique_ptr<db_cmd_t> &&buffer);
/**
* Send sync command and wait for the notification.
*/
void sync_and_wait();
/**
* Finish the copy process.
*
* Only returns when all remaining data has been committed to the
* database.
*/
void finish();
private:
/// Entry point of the worker: processes queued commands until finished.
void worker_thread();
/// Open the database connection.
void connect();
/// Close the database connection if it is open.
void disconnect();
/// Execute a copy command.
void write_to_db(db_cmd_copy_t *buffer);
/// Start a COPY operation for the given target.
void start_copy(std::shared_ptr<db_target_descr_t> const &target);
/// End the COPY operation currently in flight, if any.
void finish_copy();
/// Delete the rows listed in the command from its target table.
void delete_rows(db_cmd_copy_t *buffer);
/// Connection string for the database.
std::string m_conninfo;
/// Database connection, nullptr while not connected.
pg_conn *m_conn;
/// Thread executing worker_thread().
std::thread m_worker;
/// Protects m_worker_queue.
std::mutex m_queue_mutex;
/// Signalled whenever a command has been added to the queue.
std::condition_variable m_queue_cond;
/// Commands waiting to be executed, oldest first.
std::deque<std::unique_ptr<db_cmd_t>> m_worker_queue;
// Target for copy operation currently ongoing.
std::shared_ptr<db_target_descr_t> m_inflight;
};
/**
 * Management class that fills and manages copy buffers.
 *
 * Rows are assembled in text form in the current buffer. The buffer is
 * handed to the processor when the copy target changes, when it is nearly
 * full or when sync() is called.
 *
 * All column functions require a current buffer, i.e. they must be called
 * between new_line() and finish_line().
 */
class db_copy_mgr_t
{
public:
    db_copy_mgr_t(std::shared_ptr<db_copy_thread_t> const &processor);

    /**
     * Start a new table row.
     *
     * Also starts a new buffer if either the table is not the same as
     * the table of currently buffered data or no buffer is pending.
     */
    void new_line(std::shared_ptr<db_target_descr_t> const &table);

    /**
     * Finish a table row.
     *
     * Adds the row delimiter to the buffer. If the buffer is at capacity
     * it will be forwarded to the copy thread.
     */
    void finish_line();

    /**
     * Add many simple columns.
     *
     * See add_column().
     */
    template <typename T, typename... ARGS>
    void add_columns(T value, ARGS &&... args)
    {
        add_column(value);
        // Forward the remaining arguments so that temporaries are moved
        // instead of copied on every recursion step.
        add_columns(std::forward<ARGS>(args)...);
    }

    template <typename T>
    void add_columns(T value)
    {
        add_column(value);
    }

    /**
     * Add a column entry of simple type.
     *
     * Writes the column with the escaping appropriate for the type and
     * a column delimiter.
     */
    template <typename T>
    void add_column(T value)
    {
        add_value(value);
        m_current->buffer += '\t';
    }

    /**
     * Add an empty column.
     *
     * Adds a NULL value for the column.
     */
    void add_null_column() { m_current->buffer += "\\N\t"; }

    /**
     * Start an array column.
     *
     * An array is a list of simple elements of the same type.
     *
     * Must be finished with a call to finish_array().
     */
    void new_array() { m_current->buffer += "{"; }

    /**
     * Add a single value to an array column.
     *
     * Adds the value in the format appropriate for an array and a value
     * separator.
     */
    template <typename T>
    void add_array_elem(T value)
    {
        add_value(value);
        m_current->buffer += ',';
    }

    void add_array_elem(std::string const &s) { add_array_elem(s.c_str()); }

    /// String elements are written in double quotes and escaped.
    void add_array_elem(char const *s)
    {
        assert(m_current);
        m_current->buffer += '"';
        add_escaped_string(s);
        m_current->buffer += "\",";
    }

    /**
     * Finish an array column previously started with new_array().
     *
     * The array may be empty. If it does contain elements, the separator after
     * the final element is replaced with the closing array bracket.
     */
    void finish_array()
    {
        auto &buf = m_current->buffer;
        // new_array() always leaves at least the opening bracket behind.
        assert(!buf.empty());
        if (buf.back() == '{') {
            buf += '}';
        } else {
            buf.back() = '}';
        }
        buf += '\t';
    }

    /**
     * Start a hash column.
     *
     * A hash column contains a list of key/value pairs. May be represented
     * by a hstore or json in Postgresql.
     *
     * currently a hstore column is written which does not have any start
     * markers.
     *
     * Must be closed with a finish_hash() call.
     */
    void new_hash() { /* nothing */ }

    void add_hash_elem(std::string const &k, std::string const &v)
    {
        add_hash_elem(k.c_str(), v.c_str());
    }

    /**
     * Add a key/value pair to a hash column.
     *
     * Key and value must be strings and will be appropriately escaped.
     * A separator for the next pair is added at the end.
     */
    void add_hash_elem(char const *k, char const *v)
    {
        auto &buf = m_current->buffer;
        buf += '"';
        add_escaped_string(k);
        buf += "\"=>\"";
        add_escaped_string(v);
        buf += "\",";
    }

    /**
     * Add a key/value pair to a hash column without escaping.
     *
     * Key and value must be strings and will NOT be appropriately escaped.
     * A separator for the next pair is added at the end.
     */
    void add_hash_elem_noescape(char const *k, char const *v)
    {
        auto &buf = m_current->buffer;
        buf += '"';
        buf += k;
        buf += "\"=>\"";
        buf += v;
        buf += "\",";
    }

    /**
     * Add a key (unescaped) and a numeric value to a hash column.
     *
     * Key must be string and come from a safe source because it will NOT be
     * escaped! The value should be convertible using std::to_string.
     * A separator for the next pair is added at the end.
     *
     * This method is suitable to insert safe input, e.g. numeric OSM metadata
     * (eg. uid) but not unsafe input like user names.
     */
    template <typename T>
    void add_hstore_num_noescape(char const *k, T const value)
    {
        auto &buf = m_current->buffer;
        buf += '"';
        buf += k;
        buf += "\"=>\"";
        buf += std::to_string(value);
        buf += "\",";
    }

    /**
     * Close a hash previously started with new_hash().
     *
     * The hash may be empty. If elements were present, the separator
     * of the final element is overwritten with the closing \t.
     */
    void finish_hash()
    {
        auto &buf = m_current->buffer;
        // Only look at the last character when there is one.
        if (!buf.empty() && buf.back() == ',') {
            buf.back() = '\t';
        } else {
            buf += '\t';
        }
    }

    /**
     * Add a column with the given WKB geometry in WKB hex format.
     *
     * The geometry is converted on-the-fly from WKB binary to WKB hex.
     */
    void add_hex_geom(std::string const &wkb)
    {
        char const *const lookup_hex = "0123456789ABCDEF";
        auto &buf = m_current->buffer;

        for (char c : wkb) {
            // The mask removes any sign extension from the shift.
            buf += lookup_hex[(c >> 4) & 0xf];
            buf += lookup_hex[c & 0xf];
        }

        buf += '\t';
    }

    /**
     * Mark an OSM object for deletion in the current table.
     *
     * The object is guaranteed to be deleted before any lines
     * following the delete_id() are inserted.
     */
    void delete_id(osmid_t osm_id);

    /**
     * Synchronize with worker.
     *
     * Only returns when all previously issued commands are done.
     */
    void sync();

private:
    /// Generic value: written via std::to_string.
    template <typename T>
    void add_value(T value)
    {
        m_current->buffer += std::to_string(value);
    }

    /// Floating point value, formatted with "%g".
    /// NOTE(review): "%g" prints 6 significant digits by default; confirm
    /// that this precision is sufficient for all callers.
    void add_value(double value)
    {
        char tmp[32];
        snprintf(tmp, sizeof(tmp), "%g", value);
        m_current->buffer += tmp;
    }

    void add_value(std::string const &s) { add_value(s.c_str()); }

    /// String value: special characters are written with one level of
    /// backslash escaping.
    void add_value(char const *s)
    {
        assert(m_current);
        auto &buf = m_current->buffer;
        for (char const *c = s; *c; ++c) {
            switch (*c) {
            case '"':
                buf += "\\\"";
                break;
            case '\\':
                buf += "\\\\";
                break;
            case '\n':
                buf += "\\n";
                break;
            case '\r':
                buf += "\\r";
                break;
            case '\t':
                buf += "\\t";
                break;
            default:
                buf += *c;
                break;
            }
        }
    }

    /// String inside a quoted array or hash element: quotes and backslashes
    /// get a second level of backslash escaping compared to add_value().
    void add_escaped_string(char const *s)
    {
        auto &buf = m_current->buffer;
        for (char const *c = s; *c; ++c) {
            switch (*c) {
            case '"':
                buf += "\\\\\"";
                break;
            case '\\':
                buf += "\\\\\\\\";
                break;
            case '\n':
                buf += "\\n";
                break;
            case '\r':
                buf += "\\r";
                break;
            case '\t':
                buf += "\\t";
                break;
            default:
                buf += *c;
                break;
            }
        }
    }

    /// Receiver of the filled buffers.
    std::shared_ptr<db_copy_thread_t> m_processor;
    /// Buffer currently being filled, empty when no row has been started.
    std::unique_ptr<db_cmd_copy_t> m_current;
};
#endif
osm2pgsql (0.96.0+ds-4) UNRELEASED; urgency=medium
osm2pgsql (1.0.0+ds-1) unstable; urgency=medium
* Add patch to use Python 3 for regression-test.py.
* New upstream release.
* Drop spelling-errors.patch, applied upstream.
* Move from experimental to unstable.
-- Bas Couwenberg <sebastic@debian.org> Fri, 30 Aug 2019 06:52:48 +0200
osm2pgsql (1.00.0~rc1+ds-1~exp1) experimental; urgency=medium
* New upstream release candidate.
* Add patch to fix spelling errors.
-- Bas Couwenberg <sebastic@debian.org> Sat, 27 Jul 2019 13:47:49 +0200
-- Bas Couwenberg <sebastic@debian.org> Mon, 19 Aug 2019 05:44:53 +0200
osm2pgsql (0.96.0+ds-3) unstable; urgency=medium
......
Description: Use Python 3 for regression-test.py.
Author: Bas Couwenberg <sebastic@debian.org>
Forwarded: https://github.com/openstreetmap/osm2pgsql/pull/939
Applied-Upstream: https://github.com/openstreetmap/osm2pgsql/commit/ea23dc9d5dc65b5cb2efa3e18beb4a270c734d98
--- a/tests/regression-test.py
+++ b/tests/regression-test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
import unittest
import psycopg2
@@ -342,14 +342,14 @@ class BaseTestCase(unittest.TestCase):
self.conn.autocommit = True
self.cur = self.conn.cursor()
except Exception as e:
- print "I am unable to connect to the database." + e
+ print("I am unable to connect to the database." + e)
def dbClose(self):
self.cur.close()
self.conn.close()
def executeStatements(self, seq):
- print "*********************************"
+ print("*********************************")
self.dbConnect()
try:
for i in seq:
@@ -418,8 +418,8 @@ class BasicNonSlimTestCase(BaseNonSlimTe
self.setUpGeneric(self.parameters, full_import_file)
def runTest(self):
- print "****************************************"
- print "Running initial import for " + self.name
+ print("****************************************")
+ print("Running initial import for " + self.name)
self.executeStatements(self.initialStatements)
@@ -437,10 +437,10 @@ class BasicSlimTestCase(BaseSlimTestCase
def runTest(self):
- print "****************************************"
- print "Running initial import for " + self.name
+ print("****************************************")
+ print("Running initial import for " + self.name)
self.executeStatements(self.initialStatements)
- print "Running diff-import for " + self.name
+ print("Running diff-import for " + self.name)
self.updateGeneric(self.parameters, diff_import_file)
self.executeStatements(self.postDiffStatements)
@@ -458,10 +458,10 @@ class MultipolygonSlimTestCase(BaseSlimT
def runTest(self):
- print "****************************************"
- print "Running initial import for " + self.name
+ print("****************************************")
+ print("Running initial import for " + self.name)
self.executeStatements(self.initialStatements)
- print "Running diff-import for " + self.name
+ print("Running diff-import for " + self.name)
self.updateGeneric(self.parameters, diff_multipoly_import_file)
self.executeStatements(self.postDiffStatements)
@@ -480,10 +480,10 @@ class BasicGazetteerTestCase(BaseGazette
def runTest(self):
- print "****************************************"
- print "Running initial import in gazetteer mode for " + self.name
+ print("****************************************")
+ print("Running initial import in gazetteer mode for " + self.name)
self.executeStatements(self.initialStatements)
- print "Running diff-import in gazetteer mode for " + self.name
+ print("Running diff-import in gazetteer mode for " + self.name)
self.updateGeneric(self.parameters, diff_import_file)
self.executeStatements(self.postDiffStatements)
@@ -508,26 +508,26 @@ def findContribSql(filename):
#****************************************************************
#****************************************************************
def setupDB():
- print "Setting up test database"
+ print("Setting up test database")
try:
gen_conn=psycopg2.connect("dbname='template1'")
gen_conn.autocommit = True
except Exception as e:
- print "I am unable to connect to the database."
+ print("I am unable to connect to the database.")
exit(1)
try:
gen_cur = gen_conn.cursor()
except Exception as e:
gen_conn.close()
- print "I am unable to connect to the database."
+ print("I am unable to connect to the database.")
exit(1)
try:
gen_cur.execute("""DROP DATABASE IF EXISTS \"osm2pgsql-test\"""")
gen_cur.execute("""CREATE DATABASE \"osm2pgsql-test\" WITH ENCODING 'UTF8'""")
except Exception as e:
- print "Failed to create osm2pgsql-test db" + e.pgerror
+ print("Failed to create osm2pgsql-test db" + e.pgerror)
exit(1);
finally:
gen_cur.close()
@@ -537,13 +537,13 @@ def setupDB():
test_conn=psycopg2.connect("dbname='osm2pgsql-test'")
test_conn.autocommit = True
except Exception as e:
- print "I am unable to connect to the database." + e
+ print("I am unable to connect to the database." + e)
exit(1)
try:
test_cur = test_conn.cursor()
except Exception as e:
- print "I am unable to connect to the database." + e
+ print("I am unable to connect to the database." + e)
gen_conn.close()
exit(1)
@@ -556,15 +556,15 @@ def setupDB():
test_cur.execute("""SELECT spcname FROM pg_tablespace WHERE spcname = 'tablespacetest'""")
if test_cur.fetchone():
- print "We already have a tablespace, can use that"
+ print("We already have a tablespace, can use that")
else:
- print "The test needs a temporary tablespace to run in, but it does not exist. Please create the temporary tablespace. On Linux, you can do this by running:"
- print " sudo mkdir -p /tmp/psql-tablespace"
- print " sudo /bin/chown postgres.postgres /tmp/psql-tablespace"
- print " psql -c \"CREATE TABLESPACE tablespacetest LOCATION '/tmp/psql-tablespace'\" postgres"
+ print("The test needs a temporary tablespace to run in, but it does not exist. Please create the temporary tablespace. On Linux, you can do this by running:")
+ print(" sudo mkdir -p /tmp/psql-tablespace")
+ print(" sudo /bin/chown postgres.postgres /tmp/psql-tablespace")
+ print(" psql -c \"CREATE TABLESPACE tablespacetest LOCATION '/tmp/psql-tablespace'\" postgres")
exit(77)
except Exception as e:
- print "Failed to create directory for tablespace" + str(e)
+ print("Failed to create directory for tablespace" + str(e))
# Check for postgis
try:
@@ -593,13 +593,13 @@ def setupDB():
test_conn.close()
def tearDownDB():
- print "Cleaning up test database"
+ print("Cleaning up test database")
try:
gen_conn=psycopg2.connect("dbname='template1'")
gen_conn.autocommit = True
gen_cur = gen_conn.cursor()
except Exception as e:
- print "I am unable to connect to the database."
+ print("I am unable to connect to the database.")
exit(1)
try:
@@ -607,7 +607,7 @@ def tearDownDB():
if (created_tablespace == 1):
gen_cur.execute("""DROP TABLESPACE IF EXISTS \"tablespacetest\"""")
except Exception as e:
- print "Failed to clean up osm2pgsql-test db" + e.pgerror
+ print("Failed to clean up osm2pgsql-test db" + e.pgerror)
exit(1);
gen_cur.close()
@@ -646,8 +646,8 @@ finally:
tearDownDB()
if success:
- print "All tests passed :-)"
+ print("All tests passed :-)")
exit(0)
else:
- print "Some tests failed :-("
+ print("Some tests failed :-(")
exit(1)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -73,7 +73,7 @@ executing ``ctest``.
Regression tests require python and psycopg to be installed. On Ubuntu run:
```sh
-sudo apt-get install python-psycopg2
+sudo apt-get install python3-psycopg2
```
Most of these tests depend on being able to set up a database and run osm2pgsql
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -68,7 +68,7 @@ if (NOT LUA_FOUND)
set_tests_properties(test-output-multi-tags PROPERTIES WILL_FAIL on)
endif()
-find_package(PythonInterp)
+find_package(PythonInterp 3)
if (PYTHONINTERP_FOUND)
add_test(NAME regression-test-pbf COMMAND ${PYTHON_EXECUTABLE} tests/regression-test.py -f tests/liechtenstein-2013-08-03.osm.pbf -x $<TARGET_FILE:osm2pgsql>
python3.patch
......@@ -200,12 +200,6 @@ invalid polygons. With this option, invalid polygons are instead simply dropped
from the database. Even without this option, all polygons in the database should
be valid.
.TP
\fB\ \fR\-\-unlogged
Use postgresql's unlogged tables for storing data. This requires PostgreSQL 9.1
or above. Data written to unlogged tables is not written to PostgreSQL's write\-ahead log,
which makes them considerably faster than ordinary tables. However, they are not
crash\-safe: an unlogged table is automatically truncated after a crash or unclean shutdown.
.TP
\fB\ \fR\-\-number\-processes num
Specifies the number of parallel processes used for certain operations. If disks are
fast enough e.g. if you have an SSD, then this can greatly increase speed of
......
......@@ -27,6 +27,15 @@ tables which are used by the pgsql middle layer, not the backend:
With the ``--flat-nodes`` option, the ``planet_osm_nodes`` information is
instead stored in a binary file.
**Note:** The names and structure of these additional tables, colloquially
referred to as "slim tables", are an *internal implementation detail* of
osm2pgsql. While they do not usually change between releases of osm2pgsql,
be advised that if you rely on the content or layout of these tables in
your application, it is your responsibility to check whether your assumptions
are still true in a newer version of osm2pgsql before updating. See
https://github.com/openstreetmap/osm2pgsql/issues/230 for a discussion of
the topic.
## Importing ##
1. Runs a parser on the input file and processes the nodes, ways and relations.
......
# Command-line usage #
Osm2pgsql has one program, the executable itself, which has **44** command line
Osm2pgsql has one program, the executable itself, which has **43** command line
options. A full list of options can be obtained with ``osm2pgsql -h -v``. This
document provides an overview of options, and more importantly, why you might
use them.
......@@ -59,9 +59,6 @@ offers significant space savings and speed increases, particularly on
mechanical drives. The file takes approximately 8 bytes * maximum node ID, or
about 23 GiB, regardless of the size of the extract.
``--unlogged`` specifies to use unlogged tables which are dropped from the
database if the database server ever crashes, but are faster to import.
``--prefix`` specifies the prefix for tables
## Middle-layer options ##
......
......@@ -14,6 +14,8 @@
#include <cerrno>
#include <string>
#include <boost/format.hpp>
#include "expire-tiles.hpp"
#include "options.hpp"
#include "reprojection.hpp"
......
#include <algorithm>
#include <cstring>
#include <boost/property_tree/json_parser.hpp>
#include <osmium/osm.hpp>
#include "gazetteer-style.hpp"
#include "pgsql.hpp"
#include "wkb.hpp"
/// Upper bound for admin_level values; also the value used when a tag
/// carries no admin_level inside the range 1..MAX_ADMINLEVEL.
static constexpr int MAX_ADMINLEVEL = 15;
/// Collect the name tags that belong to one class ("domain").
///
/// A tag qualifies when its key is exactly "<cls>:name" or starts with
/// "<cls>:name:" (for cls "bridge": "bridge:name", "bridge:name:en", ...).
///
/// \param cls   NUL-terminated class name whose names are wanted.
/// \param tags  Tag list to scan.
/// \returns Pointers to the matching tags in tag-list order. The pointers
///          refer to elements of `tags`.
static std::vector<osmium::Tag const *>
domain_names(char const *cls, osmium::TagList const &tags)
{
    std::vector<osmium::Tag const *> ret;

    std::string const prefix = cls + std::string(":name");
    auto const plen = prefix.length();

    for (auto const &item : tags) {
        char const *k = item.key();
        // Only the first plen characters of the key may be compared here.
        // std::string::compare(pos, len, char const *) compares against the
        // *whole* C string, which made every suffixed key such as
        // "<cls>:name:en" compare unequal. strncmp stops at the key's
        // terminator, so k[plen] is only read when the key is long enough.
        if (std::strncmp(k, prefix.c_str(), plen) == 0 &&
            (k[plen] == '\0' || k[plen] == ':')) {
            ret.push_back(&item);
        }
    }

    return ret;
}
// Short alias for the Boost property tree namespace.
namespace pt = boost::property_tree;

// Target description handed to db_copy_mgr_t::new_line() for every row
// written by this file; constructed from "place" and "place_id".
static std::shared_ptr<db_target_descr_t> place_table =
    std::make_shared<db_target_descr_t>("place", "place_id");
/// Reset all per-object cached data to its empty state. The loaded style
/// rules themselves are not touched.
void gazetteer_style_t::clear()
{
    // scalar state
    m_is_named = false;
    m_admin_level = MAX_ADMINLEVEL;
    m_operator = nullptr;

    // collected tag lists
    m_address.clear();
    m_extra.clear();
    m_names.clear();
    m_main.clear();
}
bool gazetteer_style_t::has_place(std::string const &cls) const
{
return std::any_of(m_main.begin(), m_main.end(), [&](pmaintag_t const &e) {
if (strcmp(std::get<0>(e), cls.c_str()) == 0) {
if (std::get<2>(e) & SF_MAIN_NAMED)
return !m_names.empty();
// XXX should handle SF_MAIN_NAMED_KEY as well
return true;
}
return false;
});
}
void gazetteer_style_t::load_style(std::string const &filename)
{
fprintf(stderr, "Parsing gazetteer style file '%s'.\n", filename.c_str());
pt::ptree root;
pt::read_json(filename, root);
for (auto &entry : root) {
for (auto &tag : entry.second.get_child("keys")) {
for (auto &value : entry.second.get_child("values")) {
add_style_entry(tag.second.data(), value.first,
parse_flags(value.second.data()));
}
}
}
}
/// Translate a comma-separated list of style keywords into a flag bitmask.
///
/// The keyword "skip" makes the function return 0 immediately. Seeing the
/// keyword "operator" additionally sets m_any_operator_matches.
///
/// \param str  Comma-separated keywords, without surrounding whitespace.
/// \returns The OR-ed SF_* flags, or 0 when "skip" was encountered.
/// \throws std::runtime_error on an unknown keyword.
gazetteer_style_t::flag_t gazetteer_style_t::parse_flags(std::string const &str)
{
    struct keyword_t
    {
        char const *name;
        flag_t bit;
    };

    static keyword_t const keywords[] = {
        {"main", SF_MAIN},
        {"with_name_key", SF_MAIN_NAMED_KEY},
        {"with_name", SF_MAIN_NAMED},
        {"fallback", SF_MAIN_FALLBACK},
        {"operator", SF_MAIN_OPERATOR},
        {"name", SF_NAME},
        {"ref", SF_REF},
        {"address", SF_ADDRESS},
        {"house", SF_ADDRESS_POINT},
        {"postcode", SF_POSTCODE},
        {"country", SF_COUNTRY},
        {"extra", SF_EXTRA},
        {"interpolation", SF_INTERPOLATION},
    };

    flag_t result = 0;
    std::string::size_type pos = 0;

    for (;;) {
        auto const comma = str.find(',', pos);
        bool const is_last = (comma == std::string::npos);
        std::string const token =
            is_last ? str.substr(pos) : str.substr(pos, comma - pos);

        if (token == "skip") {
            return 0;
        }

        // Every known keyword maps to a non-zero bit, so 0 means "not found".
        flag_t bit = 0;
        for (auto const &kw : keywords) {
            if (token == kw.name) {
                bit = kw.bit;
                break;
            }
        }
        if (bit == 0) {
            throw std::runtime_error("Unknown flag in style file.");
        }

        result |= bit;
        if (bit == SF_MAIN_OPERATOR) {
            m_any_operator_matches = true;
        }

        if (is_last) {
            break;
        }
        pos = comma + 1;
    }

    return result;
}
/// Enable output of an OSM metadata field if `key` names one.
///
/// \param key  Style key; recognised are "osm_version", "osm_timestamp",
///             "osm_changeset", "osm_uid" and "osm_user".
/// \returns true if `key` was a metadata key (and the field was enabled),
///          false otherwise.
bool gazetteer_style_t::add_metadata_style_entry(std::string const &key)
{
    if (key == "osm_version") {
        m_metadata_fields.set_version(true);
        return true;
    }
    if (key == "osm_timestamp") {
        m_metadata_fields.set_timestamp(true);
        return true;
    }
    if (key == "osm_changeset") {
        m_metadata_fields.set_changeset(true);
        return true;
    }
    if (key == "osm_uid") {
        m_metadata_fields.set_uid(true);
        return true;
    }
    if (key == "osm_user") {
        m_metadata_fields.set_user(true);
        return true;
    }

    return false;
}
/// Register one style rule.
///
/// The kind of matcher depends on the shape of key and value:
///  - empty key, empty value: `flags` become the default flags
///  - empty key, some value:  match on the tag value only
///  - key ending in '*':      prefix match on the tag key (value must be empty)
///  - key starting with '*':  suffix match on the tag key (value must be empty)
///  - OSM metadata key:       enables the metadata field, no matcher is added
///  - key only:               match on the tag key
///  - key and value:          match on both
///
/// \throws std::runtime_error if the rule is malformed.
void gazetteer_style_t::add_style_entry(std::string const &key,
                                        std::string const &value,
                                        gazetteer_style_t::flag_t flags)
{
    bool const has_value = !value.empty();

    if (key.empty()) {
        if (has_value) {
            m_matcher.emplace_back(value, flags, matcher_t::MT_VALUE);
        } else {
            m_default = flags;
        }
        return;
    }

    // Trailing '*': prefix rule, only valid together with an empty value.
    if (key.back() == '*') {
        if (key.size() == 1) {
            throw std::runtime_error("Style error. Ambiguous key '*'.");
        }
        if (has_value) {
            throw std::runtime_error(
                "Style error. Prefix key can only be used with empty value.\n");
        }
        m_matcher.emplace_back(key.substr(0, key.size() - 1), flags,
                               matcher_t::MT_PREFIX);
        return;
    }

    // Leading '*': suffix rule, same restriction on the value.
    if (key.front() == '*') {
        if (has_value) {
            throw std::runtime_error(
                "Style error. Suffix key can only be used with empty value.\n");
        }
        m_matcher.emplace_back(key.substr(1), flags, matcher_t::MT_SUFFIX);
        return;
    }

    // Boundary rules that can apply to administrative boundaries get an
    // internal marker flag.
    if (key == "boundary" && (!has_value || value == "administrative")) {
        flags |= SF_BOUNDARY;
    }

    if (add_metadata_style_entry(key)) {
        if (has_value) {
            throw std::runtime_error("Style error. Rules for OSM metadata "
                                     "attributes must have an empty value.\n");
        }
        if (flags != SF_EXTRA) {
            throw std::runtime_error("Style error. Rules for OSM metadata "
                                     "attributes must have the style flag "
                                     "\"extra\" and no other flag.\n");
        }
        return;
    }

    if (has_value) {
        // Key and value are stored in one string, separated by a NUL byte.
        m_matcher.emplace_back(key + '\0' + value, flags, matcher_t::MT_FULL);
    } else {
        m_matcher.emplace_back(key, flags, matcher_t::MT_KEY);
    }
}
/// Look up the style flags for a tag.
///
/// The rules are tried in the order they were added; the first rule that
/// matches decides the result.
///
/// \param k  NUL-terminated tag key.
/// \param v  NUL-terminated tag value.
/// \returns Flags of the first matching rule, or the default flags if no
///          rule matches.
gazetteer_style_t::flag_t gazetteer_style_t::find_flag(char const *k,
                                                       char const *v) const
{
    auto const key_len = std::strlen(k);
    auto const val_len = std::strlen(v);
    // length of a full-match pattern: key + NUL separator + value
    auto const full_len = key_len + val_len + 1U;

    for (auto const &rule : m_matcher) {
        auto const &pattern = rule.name;
        bool hit = false;

        switch (rule.type) {
        case matcher_t::MT_FULL:
            // strcmp stops at the embedded NUL and so checks the key part,
            // memcmp then checks the value stored behind the separator.
            hit = pattern.size() == full_len &&
                  strcmp(k, pattern.c_str()) == 0 &&
                  memcmp(v, pattern.data() + key_len + 1, val_len) == 0;
            break;
        case matcher_t::MT_KEY:
            hit = pattern.size() == key_len &&
                  memcmp(k, pattern.data(), key_len) == 0;
            break;
        case matcher_t::MT_PREFIX:
            // the key must be strictly longer than the prefix
            hit = pattern.size() < key_len &&
                  memcmp(k, pattern.data(), pattern.size()) == 0;
            break;
        case matcher_t::MT_SUFFIX:
            // the key must be strictly longer than the suffix
            hit = pattern.size() < key_len &&
                  memcmp(k + key_len - pattern.size(), pattern.data(),
                         pattern.size()) == 0;
            break;
        case matcher_t::MT_VALUE:
            hit = pattern.size() == val_len &&
                  memcmp(v, pattern.data(), val_len) == 0;
            break;
        }

        if (hit) {
            return rule.flag;
        }
    }

    return m_default;
}
/// Classify the tags of an OSM object according to the loaded style and
/// cache the result in m_main, m_names, m_extra, m_address, m_operator,
/// m_admin_level and m_is_named. Any previously cached data is discarded.
///
/// Only the key/value pointers returned by the tag list are stored, not
/// copies of the strings.
/// NOTE(review): presumably `o` has to stay alive until copy_out() has been
/// called for it - confirm with the callers.
///
/// \param o  OSM object whose tags are evaluated.
void gazetteer_style_t::process_tags(osmium::OSMObject const &o)
{
clear();
// First postcode/country seen wins; both are appended to m_address only
// after the loop.
char const *postcode = nullptr;
char const *country = nullptr;
// A main "place" tag is held back until all other tags have been seen.
char const *place = nullptr;
// Only assigned together with `place` and only read when `place` is set.
flag_t place_flag;
bool address_point = false;
bool interpolation = false;
bool admin_boundary = false;
bool postcode_fallback = false;
for (auto const &item : o.tags()) {
char const *k = item.key();
char const *v = item.value();
// admin_level is handled separately and never run through the style
// rules. Values outside 1..MAX_ADMINLEVEL (including unparsable ones, for
// which atoi() yields 0) are replaced by MAX_ADMINLEVEL.
if (strcmp(k, "admin_level") == 0) {
m_admin_level = atoi(v);
if (m_admin_level <= 0 || m_admin_level > MAX_ADMINLEVEL)
m_admin_level = MAX_ADMINLEVEL;
continue;
}
// Remember the operator only if some style rule uses the "operator" flag.
// The tag still goes through the normal classification below.
if (m_any_operator_matches && strcmp(k, "operator") == 0) {
m_operator = v;
}
flag_t flag = find_flag(k, v);
// No flags: the tag is ignored.
if (flag == 0) {
continue;
}
if (flag & SF_MAIN) {
if (strcmp(k, "place") == 0) {
place = v;
place_flag = flag;
} else {
m_main.emplace_back(k, v, flag);
if ((flag & SF_BOUNDARY) && strcmp(v, "administrative") == 0) {
admin_boundary = true;
}
}
}
// Names and refs share one list, but only a real name marks the object
// as named.
if (flag & (SF_NAME | SF_REF)) {
m_names.emplace_back(k, v);
if (flag & SF_NAME) {
m_is_named = true;
}
}
if (flag & SF_ADDRESS) {
// Strip a leading "addr:" or "is_in:" from the key.
char const *addr_key;
if (strncmp(k, "addr:", 5) == 0) {
addr_key = k + 5;
} else if (strncmp(k, "is_in:", 6) == 0) {
addr_key = k + 6;
} else {
addr_key = k;
}
if (strcmp(addr_key, "postcode") == 0) {
if (!postcode) {
postcode = v;
}
} else if (strcmp(addr_key, "country") == 0) {
// Only values of exactly two characters are accepted as country.
if (!country && strlen(v) == 2) {
country = v;
}
} else {
// Keep only the first value for each (stripped) address key.
bool first = std::none_of(
m_address.begin(), m_address.end(), [&](ptag_t const &t) {
return strcmp(t.first, addr_key) == 0;
});
if (first) {
m_address.emplace_back(addr_key, v);
}
}
}
// An address point also counts as named.
if (flag & SF_ADDRESS_POINT) {
address_point = true;
m_is_named = true;
}
if ((flag & SF_POSTCODE) && !postcode) {
postcode = v;
if (flag & SF_MAIN_FALLBACK) {
postcode_fallback = true;
}
}
if ((flag & SF_COUNTRY) && !country && std::strlen(v) == 2) {
country = v;
}
if (flag & SF_EXTRA) {
m_extra.emplace_back(k, v);
}
// Each tag flagged as interpolation adds a place=houses main entry.
if (flag & SF_INTERPOLATION) {
m_main.emplace_back("place", "houses", SF_MAIN);
interpolation = true;
}
}
if (postcode) {
m_address.emplace_back("postcode", postcode);
}
if (country) {
m_address.emplace_back("country", country);
}
// The place tag is demoted to an extra tag on interpolations and on
// administrative boundaries, unless (for boundaries) its value starts
// with "isl".
if (place) {
if (interpolation || (admin_boundary &&
strncmp(place, "isl", 3) != 0)) // island or islet
m_extra.emplace_back("place", place);
else
m_main.emplace_back("place", place, place_flag);
}
// Synthetic fallback entries; copy_out() writes entries flagged
// SF_MAIN_FALLBACK only if no other main entry produced a row.
if (address_point) {
m_main.emplace_back("place", "house", SF_MAIN | SF_MAIN_FALLBACK);
} else if (postcode_fallback && postcode) {
m_main.emplace_back("place", "postcode", SF_MAIN | SF_MAIN_FALLBACK);
}
}
/// Write the rows for the cached main tags of an object to the copy buffer.
///
/// All regular main tags are tried first. Only if none of them produced a
/// row are the fallback main tags tried, stopping at the first one that
/// does.
///
/// \param o       OSM object the cached data was computed from.
/// \param geom    Geometry of the object, passed on to add_hex_geom().
/// \param buffer  Copy buffer receiving the rows.
/// \returns true if at least one row was written.
bool gazetteer_style_t::copy_out(osmium::OSMObject const &o,
                                 std::string const &geom, db_copy_mgr_t &buffer)
{
    auto const is_fallback = [](pmaintag_t const &entry) {
        return (std::get<2>(entry) & SF_MAIN_FALLBACK) != 0;
    };

    bool written = false;
    for (auto const &entry : m_main) {
        if (is_fallback(entry)) {
            continue;
        }
        if (copy_out_maintag(entry, o, geom, buffer)) {
            written = true;
        }
    }

    if (written) {
        return true;
    }

    return std::any_of(
        m_main.begin(), m_main.end(), [&](pmaintag_t const &entry) {
            return is_fallback(entry) &&
                   copy_out_maintag(entry, o, geom, buffer);
        });
}
/// Write a single row for one main tag of an object.
///
/// Columns are emitted in this order: osm_id, osm_type, class, type, names,
/// admin_level, address, extra tags, geometry.
///
/// \param tag     Main tag (class, type, flags) the row is written for.
/// \param o       OSM object the cached data was computed from.
/// \param geom    Geometry of the object, passed on to add_hex_geom().
/// \param buffer  Copy buffer receiving the row.
/// \returns false if the tag requires a name that the object does not have
///          (nothing is written then), true after the row has been written.
bool gazetteer_style_t::copy_out_maintag(pmaintag_t const &tag,
                                         osmium::OSMObject const &o,
                                         std::string const &geom,
                                         db_copy_mgr_t &buffer)
{
    char const *const cls = std::get<0>(tag);
    char const *const type = std::get<1>(tag);
    auto const flags = std::get<2>(tag);

    // Names specific to this class ("<class>:name..."), if the style asks
    // for them. Without any such name the tag is dropped.
    std::vector<osmium::Tag const *> class_names;
    if (flags & SF_MAIN_NAMED_KEY) {
        class_names = domain_names(cls, o.tags());
        if (class_names.empty()) {
            return false;
        }
    }

    if ((flags & SF_MAIN_NAMED) && class_names.empty() && !m_is_named) {
        return false;
    }

    buffer.new_line(place_table);

    // osm_id
    buffer.add_column(o.id());

    // osm_type: upper-cased type character as a one-character string
    char const osm_type[2] = {
        static_cast<char>(toupper(osmium::item_type_to_char(o.type()))), '\0'};
    buffer.add_column(osm_type);

    // class
    buffer.add_column(cls);

    // type
    buffer.add_column(type);

    // names
    if (!class_names.empty()) {
        // skip the class name and the ':' that follows it
        auto const skip = strlen(cls) + 1;
        buffer.new_hash();
        for (auto *name_tag : class_names) {
            buffer.add_hash_elem(name_tag->key() + skip, name_tag->value());
        }
        buffer.finish_hash();
    } else {
        // operator will be ignored on anything but these classes
        bool const with_operator = m_operator && (flags & SF_MAIN_OPERATOR);

        if (!with_operator && m_names.empty()) {
            buffer.add_null_column();
        } else {
            buffer.new_hash();
            if (with_operator) {
                buffer.add_hash_elem("operator", m_operator);
            }
            for (auto const &name : m_names) {
                buffer.add_hash_elem(name.first, name.second);
            }
            buffer.finish_hash();
        }
    }

    // admin_level
    buffer.add_column(m_admin_level);

    // address
    if (m_address.empty()) {
        buffer.add_null_column();
    } else {
        buffer.new_hash();
        for (auto const &addr : m_address) {
            if (strcmp(addr.first, "tiger:county") != 0) {
                buffer.add_hash_elem(addr.first, addr.second);
                continue;
            }

            // tiger:county: keep only the part before the first comma and
            // append " county".
            auto *comma = strchr(addr.second, ',');
            std::string term =
                comma ? std::string(addr.second,
                                    static_cast<std::string::size_type>(
                                        comma - addr.second))
                      : std::string(addr.second);
            term += " county";
            buffer.add_hash_elem(addr.first, term);
        }
        buffer.finish_hash();
    }

    // extra tags, followed by the enabled OSM metadata fields
    if (m_extra.empty() && m_metadata_fields.none()) {
        buffer.add_null_column();
    } else {
        buffer.new_hash();
        for (auto const &extra : m_extra) {
            buffer.add_hash_elem(extra.first, extra.second);
        }
        if (m_metadata_fields.version() && o.version()) {
            buffer.add_hstore_num_noescape<osmium::object_version_type>(
                "osm_version", o.version());
        }
        if (m_metadata_fields.uid() && o.uid()) {
            buffer.add_hstore_num_noescape<osmium::user_id_type>("osm_uid",
                                                                 o.uid());
        }
        if (m_metadata_fields.user() && o.user() && *(o.user()) != '\0') {
            buffer.add_hash_elem("osm_user", o.user());
        }
        if (m_metadata_fields.changeset() && o.changeset()) {
            buffer.add_hstore_num_noescape<osmium::changeset_id_type>(
                "osm_changeset", o.changeset());
        }
        if (m_metadata_fields.timestamp() && o.timestamp()) {
            std::string iso_time = o.timestamp().to_iso();
            buffer.add_hash_elem_noescape("osm_timestamp", iso_time.c_str());
        }
        buffer.finish_hash();
    }

    // add the geometry - encoding it to hex along the way
    buffer.add_hex_geom(geom);

    buffer.finish_line();

    return true;
}
#ifndef GAZETTEER_STYLE_HPP
#define GAZETTEER_STYLE_HPP
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include <boost/format.hpp>
#include <osmium/osm/metadata_options.hpp>
#include "db-copy.hpp"
/**
 * Tag classification for the gazetteer output.
 *
 * Usage is in two phases: load_style() reads the style rules once, then
 * for every OSM object process_tags() caches the classified tags and
 * copy_out() writes the resulting rows to a copy buffer.
 */
class gazetteer_style_t
{
    /// Bitmask of style_flags values.
    using flag_t = uint16_t;
    /// Key/value pair of a tag (non-owning pointers).
    using ptag_t = std::pair<char const *, char const *>;
    /// Class, type and flags of a main tag (non-owning pointers).
    using pmaintag_t = std::tuple<char const *, char const *, flag_t>;

    /// Flags that can be assigned to a tag by the style. The keyword in
    /// quotes is the name used for the flag in the style file.
    enum style_flags
    {
        SF_MAIN = 1 << 0,           ///< "main"
        SF_MAIN_NAMED = 1 << 1,     ///< "with_name"
        SF_MAIN_NAMED_KEY = 1 << 2, ///< "with_name_key"
        SF_MAIN_FALLBACK = 1 << 3,  ///< "fallback"
        SF_MAIN_OPERATOR = 1 << 4,  ///< "operator"
        SF_NAME = 1 << 5,           ///< "name"
        SF_REF = 1 << 6,            ///< "ref"
        SF_ADDRESS = 1 << 7,        ///< "address"
        SF_ADDRESS_POINT = 1 << 8,  ///< "house"
        SF_POSTCODE = 1 << 9,       ///< "postcode"
        SF_COUNTRY = 1 << 10,       ///< "country"
        SF_EXTRA = 1 << 11,         ///< "extra"
        SF_INTERPOLATION = 1 << 12, ///< "interpolation"
        SF_BOUNDARY = 1 << 13,      // internal flag for boundaries
    };

    /// Which part of a tag a style rule is compared against.
    enum class matcher_t
    {
        MT_FULL,   ///< key and value
        MT_KEY,    ///< complete key
        MT_PREFIX, ///< beginning of the key
        MT_SUFFIX, ///< end of the key
        MT_VALUE   ///< complete value
    };

    /// A single style rule: pattern, resulting flags and kind of match.
    struct string_with_flag_t
    {
        std::string name;
        flag_t flag;
        matcher_t type;

        string_with_flag_t(std::string const &n, flag_t f, matcher_t t)
        : name(n), flag(f), type(t)
        {
        }
    };

    using flag_list_t = std::vector<string_with_flag_t>;

public:
    /// Read the style rules from the JSON file `filename`.
    void load_style(std::string const &filename);
    /// Classify the tags of `o` and cache the result, replacing the data
    /// of any previously processed object.
    void process_tags(osmium::OSMObject const &o);
    /// Write the rows for the cached data to `buffer`.
    /// \returns true if at least one row was written.
    bool copy_out(osmium::OSMObject const &o, std::string const &geom,
                  db_copy_mgr_t &buffer);
    /// Check whether a usable main tag of class `cls` has been cached.
    bool has_place(std::string const &cls) const;

    /// True if at least one main tag has been cached.
    bool has_data() const { return !m_main.empty(); }

private:
    bool add_metadata_style_entry(std::string const &key);
    void add_style_entry(std::string const &key, std::string const &value,
                         flag_t flags);
    flag_t parse_flags(std::string const &str);
    flag_t find_flag(char const *k, char const *v) const;

    bool copy_out_maintag(pmaintag_t const &tag, osmium::OSMObject const &o,
                          std::string const &geom, db_copy_mgr_t &buffer);
    void clear();

    // Style data.
    /// style rules, in the order they were added
    flag_list_t m_matcher;
    /// flags for tags that match no rule
    flag_t m_default{0};
    /// true once a rule with the "operator" flag has been parsed
    bool m_any_operator_matches{false};

    // Cached OSM object data.
    // The scalar members below are initialised to the values clear()
    // resets them to, so that they are never indeterminate before the
    // first call to process_tags().

    /// class/type pairs to include
    std::vector<pmaintag_t> m_main;
    /// name tags to include
    std::vector<ptag_t> m_names;
    /// extratags to include
    std::vector<ptag_t> m_extra;
    /// addresstags to include
    std::vector<ptag_t> m_address;
    /// value of operator tag
    char const *m_operator{nullptr};
    /// admin level (15 is the value of MAX_ADMINLEVEL in the implementation)
    int m_admin_level{15};
    /// True if there is an actual name to the object (not a ref).
    bool m_is_named{false};

    /// which metadata fields of the OSM objects should be written to the output
    osmium::metadata_options m_metadata_fields{"none"};
};
#endif
......@@ -81,7 +81,8 @@ relation_helper::relation_helper()
: data(1024, osmium::memory::Buffer::auto_grow::yes)
{}
size_t relation_helper::set(osmium::Relation const &rel, middle_t const *mid)
size_t relation_helper::set(osmium::Relation const &rel,
middle_query_t const *mid)
{
// cleanup
data.clear();
......@@ -90,13 +91,10 @@ size_t relation_helper::set(osmium::Relation const &rel, middle_t const *mid)
// get the nodes and roles of the ways
auto num_ways = mid->rel_way_members_get(rel, &roles, data);
// mark the ends of each so whoever uses them will know where they end..
superseded.resize(num_ways);
return num_ways;
}
void relation_helper::add_way_locations(middle_t const *mid)
void relation_helper::add_way_locations(middle_query_t const *mid)
{
for (auto &w : data.select<osmium::Way>()) {
mid->nodes_get_list(&(w.nodes()));
......
......@@ -13,7 +13,6 @@
#include "tagtransform.hpp"
struct middle_query_t;
struct middle_t;
struct options_t;
class reprojection;
......@@ -86,11 +85,10 @@ class relation_helper
public:
relation_helper();
size_t set(osmium::Relation const &rel, middle_t const *mid);
void add_way_locations(middle_t const *mid);
size_t set(osmium::Relation const &rel, middle_query_t const *mid);
void add_way_locations(middle_query_t const *mid);
rolelist_t roles;
std::vector<int> superseded;
osmium::memory::Buffer data;
};
......