Skip to content
Commits on Source (8)
......@@ -4,14 +4,22 @@
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).
## [unreleased] -
## [2.14.4] - 2018-10-30
### Added
- allow adding arbitrary headers when updating files
- replication: custom URL opener
- cookie support for pyosmium-get-changes and pyosmium-up-to-date
### Changed
- pyosmium-up-to-date declares itself as 'generator'
### Fixed
- bug when reading sequence ID files in pyosmium-get-changes
## [2.14.3] - 2018-08-08
### Added
......
pyosmium (2.14.4-1~bpo9+1) stretch-backports; urgency=medium
* Rebuild for stretch-backports.
-- Bas Couwenberg <sebastic@debian.org> Mon, 05 Nov 2018 07:31:07 +0100
pyosmium (2.14.4-1) unstable; urgency=medium
* New upstream release.
* Update watch file to limit matches to archive path.
* Bump Standards-Version to 4.2.1, no changes.
-- Bas Couwenberg <sebastic@debian.org> Tue, 30 Oct 2018 16:35:29 +0100
pyosmium (2.14.3-1~bpo9+1) stretch-backports; urgency=medium
* Rebuild for stretch-backports.
......
......@@ -23,7 +23,7 @@ Build-Depends: debhelper (>= 9),
python3-nose,
python3-sphinx,
python3-sphinxcontrib.autoprogram
Standards-Version: 4.2.0
Standards-Version: 4.2.1
Vcs-Browser: https://salsa.debian.org/debian-gis-team/pyosmium/
Vcs-Git: https://salsa.debian.org/debian-gis-team/pyosmium.git -b stretch-backports
Homepage: https://osmcode.org/pyosmium/
......
......@@ -4,4 +4,4 @@ dversionmangle=s/\+(debian|dfsg|ds|deb)\d*$//,\
uversionmangle=s/(\d)[_\.\-\+]?((RC|rc|pre|dev|beta|alpha)\d*)$/$1~$2/,\
filenamemangle=s/(?:.*\/)?(?:rel|v|pyosmium)[\-\_]?(\d[\d\-\.]+)\.(tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))/pyosmium-$1.$2/ \
https://github.com/osmcode/pyosmium/releases \
(?:.*/)?(?:rel|v|pyosmium)[\-\_]?(\d[\d\-\.]+)\.(?:tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))
(?:.*?/archive/)?(?:rel|v|pyosmium)[\-\_]?(\d[\d\-\.]+)\.(?:tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))
......@@ -111,7 +111,7 @@ struct SimpleHandlerWrap: BaseHandler, wrapper<BaseHandler> {
}
}
void way(const osmium::Way& way) {
void way(const osmium::Way& way) override {
if (!(m_callbacks & osmium::osm_entity_bits::way))
return;
......@@ -119,7 +119,7 @@ struct SimpleHandlerWrap: BaseHandler, wrapper<BaseHandler> {
f(boost::ref(way));
}
void relation(const osmium::Relation& rel) {
void relation(const osmium::Relation& rel) override {
if (!(m_callbacks & osmium::osm_entity_bits::relation))
return;
......@@ -127,7 +127,7 @@ struct SimpleHandlerWrap: BaseHandler, wrapper<BaseHandler> {
f(boost::ref(rel));
}
void changeset(const osmium::Changeset& cs) {
void changeset(const osmium::Changeset& cs) override {
if (!(m_callbacks & osmium::osm_entity_bits::changeset))
return;
......@@ -135,7 +135,7 @@ struct SimpleHandlerWrap: BaseHandler, wrapper<BaseHandler> {
f(boost::ref(cs));
}
void area(const osmium::Area& area) {
void area(const osmium::Area& area) override {
if (!(m_callbacks & osmium::osm_entity_bits::area))
return;
......
......@@ -96,6 +96,8 @@ suffixes = [ # Debian naming convention for version installed in parallel
"-%d.%d" % (pyversion.major, pyversion.minor),
# Darwin
"%d%d" % (pyversion.major, pyversion.minor),
# Darwin installed by MacPorts
"%d-mt" % (pyversion.major),
# standard suffix for Python3
"%d" % (pyversion.major),
# standard naming
......
......@@ -34,6 +34,27 @@ class ReplicationServer(object):
self.baseurl = url
self.diff_type = diff_type
def open_url(self, url):
    """ Open the resource at the given URL and return the response object.

        The content is not read here: callers get back whatever
        `urlrequest.urlopen()` returns and read from it themselves.

        This method has no support for cookies or any special authentication
        methods. If you need these, you have to provide your own custom URL
        opener. The replacement has to return an object which supports the
        `read()` and `readline()` methods to access the content.

        A function assigned to an instance attribute is not bound to the
        instance, so the replacement is called with the URL only and must
        not declare a `self` parameter. Example:

        ```
        def my_open_url(url):
            opener = urlrequest.build_opener()
            opener.addheaders = [('X-Fancy-Header', 'important_content')]
            return opener.open(url)

        svr = ReplicationServer('https://planet.osm.org/replication/minute/')
        svr.open_url = my_open_url
        ```
    """
    return urlrequest.urlopen(url)
def collect_diffs(self, start_id, max_size=1024):
""" Create a MergeInputReader and download diffs starting with sequence
id `start_id` into it. `max_size`
......@@ -113,7 +134,7 @@ class ReplicationServer(object):
return diffs.id
def apply_diffs_to_file(self, infile, outfile, start_id, max_size=1024,
set_replication_header=True):
set_replication_header=True, extra_headers={}):
""" Download diffs starting with sequence id `start_id`, merge them
with the data from the OSM file named `infile` and write the result
into a file with the name `outfile`. The output file must not yet
......@@ -128,6 +149,9 @@ class ReplicationServer(object):
will be written into the `writer`. Note that this currently works
only for the PBF format.
`extra_headers` is a dict with additional header fields to be set.
Most notably, the 'generator' can be set this way.
The function returns a tuple of last downloaded sequence id and
newest available sequence id if new data has been written or None
if no data was available or the download failed completely.
......@@ -147,6 +171,8 @@ class ReplicationServer(object):
h.set("osmosis_replication_sequence_number", str(diffs.id))
info = self.get_state_info(diffs.id)
h.set("osmosis_replication_timestamp", info.timestamp.strftime("%Y-%m-%dT%H:%M:%SZ"))
for k,v in extra_headers.items():
h.set(k, v)
writer = oio.Writer(outfile, h)
......@@ -249,8 +275,9 @@ class ReplicationServer(object):
returns `None`.
"""
try:
response = urlrequest.urlopen(self.get_state_url(seq))
except:
response = self.open_url(self.get_state_url(seq))
except Exception as err:
logging.error(err)
return None
ts = None
......@@ -282,7 +309,7 @@ class ReplicationServer(object):
(or `urllib2.HTTPError` in python2)
if the file cannot be downloaded.
"""
return urlrequest.urlopen(self.get_diff_url(seq)).read()
return self.open_url(self.get_diff_url(seq)).read()
def get_state_url(self, seq):
......
......@@ -5,7 +5,7 @@ Version information.
# the major version
pyosmium_major = '2.14'
# current release (Pip version)
pyosmium_release = '2.14.3'
pyosmium_release = '2.14.4'
# libosmium version shipped with the Pip release
libosmium_version = '2.14.2'
......
......@@ -14,6 +14,11 @@ memory during download.
On success, the program will print a single number to stdout, the sequence
number where to continue updates in the next run. This output can also be
written to (and later read from) a file.
Some OSM data sources require a cookie to be sent with the HTTP requests.
pyosmium-get-changes does not fetch the cookie from these services for you.
However, it can read cookies from a Netscape-style cookie jar file, send these
cookies to the server and will save received cookies to the jar file.
"""
from argparse import ArgumentParser, RawDescriptionHelpFormatter, ArgumentTypeError
......@@ -24,6 +29,15 @@ from osmium.replication import newest_change_from_file
from osmium.replication.utils import get_replication_header
from osmium import SimpleHandler, WriteHandler
try:
import http.cookiejar as cookiejarlib
except ImportError:
import cookielib as cookiejarlib
try:
import urllib.request as urlrequest
except ImportError:
import urllib2 as urlrequest
import re
import sys
import logging
......@@ -120,6 +134,9 @@ def get_arg_parser(from_main=False):
sequence ID will be printed where updates would start."""))
parser.add_argument('--server', action='store', dest='server_url',
help='Base URL of the replication server')
parser.add_argument('--cookie', dest='cookie',
help="""Netscape-style cookie jar file to read cookies from
and where received cookies will be written to.""")
parser.add_argument('-s', '--size', dest='outsize', type=int, default=100,
help='Maximum data to load in MB (default: 100MB).')
group = parser.add_mutually_exclusive_group()
......@@ -166,7 +183,7 @@ if __name__ == '__main__':
needs to begiven."""))
exit(1)
with open(opt.start_file, 'r') as f:
with open(options.start_file, 'r') as f:
seq = f.readline()
options.start = ReplicationStart_from_id(seq)
......@@ -185,7 +202,14 @@ if __name__ == '__main__':
or 'https://planet.osm.org/replication/minute/'
logging.info("Using replication server at %s" % url)
svr = rserv.ReplicationServer(url)
if options.cookie is not None:
# According to the documentation, the cookie jar loads the file only if FileCookieJar.load is called.
cookie_jar = cookiejarlib.MozillaCookieJar(options.cookie, None, cookie_policy)
cookie_jar.load(options.cookie)
opener = urlrequest.build_opener(urlrequest.HTTPCookieProcessor(cookie_jar))
svr.open_url = opener.open
startseq = options.start.get_sequence(svr)
if startseq is None:
......@@ -203,6 +227,10 @@ if __name__ == '__main__':
simplify=options.simplify)
outhandler.close()
# save cookies
if options.cookie:
cookie_jar.save(options.cookie)
if endseq is None:
exit(3)
......
......@@ -21,6 +21,11 @@ there is still data available on the server (either because the size
limit has been reached or there was a network error which could not be
resolved). Any other error results in a return code larger than 1. The
output file is guaranteed to be unmodified in that case.
Some OSM data sources require a cookie to be sent with the HTTP requests.
Pyosmium-up-to-date does not fetch the cookie from these services for you.
However, it can read cookies from a Netscape-style cookie jar file, send these
cookies to the server and will save received cookies to the jar file.
"""
import re
......@@ -33,10 +38,20 @@ from sys import version_info as python_version
from osmium.replication import server as rserv
from osmium.replication.utils import get_replication_header
from osmium.replication import newest_change_from_file
from osmium.version import pyosmium_release
from textwrap import dedent as msgfmt
from tempfile import mktemp
import os.path
try:
import http.cookiejar as cookiejarlib
except ImportError:
import cookielib as cookiejarlib
try:
import urllib.request as urlrequest
except ImportError:
import urllib2 as urlrequest
log = logging.getLogger()
def update_from_osm_server(ts, options):
......@@ -51,8 +66,13 @@ def update_from_osm_server(ts, options):
def update_from_custom_server(url, seq, ts, options):
"""Update from a custom URL, simply using the diff sequence as is."""
svr = rserv.ReplicationServer(url)
svr = rserv.ReplicationServer(url, "osc.gz")
if options.cookie is not None:
# According to the documentation, the cookie jar loads the file only if FileCookieJar.load is called.
cookie_jar = cookiejarlib.MozillaCookieJar(options.cookie)
cookie_jar.load(options.cookie)
opener = urlrequest.build_opener(urlrequest.HTTPCookieProcessor(cookie_jar))
svr.open_url = opener.open
log.info("Using replication service at %s" % url)
if seq is None:
......@@ -95,8 +115,10 @@ def update_from_custom_server(url, seq, ts, options):
else:
ofname = outfile
extra_headers = { 'generator' : 'pyosmium-up-to-date/' + pyosmium_release }
outseqs = svr.apply_diffs_to_file(infile, ofname, startseq,
max_size=options.outsize*1024)
max_size=options.outsize*1024,
extra_headers=extra_headers)
if outseqs is None:
log.info("No new updates found.")
......@@ -107,6 +129,10 @@ def update_from_custom_server(url, seq, ts, options):
log.info("Downloaded until %d. Server has data available until %d." % outseqs)
# save cookies
if options.cookie:
cookie_jar.save(options.cookie)
return 0 if outseqs[1] == outseqs[0] else 1
......@@ -174,9 +200,15 @@ def get_arg_parser(from_main=False):
parser.add_argument('--force-update-of-old-planet', action='store_true',
dest='force_update',
help="Apply update even if the input data is really old.")
parser.add_argument('--cookie', dest='cookie',
help="""Netscape-style cookie jar file to read cookies from and where
received cookies will be written to.""")
return parser
def open_with_cookie(url):
    """Open `url` via the `opener` object and hand back its response.

    NOTE(review): in the visible part of this file `opener` is only ever
    assigned as a local variable inside `update_from_custom_server`; this
    function looks the name up at module level. Confirm a module-level
    `opener` exists, otherwise calling this raises NameError.
    """
    response = opener.open(url)
    return response
if __name__ == '__main__':
logging.basicConfig(stream=sys.stderr,
format='%(asctime)s %(levelname)s: %(message)s')
......