""" This script demonstrates how to use the pagination protocol to download a large dataset from Frost. To test the script, save it to a file client.py and run e.g. like this: python client.py --url 'https://havvarsel-frost.met.no/api/v1/obs/glider/get?time=2020-10-07T09:00:00Z/2020-10-07T10:00:00Z&incobs=true&itemlimit=100' """ import argparse import sys import requests # See https://requests.readthedocs.io class PaginationProtocolDemo: def __init__(self, aggregate_response): url = self.__parse_args() self.__aggregate_response = aggregate_response self.__download_dataset(url) @staticmethod def __parse_args(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( '--url', required=True, dest='url', help=( 'the request URL, e.g. --url ' '\'havvarsel-frost.met.no/api/v1/obs/glider/get?' 'time=2020-10-01T00:00:00Z/' '2020-12-07T00:00:00Z&incobs=true\'') ) res = parser.parse_args(sys.argv[1:]) return res.url @staticmethod def __truncString(s, maxlen): if (maxlen < 0) or (len(s) <= maxlen): return s return '{} ... [truncated {} chars]'.format( s[:maxlen], len(s) - maxlen) def __download_dataset(self, url): npages = 0 headers = {'X-Frost-Ptsheader': ''} # initialize protocol while True: # request next page print('\nrequesting page {} (X-Frost-Ptsheader: {}) ...'.format( npages + 1, self.__truncString( headers.get('X-Frost-Ptsheader'), 10))) r = requests.get(url, headers=headers) if r.status_code != 200: try: json_content = r.json() except Exception: json_content = '((failed to extract json content))' raise Exception( 'request failed with status code {}: {}'.format( r.status_code, json_content)) npages = npages + 1 # aggregate response with custom function self.__aggregate_response(r.json()) # extract pagination headers from response ptsheader = r.headers.get('X-Frost-Nextptsheader') if ptsheader is None: raise Exception('X-Frost-Nextptsheader unexpectedly None!') print('X-Frost-Nextptsheader: {}'.format( self.__truncString(ptsheader, 10))) if not ptsheader: # we just got the last page print('\npage sequence complete after {} pages'.format(npages)) self.__aggregate_response({}) # notify client break # terminate protocol ptsbaseid = r.headers.get('X-Frost-Nextptsbaseid') if ptsbaseid is None: raise Exception('X-Frost-Nextptsbaseid unexpectedly None!') ptime = r.headers.get('X-Frost-Nextptime') if ptime is None: raise Exception('X-Frost-Nextptime unexpectedly None!') try: ptime = int(ptime) except ValueError: raise Exception('X-Frost-Nextptime unexpectedly non-int!') # set headers for next page headers = { 'X-Frost-Ptsheader': '{}'.format(ptsheader), 'X-Frost-Ptsbaseid': '{}'.format(ptsbaseid), 'X-Frost-Ptime': '{}'.format(ptime) } if __name__ == "__main__": # custom aggregation function def aggregate_response(response): if not response: print('got last page; response aggregation complete') else: print('aggregating next page (dict size in bytes: {}) ... ' 'skipped for now!'.format(len(str(response)))) try: PaginationProtocolDemo(aggregate_response) except KeyboardInterrupt: print('keyboard interrupt') except Exception: print('error: {}'.format(sys.exc_info()[1]), file=sys.stderr) sys.exit(1) sys.exit(0)