Skip to content

AugmentedDiff#

Core class for retrieving and parsing OpenStreetMap augmented diffs.

For continuous monitoring of changes, see ContinuousAugmentedDiff.

Features#

  • Single diff retrieval
  • Bounding box filtering
  • Automatic sequence number handling
  • Context manager support

Basic Usage#

from osmdiff import AugmentedDiff

# Create with a bounding box for London (WGS84 longitude/latitude degrees)
adiff = AugmentedDiff(
    minlon=-0.489,
    minlat=51.28,
    maxlon=0.236,
    maxlat=51.686
)

# Retrieve and process changes; retrieve() returns the HTTP status code.
# NOTE(review): retrieve() raises when no sequence number is set, and none is
# passed to the constructor here - confirm one is resolved automatically.
status = adiff.retrieve()
if status == 200:
    print(f"Created: {len(adiff.create)} features")
    print(f"Modified: {len(adiff.modify)} features")
    print(f"Deleted: {len(adiff.delete)} features")

API Reference#

An Augmented Diff representation for OpenStreetMap changes.

Handles retrieval and parsing of OpenStreetMap augmented diffs containing detailed changes to OSM data (creations, modifications, deletions).

Parameters:

Name Type Description Default
minlon Optional[float]

Minimum longitude of bounding box (WGS84)

None
minlat Optional[float]

Minimum latitude of bounding box (WGS84)

None
maxlon Optional[float]

Maximum longitude of bounding box (WGS84)

None
maxlat Optional[float]

Maximum latitude of bounding box (WGS84)

None
file Optional[str]

Path to local augmented diff XML file

None
sequence_number Optional[int]

Sequence number of the diff

None
timestamp Optional[datetime]

Timestamp of the changes in the diff

None
base_url Optional[str]

Override default Overpass API URL

None
timeout Optional[int]

Request timeout in seconds

None
Note

The bounding box coordinates should be in WGS84 (EPSG:4326) format.

Source code in src/osmdiff/augmenteddiff.py
def __init__(
    self,
    minlon: Optional[float] = None,
    minlat: Optional[float] = None,
    maxlon: Optional[float] = None,
    maxlat: Optional[float] = None,
    file: Optional[str] = None,
    sequence_number: Optional[int] = None,
    timestamp: Optional[datetime] = None,
    base_url: Optional[str] = None,
    timeout: Optional[int] = None,
) -> None:
    """Set up an augmented diff, optionally loading it from a local file.

    Args:
        minlon: Minimum longitude of the bounding box (WGS84).
        minlat: Minimum latitude of the bounding box (WGS84).
        maxlon: Maximum longitude of the bounding box (WGS84).
        maxlat: Maximum latitude of the bounding box (WGS84).
        file: Path to a local augmented diff XML file. When given, the file
            is parsed immediately; ``sequence_number`` is not applied and the
            bounding box is stored without validation.
        sequence_number: Sequence number of the diff (ignored when ``file``
            is given).
        timestamp: Timestamp stored on the instance.
        base_url: Overrides the configured Overpass base URL when truthy.
        timeout: Overrides the configured request timeout when truthy.

    Raises:
        Exception: If all four bounding box coordinates are given and the
            maximum is not strictly greater than the minimum on both axes.
    """
    # Create the logger first so it already exists when the setters and
    # _parse_stream below run (whether they log is not visible from here).
    self._logger = logging.getLogger(__name__)

    # Initialize with defaults from config
    self.base_url = base_url or API_CONFIG["overpass"]["base_url"]
    self.timeout = timeout or API_CONFIG["overpass"]["timeout"]

    # Initialize other config values
    self.minlon = minlon
    self.minlat = minlat
    self.maxlon = maxlon
    self.maxlat = maxlat
    self.timestamp = timestamp
    self._remarks = []
    self._sequence_number = None
    self._create = []
    self._modify = []
    self._delete = []

    if file:
        with open(file, "r") as file_handle:
            self._parse_stream(file_handle)
        return

    self.sequence_number = sequence_number

    # Compare against None rather than relying on truthiness: 0.0 is a valid
    # coordinate (equator / prime meridian) and must not bypass validation.
    bbox = (minlon, minlat, maxlon, maxlat)
    if all(coordinate is not None for coordinate in bbox):
        if not (maxlon > minlon and maxlat > minlat):
            raise Exception("invalid bbox.")

sequence_number property writable #

Get the sequence number identifying this diff.

Sequence numbers increment monotonically and uniquely identify each diff.

timestamp property writable #

Get the timestamp of when the changes in this diff were made.

Returns:

Name Type Description
datetime datetime

The timestamp parsed from the diff metadata

remarks property #

Get the list of remarks from the augmented diff.

Remarks provide additional metadata about the changes in the diff.

actions property #

Get all actions combined in a single list.

get_state(base_url=None, timeout=None) classmethod #

Get the current sequence number from the Overpass API.

Parameters:

Name Type Description Default
base_url Optional[str]

Override default Overpass API URL (deprecated)

None
timeout Optional[int]

Optional override for request timeout

None

Returns:

Name Type Description
dict Optional[dict]

Dictionary holding the current sequence_number (int) and a timestamp entry (always None here)

Source code in src/osmdiff/augmenteddiff.py
@classmethod
def get_state(
    cls, base_url: Optional[str] = None, timeout: Optional[int] = None
) -> Optional[dict]:
    """Fetch the latest augmented diff state from the Overpass API.

    Args:
        base_url: Accepted for backward compatibility (deprecated); the
            request always goes to the configured state URL.
        timeout: Request timeout in seconds; 5 is used when omitted or falsy.

    Returns:
        dict: ``{"sequence_number": <int>, "timestamp": None}`` where the
        sequence number is parsed from the response body.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status (raised by ``raise_for_status``).
        ValueError: If the response body is not an integer.
    """
    endpoint = API_CONFIG["overpass"]["state_url"]
    effective_timeout = timeout or 5
    response = requests.get(
        endpoint,
        timeout=effective_timeout,
        headers=DEFAULT_HEADERS,
    )
    response.raise_for_status()
    return {"sequence_number": int(response.text), "timestamp": None}

retrieve(clear_cache=False, timeout=None, auto_increment=True, max_retries=3) #

Retrieve the Augmented diff corresponding to the sequence_number.

Parameters:

Name Type Description Default
clear_cache bool

Whether to clear existing data before retrieval.

False
timeout Optional[int]

Request timeout in seconds.

None
auto_increment bool

Whether to automatically increment sequence number after retrieval.

True
max_retries int

Maximum number of retry attempts for failed requests.

3

Returns:

Type Description
int

HTTP status code of the request (200 for success)

Raises:

Type Description
Exception

If sequence_number is not set

RequestException

If all retry attempts fail

Source code in src/osmdiff/augmenteddiff.py
def retrieve(
    self,
    clear_cache: bool = False,
    timeout: Optional[int] = None,
    auto_increment: bool = True,
    max_retries: int = 3,
) -> int:
    """Retrieve the Augmented diff corresponding to the sequence_number.

    Newly parsed actions are appended to whatever the instance already
    holds. If a request or the parsing fails, the data held before the call
    (after the optional cache clear) is restored, so a failure never leaves
    partially parsed results behind.

    Args:
        clear_cache: Whether to clear existing data before retrieval.
        timeout: Request timeout in seconds. Falls back to the instance
            timeout, then to 120 seconds.
        auto_increment: Whether to automatically increment sequence number
            after a successful retrieval.
        max_retries: Maximum number of attempts for failed requests. Values
            below 1 are treated as 1 so that a request is always made.

    Returns:
        HTTP status code of the request (200 for success). Non-200 responses
        are returned immediately without retrying.

    Raises:
        Exception: If sequence_number is not set
        requests.exceptions.RequestException: If all retry attempts fail
    """
    if not self.sequence_number:
        raise Exception("invalid sequence number")

    if clear_cache:
        self._create, self._modify, self._delete = ([], [], [])

    url = self.base_url.format(sequence_number=self.sequence_number)

    self._logger.info(f"Retrieving diff {self.sequence_number} from {url}")

    # Snapshot the current data so it can be merged on success and restored
    # on failure.
    previous = (self._create.copy(), self._modify.copy(), self._delete.copy())

    # Use a longer timeout if none specified: 2 minutes default, this will
    # still fail for very large diffs, like 12346
    request_timeout = timeout or self.timeout or 120

    # Always make at least one attempt, even for max_retries <= 0.
    attempts = max(1, max_retries)

    for attempt in range(attempts):
        # Exponential backoff between retries
        if attempt > 0:
            time.sleep(2**attempt)  # 2, 4, 8 seconds...

        response = None
        try:
            response = requests.get(
                url, stream=True, timeout=request_timeout, headers=DEFAULT_HEADERS
            )

            if response.status_code != 200:
                return response.status_code

            response.raw.decode_content = True

            # Parse into fresh lists so a failed parse can be discarded
            self._create, self._modify, self._delete = ([], [], [])
            self._parse_stream(response.raw)
        except (
            requests.exceptions.ReadTimeout,
            requests.exceptions.ConnectionError,
        ):
            # Drop any partial parse before retrying or giving up
            self._create, self._modify, self._delete = previous
            if attempt == attempts - 1:  # Last attempt
                raise
            continue
        except Exception:
            # Non-retryable failure: restore the previous data and propagate
            self._create, self._modify, self._delete = previous
            raise
        finally:
            # A streamed response holds its connection until closed
            if response is not None:
                response.close()

        # Merge with previous data
        self._create = previous[0] + self._create
        self._modify = previous[1] + self._modify
        self._delete = previous[2] + self._delete

        # Automatically increment sequence number after successful retrieval
        if auto_increment:
            self.sequence_number += 1

        return response.status_code

    return 0  # Unreachable: every attempt returns, raises or retries

__repr__() #

Source code in src/osmdiff/augmenteddiff.py
def __repr__(self):
    """Return a short summary with the number of actions of each kind."""
    created = len(self._create)
    modified = len(self._modify)
    deleted = len(self._delete)
    return f"AugmentedDiff ({created} created, {modified} modified, {deleted} deleted)"

__enter__() #

Source code in src/osmdiff/augmenteddiff.py
def __enter__(self):
    """Enter the context manager and hand back this same instance."""
    return self

__exit__(exc_type, exc_val, exc_tb) #

Source code in src/osmdiff/augmenteddiff.py
def __exit__(self, exc_type, exc_val, exc_tb):
    """Empty the created/modified/deleted lists in place on context exit.

    Returns None, so an exception raised inside the ``with`` block is not
    suppressed.
    """
    for attribute in ("_create", "_modify", "_delete"):
        getattr(self, attribute).clear()

See Also#