From cb85d455341bc8cf4cb8226937979d986a4eea8e Mon Sep 17 00:00:00 2001 From: Colton Loftus <70598503+C-Loftus@users.noreply.github.com> Date: Thu, 24 Apr 2025 15:53:41 -0400 Subject: [PATCH 1/5] Mysql Provider (#6) - mysql provider and associated tests - refactor postgres and mysql to use a shared base class that they both inherit from - handle geometry / encoding necessary to use mysql with sqlalchemy --------- Co-authored-by: Benjamin Webb <40066515+webb-ben@users.noreply.github.com> --- .github/workflows/main.yml | 13 + pygeoapi/plugin.py | 3 +- pygeoapi/process/manager/postgresql.py | 6 +- pygeoapi/provider/{postgresql.py => sql.py} | 336 +++++++++++++------- requirements-provider.txt | 3 +- tests/data/mysql_data.sql | 24 ++ tests/test_mysql_provider.py | 171 ++++++++++ tests/test_postgresql_provider.py | 4 +- 8 files changed, 447 insertions(+), 113 deletions(-) rename pygeoapi/provider/{postgresql.py => sql.py} (73%) create mode 100644 tests/data/mysql_data.sql create mode 100644 tests/test_mysql_provider.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 069914afb..5a117e59e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -53,6 +53,17 @@ jobs: with: postgresql password: ${{ secrets.DatabasePassword || 'postgres' }} postgresql db: 'test' + + - name: "Install and run MySQL 📦" + uses: mirromutth/mysql-action@v1.1 + with: + host port: 3306 + mysql version: '8.0' + mysql database: test_geo_app + mysql root password: mysql # This is a dummy password here; not actually used in prod + mysql user: pygeoapi + mysql password: mysql + - name: Install and run Elasticsearch 📦 uses: getong/elasticsearch-action@v1.2 with: @@ -111,6 +122,7 @@ jobs: psql postgresql://postgres:${{ secrets.DatabasePassword || 'postgres' }}@localhost:5432/test -f tests/data/dummy_data.sql psql postgresql://postgres:${{ secrets.DatabasePassword || 'postgres' }}@localhost:5432/test -f tests/data/dummy_types_data.sql psql
postgresql://postgres:${{ secrets.DatabasePassword || 'postgres' }}@localhost:5432/test -f tests/data/postgres_manager_full_structure.backup.sql + mysql -h 127.0.0.1 -P 3306 -u root -p'mysql' test_geo_app < tests/data/mysql_data.sql docker ps python3 tests/load_oracle_data.py - name: run unit tests ⚙️ @@ -142,6 +154,7 @@ jobs: pytest tests/test_oracle_provider.py pytest tests/test_parquet_provider.py pytest tests/test_postgresql_provider.py + pytest tests/test_mysql_provider.py pytest tests/test_rasterio_provider.py pytest tests/test_sensorthings_edr_provider.py pytest tests/test_sensorthings_provider.py diff --git a/pygeoapi/plugin.py b/pygeoapi/plugin.py index 08324cf64..36786a24b 100644 --- a/pygeoapi/plugin.py +++ b/pygeoapi/plugin.py @@ -53,11 +53,12 @@ 'MVT-tippecanoe': 'pygeoapi.provider.mvt_tippecanoe.MVTTippecanoeProvider', # noqa: E501 'MVT-elastic': 'pygeoapi.provider.mvt_elastic.MVTElasticProvider', 'MVT-proxy': 'pygeoapi.provider.mvt_proxy.MVTProxyProvider', + 'MySQL': 'pygeoapi.provider.sql.MySQLProvider', 'OracleDB': 'pygeoapi.provider.oracle.OracleProvider', 'OGR': 'pygeoapi.provider.ogr.OGRProvider', 'OpenSearch': 'pygeoapi.provider.opensearch_.OpenSearchProvider', 'Parquet': 'pygeoapi.provider.parquet.ParquetProvider', - 'PostgreSQL': 'pygeoapi.provider.postgresql.PostgreSQLProvider', + 'PostgreSQL': 'pygeoapi.provider.sql.PostgreSQLProvider', 'rasterio': 'pygeoapi.provider.rasterio_.RasterioProvider', 'SensorThings': 'pygeoapi.provider.sensorthings.SensorThingsProvider', 'SensorThingsEDR': 'pygeoapi.provider.sensorthings_edr.SensorThingsEDRProvider', # noqa: E501 diff --git a/pygeoapi/process/manager/postgresql.py b/pygeoapi/process/manager/postgresql.py index 16d25ab8f..bf5033eef 100644 --- a/pygeoapi/process/manager/postgresql.py +++ b/pygeoapi/process/manager/postgresql.py @@ -56,7 +56,7 @@ ProcessorGenericError ) from pygeoapi.process.manager.base import BaseManager -from pygeoapi.provider.postgresql import get_engine, get_table_model +from 
pygeoapi.provider.sql import get_engine, get_table_model from pygeoapi.util import JobStatus @@ -92,13 +92,15 @@ def __init__(self, manager_def: dict): if isinstance(self.connection, str): _url = make_url(self.connection) self._engine = get_engine( + 'postgresql+psycopg2', _url.host, _url.port, _url.database, _url.username, _url.password) else: - self._engine = get_engine(**self.connection) + self._engine = get_engine('postgresql+psycopg2', + **self.connection) except Exception as err: msg = 'Test connecting to DB failed' LOGGER.error(f'{msg}: {err}') diff --git a/pygeoapi/provider/postgresql.py b/pygeoapi/provider/sql.py similarity index 73% rename from pygeoapi/provider/postgresql.py rename to pygeoapi/provider/sql.py index afc29a584..3cc8ed2c9 100644 --- a/pygeoapi/provider/postgresql.py +++ b/pygeoapi/provider/sql.py @@ -7,12 +7,14 @@ # Colin Blackburn # Francesco Bartoli # Bernhard Mallinger +# Colton Loftus # # Copyright (c) 2018 Jorge Samuel Mendes de Jesus # Copyright (c) 2025 Tom Kralidis # Copyright (c) 2022 John A Stevenson and Colin Blackburn # Copyright (c) 2025 Francesco Bartoli # Copyright (c) 2024 Bernhard Mallinger +# Copyright (c) 2025 Colton Loftus # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -37,24 +39,12 @@ # # ================================================================= -# Testing local docker: -# docker run --name "postgis" \ -# -v postgres_data:/var/lib/postgresql -p 5432:5432 \ -# -e ALLOW_IP_RANGE=0.0.0.0/0 \ -# -e POSTGRES_USER=postgres \ -# -e POSTGRES_PASS=postgres \ -# -e POSTGRES_DBNAME=test \ -# -d -t kartoza/postgis - -# Import dump: -# gunzip < tests/data/hotosm_bdi_waterways.sql.gz | -# psql -U postgres -h 127.0.0.1 -p 5432 test - from copy import deepcopy from datetime import datetime from decimal import Decimal import functools import logging +from typing import Optional from geoalchemy2 import Geometry # noqa - this isn't used explicitly 
but is needed to process Geometry columns from geoalchemy2.functions import ST_MakeEnvelope @@ -62,47 +52,70 @@ from pygeofilter.backends.sqlalchemy.evaluate import to_filter import pyproj import shapely -from sqlalchemy import create_engine, MetaData, PrimaryKeyConstraint, asc, \ - desc, delete +from sqlalchemy.sql import func +from sqlalchemy import ( + create_engine, + MetaData, + PrimaryKeyConstraint, + asc, + desc, + delete +) from sqlalchemy.engine import URL -from sqlalchemy.exc import ConstraintColumnNotFoundError, \ - InvalidRequestError, OperationalError +from sqlalchemy.exc import ( + ConstraintColumnNotFoundError, + InvalidRequestError, + OperationalError +) from sqlalchemy.ext.automap import automap_base from sqlalchemy.orm import Session, load_only from sqlalchemy.sql.expression import and_ -from pygeoapi.provider.base import BaseProvider, \ - ProviderConnectionError, ProviderInvalidDataError, ProviderQueryError, \ +from pygeoapi.provider.base import ( + BaseProvider, + ProviderConnectionError, + ProviderInvalidDataError, + ProviderQueryError, ProviderItemNotFoundError +) from pygeoapi.util import get_transform_from_crs LOGGER = logging.getLogger(__name__) -class PostgreSQLProvider(BaseProvider): - """Generic provider for Postgresql based on psycopg2 - using sync approach and server side - cursor (using support class DatabaseCursor) +class GenericSQLProvider(BaseProvider): + """ + Generic provider for sql databases it can be inherited + from to create specific providers for different databases """ - def __init__(self, provider_def): + def __init__( + self, + provider_def: dict, + driver_name: str, + extra_conn_args: Optional[dict] + ): """ - PostgreSQLProvider Class constructor + GenericSQLProvider Class constructor :param provider_def: provider definitions from yml pygeoapi-config. 
data,id_field, name set in parent class data contains the connection information for class DatabaseCursor + :param driver_name: database driver name + :param extra_conn_args: additional custom connection arguments to + pass for a query - :returns: pygeoapi.provider.base.PostgreSQLProvider + :returns: pygeoapi.provider.GenericSQLProvider """ - LOGGER.debug('Initialising PostgreSQL provider.') + LOGGER.debug('Initialising GenericSQL provider.') super().__init__(provider_def) self.table = provider_def['table'] self.id_field = provider_def['id_field'] self.geom = provider_def.get('geom_field', 'geom') + self.driver_name = driver_name LOGGER.debug(f'Name: {self.name}') LOGGER.debug(f'Table: {self.table}') @@ -112,8 +125,7 @@ def __init__(self, provider_def): # conforming to the docs: # https://docs.pygeoapi.io/en/latest/data-publishing/ogcapi-features.html#connection-examples # noqa self.storage_crs = provider_def.get( - 'storage_crs', - 'https://www.opengis.net/def/crs/OGC/0/CRS84' + 'storage_crs', 'https://www.opengis.net/def/crs/OGC/0/CRS84' ) LOGGER.debug(f'Configured Storage CRS: {self.storage_crs}') @@ -123,29 +135,39 @@ def __init__(self, provider_def): options = provider_def['options'] self._store_db_parameters(provider_def['data'], options) self._engine = get_engine( + driver_name, self.db_host, self.db_port, self.db_name, self.db_user, self._db_password, - **(self.db_options or {}) + **{**(self.db_options or {}), **(extra_conn_args or {})} ) self.table_model = get_table_model( - self.table, - self.id_field, - self.db_search_path, - self._engine + self.table, self.id_field, self.db_search_path, self._engine ) LOGGER.debug(f'DB connection: {repr(self._engine.url)}') self.get_fields() - def query(self, offset=0, limit=10, resulttype='results', - bbox=[], datetime_=None, properties=[], sortby=[], - select_properties=[], skip_geometry=False, q=None, - filterq=None, crs_transform_spec=None, **kwargs): + def query( + self, + offset=0, + limit=10, + 
resulttype='results', + bbox=[], + datetime_=None, + properties=[], + sortby=[], + select_properties=[], + skip_geometry=False, + q=None, + filterq=None, + crs_transform_spec=None, + **kwargs + ): """ - Query Postgis for all the content. + Query sql database for all the content. e,g: http://localhost:5000/collections/hotosm_bdi_waterways/items? limit=1&resulttype=results @@ -171,18 +193,21 @@ def query(self, offset=0, limit=10, resulttype='results', bbox_filter = self._get_bbox_filter(bbox) time_filter = self._get_datetime_filter(datetime_) order_by_clauses = self._get_order_by_clauses(sortby, self.table_model) - selected_properties = self._select_properties_clause(select_properties, - skip_geometry) + selected_properties = self._select_properties_clause( + select_properties, skip_geometry + ) - LOGGER.debug('Querying PostGIS') + LOGGER.debug('Querying Database') # Execute query within self-closing database Session context with Session(self._engine) as session: - results = (session.query(self.table_model) - .filter(property_filters) - .filter(cql_filters) - .filter(bbox_filter) - .filter(time_filter) - .options(selected_properties)) + results = ( + session.query(self.table_model) + .filter(property_filters) + .filter(cql_filters) + .filter(bbox_filter) + .filter(time_filter) + .options(selected_properties) + ) matched = results.count() @@ -193,15 +218,17 @@ def query(self, offset=0, limit=10, resulttype='results', 'type': 'FeatureCollection', 'features': [], 'numberMatched': matched, - 'numberReturned': 0 + 'numberReturned': 0, } - if resulttype == "hits" or not results: + if resulttype == 'hits' or not results: return response crs_transform_out = self._get_crs_transform(crs_transform_spec) - for item in results.order_by(*order_by_clauses).offset(offset).limit(limit): # noqa + for item in ( + results.order_by(*order_by_clauses).offset(offset).limit(limit) + ): # noqa response['numberReturned'] += 1 response['features'].append( self._sqlalchemy_to_feature(item, 
crs_transform_out) @@ -211,7 +238,7 @@ def query(self, offset=0, limit=10, resulttype='results', def get_fields(self): """ - Return fields (columns) from PostgreSQL table + Return fields (columns) from database table :returns: dict of fields """ @@ -269,7 +296,9 @@ def _column_format_to_json_schema_format(column_type): self._fields[str(column.name)] = { 'type': _column_type_to_json_schema_type(column.type), - 'format': _column_format_to_json_schema_format(column.type) + 'format': _column_format_to_json_schema_format( + column.type + ), } return self._fields @@ -291,7 +320,7 @@ def get(self, identifier, crs_transform_spec=None, **kwargs): # Retrieve data from database as feature item = session.get(self.table_model, identifier) if item is None: - msg = f"No such item: {self.id_field}={identifier}." + msg = f'No such item: {self.id_field}={identifier}.' raise ProviderItemNotFoundError(msg) crs_transform_out = self._get_crs_transform(crs_transform_spec) feature = self._sqlalchemy_to_feature(item, crs_transform_out) @@ -306,18 +335,28 @@ def get(self, identifier, crs_transform_spec=None, **kwargs): # Add fields for previous and next items id_field = getattr(self.table_model, self.id_field) - prev_item = (session.query(self.table_model) - .order_by(id_field.desc()) - .filter(id_field < identifier) - .first()) - next_item = (session.query(self.table_model) - .order_by(id_field.asc()) - .filter(id_field > identifier) - .first()) - feature['prev'] = (getattr(prev_item, self.id_field) - if prev_item is not None else identifier) - feature['next'] = (getattr(next_item, self.id_field) - if next_item is not None else identifier) + prev_item = ( + session.query(self.table_model) + .order_by(id_field.desc()) + .filter(id_field < identifier) + .first() + ) + next_item = ( + session.query(self.table_model) + .order_by(id_field.asc()) + .filter(id_field > identifier) + .first() + ) + feature['prev'] = ( + getattr(prev_item, self.id_field) + if prev_item is not None + else identifier 
+ ) + feature['next'] = ( + getattr(next_item, self.id_field) + if next_item is not None + else identifier + ) return feature @@ -331,7 +370,8 @@ def create(self, item): """ identifier, json_data = self._load_and_prepare_item( - item, accept_missing_identifier=True) + item, accept_missing_identifier=True + ) new_instance = self._feature_to_sqlalchemy(json_data, identifier) with Session(self._engine) as session: @@ -353,7 +393,8 @@ def update(self, identifier, item): """ identifier, json_data = self._load_and_prepare_item( - item, raise_if_exists=False) + item, raise_if_exists=False + ) new_instance = self._feature_to_sqlalchemy(json_data, identifier) with Session(self._engine) as session: @@ -373,8 +414,7 @@ def delete(self, identifier): with Session(self._engine) as session: id_column = getattr(self.table_model, self.id_field) result = session.execute( - delete(self.table_model) - .where(id_column == identifier) + delete(self.table_model).where(id_column == identifier) ) session.commit() @@ -393,9 +433,7 @@ def _store_db_parameters(self, parameters, options): self.db_options = options def _sqlalchemy_to_feature(self, item, crs_transform_out=None): - feature = { - 'type': 'Feature' - } + feature = {'type': 'Feature'} # Add properties from item item_dict = item.__dict__ @@ -406,7 +444,10 @@ def _sqlalchemy_to_feature(self, item, crs_transform_out=None): # Convert geometry to GeoJSON style if feature['properties'].get(self.geom): wkb_geom = feature['properties'].pop(self.geom) - shapely_geom = to_shape(wkb_geom) + try: + shapely_geom = to_shape(wkb_geom) + except TypeError: + shapely_geom = shapely.geometry.shape(wkb_geom) if crs_transform_out is not None: shapely_geom = crs_transform_out(shapely_geom) geojson_geom = shapely.geometry.mapping(shapely_geom) @@ -457,7 +498,8 @@ def _get_cql_filters(self, filterq): # Convert filterq into SQL Alchemy filters field_mapping = { column_name: getattr(self.table_model, column_name) - for column_name in 
self.table_model.__table__.columns.keys()} + for column_name in self.table_model.__table__.columns.keys() + } cql_filters = to_filter(filterq, field_mapping) return cql_filters @@ -476,16 +518,13 @@ def _get_property_filters(self, properties): return property_filters - def _get_bbox_filter(self, bbox): - if not bbox: - return True # Let everything through - - # Convert bbx to SQL Alchemy clauses - envelope = ST_MakeEnvelope(*bbox) - geom_column = getattr(self.table_model, self.geom) - bbox_filter = geom_column.intersects(envelope) - - return bbox_filter + def _get_bbox_filter(self, bbox: list[float]): + """ + Construct the bounding box filter function that + will be used in the query; this is dependent on the + underlying db driver + """ + raise NotImplementedError def _get_datetime_filter(self, datetime_): if datetime_ in (None, '../..'): @@ -541,7 +580,7 @@ def _get_crs_transform(self, crs_transform_spec=None): if crs_transform_spec is not None: crs_transform = get_transform_from_crs( pyproj.CRS.from_wkt(crs_transform_spec.source_crs_wkt), - pyproj.CRS.from_wkt(crs_transform_spec.target_crs_wkt), + pyproj.CRS.from_wkt(crs_transform_spec.target_crs_wkt) ) else: crs_transform = None @@ -550,16 +589,17 @@ def _get_crs_transform(self, crs_transform_spec=None): @functools.cache def get_engine( - host: str, - port: str, - database: str, - user: str, - password: str, - **connection_options + driver_name: str, + host: str, + port: str, + database: str, + user: str, + password: str, + **connection_options, ): """Create SQL Alchemy engine.""" conn_str = URL.create( - 'postgresql+psycopg2', + drivername=driver_name, username=user, password=password, host=host, @@ -567,23 +607,20 @@ def get_engine( database=database ) conn_args = { - 'client_encoding': 'utf8', - 'application_name': 'pygeoapi', - **connection_options, + **connection_options } engine = create_engine( - conn_str, - connect_args=conn_args, - pool_pre_ping=True) + conn_str, connect_args=conn_args, 
pool_pre_ping=True + ) return engine @functools.cache def get_table_model( - table_name: str, - id_field: str, - db_search_path: tuple[str], - engine, + table_name: str, + id_field: str, + db_search_path: tuple[str], + engine, ): """Reflect table.""" metadata = MetaData() @@ -592,14 +629,16 @@ def get_table_model( schema = db_search_path[0] try: metadata.reflect( - bind=engine, schema=schema, only=[table_name], views=True) + bind=engine, schema=schema, only=[table_name], views=True + ) except OperationalError: raise ProviderConnectionError( - f"Could not connect to {repr(engine.url)} (password hidden).") + f'Could not connect to {repr(engine.url)} (password hidden).' + ) except InvalidRequestError: raise ProviderQueryError( f"Table '{table_name}' not found in schema '{schema}' " - f"on {repr(engine.url)}." + f'on {repr(engine.url)}.' ) # Create SQLAlchemy model from reflected table @@ -611,7 +650,8 @@ def get_table_model( sqlalchemy_table_def.append_constraint(PrimaryKeyConstraint(id_field)) except (ConstraintColumnNotFoundError, KeyError): raise ProviderQueryError( - f"No such id_field column ({id_field}) on {schema}.{table_name}.") + f'No such id_field column ({id_field}) on {schema}.{table_name}.' + ) _Base = automap_base(metadata=metadata) _Base.prepare( @@ -634,3 +674,85 @@ def _name_for_scalar_relationship(base, local_cls, referred_cls, constraint): ) return newname return name + + +class PostgreSQLProvider(GenericSQLProvider): + """ + A provider for querying a PostgreSQL database + """ + + def __init__(self, provider_def: dict): + """ + PostgreSQLProvider Class constructor + + :param provider_def: provider definitions from yml pygeoapi-config. 
+ data,id_field, name set in parent class + data contains the connection information + for class DatabaseCursor + :returns: pygeoapi.provider.sql.PostgreSQLProvider + """ + + driver_name = 'postgresql+psycopg2' + extra_conn_args = { + 'client_encoding': 'utf8', + 'application_name': 'pygeoapi' + } + super().__init__(provider_def, driver_name, extra_conn_args) + + def _get_bbox_filter(self, bbox: list[float]): + """ + Construct the bounding box filter function + """ + if not bbox: + return True # Let everything through if no bbox + + # Since this provider uses postgis, we can use ST_MakeEnvelope + envelope = ST_MakeEnvelope(*bbox) + geom_column = getattr(self.table_model, self.geom) + bbox_filter = geom_column.intersects(envelope) + + return bbox_filter + + +class MySQLProvider(GenericSQLProvider): + """ + A provider for a MySQL database + """ + + def __init__(self, provider_def: dict): + """ + MySQLProvider Class constructor + + :param provider_def: provider definitions from yml pygeoapi-config. 
+ data,id_field, name set in parent class + data contains the connection information + for class DatabaseCursor + :returns: pygeoapi.provider.sql.MySQLProvider + """ + + driver_name = 'mysql+pymysql' + extra_conn_args = { + 'charset': 'utf8mb4' + } + super().__init__(provider_def, driver_name, extra_conn_args) + + def _get_bbox_filter(self, bbox: list[float]): + """ + Construct the bounding box filter function + """ + if not bbox: + return True # Let everything through if no bbox + + # If we are using mysql we can't use ST_MakeEnvelope since it is + # postgis specific and thus we have to use MBRContains with a WKT + # POLYGON + + # Create WKT POLYGON from bbox: (minx, miny, maxx, maxy) + minx, miny, maxx, maxy = bbox + polygon_wkt = f'POLYGON(({minx} {miny}, {maxx} {miny}, {maxx} {maxy}, {minx} {maxy}, {minx} {miny}))' # noqa + geom_column = getattr(self.table_model, self.geom) + # Use MySQL MBRContains for index-accelerated bounding box checks + bbox_filter = func.MBRContains( + func.ST_GeomFromText(polygon_wkt), geom_column + ) + return bbox_filter diff --git a/requirements-provider.txt b/requirements-provider.txt index 08c15acdf..dc70631a0 100644 --- a/requirements-provider.txt +++ b/requirements-provider.txt @@ -19,8 +19,9 @@ pygeofilter[backend-sqlalchemy] pygeoif pygeometa pymongo==4.6.3 +pymysql scipy sodapy xarray zarr -s3fs<=2023.6.0 +s3fs<=2023.6.0 \ No newline at end of file diff --git a/tests/data/mysql_data.sql b/tests/data/mysql_data.sql new file mode 100644 index 000000000..f2174ae55 --- /dev/null +++ b/tests/data/mysql_data.sql @@ -0,0 +1,24 @@ +-- A test database for the mysql provider; a simple geospatial app + +-- Create the database +DROP DATABASE IF EXISTS test_geo_app; +CREATE DATABASE test_geo_app; +USE test_geo_app; + +-- Create the location table +CREATE TABLE location ( + locationID INT AUTO_INCREMENT PRIMARY KEY, + locationName VARCHAR(100) NOT NULL, + description TEXT, + locationCoordinates POINT NOT NULL, + created_at DATETIME DEFAULT 
CURRENT_TIMESTAMP, + SPATIAL INDEX(locationCoordinates) +); + +-- Insert sample geospatial data +INSERT INTO location (locationName, description, locationCoordinates) VALUES +('Central Park', 'A large public park in NYC', ST_GeomFromText('POINT(-73.9654 40.7829)')), +('Golden Gate Bridge', 'Iconic suspension bridge in SF', ST_GeomFromText('POINT(-122.4783 37.8199)')), +('Eiffel Tower', 'Famous Paris landmark', ST_GeomFromText('POINT(2.2945 48.8584)')), +('Sydney Opera House', 'Multi-venue performing arts centre in Australia', ST_GeomFromText('POINT(151.2153 -33.8568)')), +('Christ the Redeemer', 'Art Deco statue of Jesus Christ in Rio', ST_GeomFromText('POINT(-43.2105 -22.9519)')); diff --git a/tests/test_mysql_provider.py b/tests/test_mysql_provider.py new file mode 100644 index 000000000..86541af10 --- /dev/null +++ b/tests/test_mysql_provider.py @@ -0,0 +1,171 @@ +# ================================================================= +# +# Authors: Colton Loftus +# +# Copyright (c) 2025 Colton Loftus +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= + +import os +import pytest +from pygeoapi.provider.base import ( + ProviderItemNotFoundError, +) +from pygeoapi.provider.sql import MySQLProvider + +PASSWORD = os.environ.get('MYSQL_PASSWORD', 'mysql') + + +""" +For local testing, a MySQL database can be spun up with docker +compose as follows: + +services: + + mysql: + image: mysql:8 + ports: + - 3306:3306 + environment: + MYSQL_ROOT_PASSWORD: mysql + MYSQL_USER: pygeoapi + MYSQL_PASSWORD: mysql + MYSQL_DATABASE: test_geo_app + volumes: + - ./tests/data/mysql_data.sql:/docker-entrypoint-initdb.d/init.sql:ro +""" + + +@pytest.fixture() +def config(): + return { + 'name': 'MySQL', + 'type': 'feature', + 'data': { + 'host': 'localhost', + 'dbname': 'test_geo_app', + 'user': 'root', + 'port': 3306, + 'password': PASSWORD, + 'search_path': ['test_geo_app'] + }, + 'options': {'connect_timeout': 10}, + 'id_field': 'locationID', + 'table': 'location', + 'geom_field': 'locationCoordinates' + } + + +def test_valid_connection_options(config): + if config.get('options'): + keys = list(config['options'].keys()) + for key in keys: + assert key in [ + 'connect_timeout', + 'tcp_user_timeout', + 'keepalives', + 'keepalives_idle', + 'keepalives_count', + 'keepalives_interval' + ] + + +def test_query(config): + """Testing query for a valid JSON object with geometry""" + p = MySQLProvider(config) + feature_collection = p.query() + assert feature_collection.get('type') == 'FeatureCollection' + features = feature_collection.get('features') + assert features is not None + feature = features[0] + properties = feature.get('properties') + assert properties is not None + geometry = 
feature.get('geometry') + assert geometry is not None + + +def test_fields(config): + p = MySQLProvider(config) + fields = p.get_fields() + expectedFields = [ + 'locationID', + 'locationName', + 'description', + 'created_at', + ] + for field in expectedFields: + assert field in fields + + +def test_query_with_paging(config): + """Test query valid features with paging""" + p = MySQLProvider(config) + feature_collection = p.query(limit=2) + + ALL_ITEMS_IN_DB = 5 + assert feature_collection['numberMatched'] == ALL_ITEMS_IN_DB + assert feature_collection['numberReturned'] == 2 + + feature_collection = p.query(offset=3) + assert feature_collection['numberMatched'] == ALL_ITEMS_IN_DB + assert feature_collection['numberReturned'] == ALL_ITEMS_IN_DB - 3 + + +def test_query_bbox(config): + """Test query with a specified bounding box""" + p = MySQLProvider(config) + boxed_feature_collection = p.query(bbox=[0, 0, 0, 0]) + assert len(boxed_feature_collection['features']) == 0 + + nyc_bbox = [-73.9754, 40.7729, -73.9554, 40.7929] + + boxed_feature_collection = p.query(bbox=nyc_bbox) + assert len(boxed_feature_collection['features']) == 1 + assert boxed_feature_collection['features'][0]['id'] == 1 + + +def test_query_sortby(config): + """Test query with sorting""" + psp = MySQLProvider(config) + up = psp.query(sortby=[{'property': 'locationName', 'order': '+'}]) + firstItem = up['features'][0]['properties']['locationName'] + assert firstItem == 'Central Park' + secondItem = up['features'][1]['properties']['locationName'] + assert secondItem == 'Christ the Redeemer' + assert firstItem < secondItem + + +def test_query_skip_geometry(config): + """Test query without geometry""" + provider = MySQLProvider(config) + result = provider.query(skip_geometry=True) + feature = result['features'][0] + assert feature['geometry'] is None + + +def test_get_not_existing_item_raise_exception(config): + """Testing query for a not existing object""" + p = MySQLProvider(config) + with 
pytest.raises(ProviderItemNotFoundError): + p.get(-1) diff --git a/tests/test_postgresql_provider.py b/tests/test_postgresql_provider.py index 054d3978f..ac16e8e3c 100644 --- a/tests/test_postgresql_provider.py +++ b/tests/test_postgresql_provider.py @@ -57,8 +57,8 @@ ProviderItemNotFoundError, ProviderQueryError ) -from pygeoapi.provider.postgresql import PostgreSQLProvider -import pygeoapi.provider.postgresql as postgresql_provider_module +from pygeoapi.provider.sql import PostgreSQLProvider +import pygeoapi.provider.sql as postgresql_provider_module from pygeoapi.util import (yaml_load, geojson_to_geom, get_transform_from_crs, get_crs_from_uri) From d02439dce44aaf5e38112b7f31bcc0000bdd777a Mon Sep 17 00:00:00 2001 From: Colton Loftus <70598503+C-Loftus@users.noreply.github.com> Date: Thu, 24 Apr 2025 15:57:35 -0400 Subject: [PATCH 2/5] keep postgis instructions --- pygeoapi/provider/sql.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pygeoapi/provider/sql.py b/pygeoapi/provider/sql.py index 3cc8ed2c9..f78b1ac2d 100644 --- a/pygeoapi/provider/sql.py +++ b/pygeoapi/provider/sql.py @@ -39,6 +39,19 @@ # # ================================================================= +# Testing local postgis with docker: +# docker run --name "postgis" \ +# -v postgres_data:/var/lib/postgresql -p 5432:5432 \ +# -e ALLOW_IP_RANGE=0.0.0.0/0 \ +# -e POSTGRES_USER=postgres \ +# -e POSTGRES_PASS=postgres \ +# -e POSTGRES_DBNAME=test \ +# -d -t kartoza/postgis + +# Import dump: +# gunzip < tests/data/hotosm_bdi_waterways.sql.gz | +# psql -U postgres -h 127.0.0.1 -p 5432 test + from copy import deepcopy from datetime import datetime from decimal import Decimal From f2bae20df23bee38cf29d26a8e43fe5b35a38640 Mon Sep 17 00:00:00 2001 From: Colton Loftus <70598503+C-Loftus@users.noreply.github.com> Date: Mon, 5 May 2025 16:47:35 -0400 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: Benjamin Webb <40066515+webb-ben@users.noreply.github.com> --- 
pygeoapi/provider/sql.py | 8 ++++---- tests/test_mysql_provider.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pygeoapi/provider/sql.py b/pygeoapi/provider/sql.py index f78b1ac2d..9859fffa3 100644 --- a/pygeoapi/provider/sql.py +++ b/pygeoapi/provider/sql.py @@ -241,7 +241,7 @@ def query( for item in ( results.order_by(*order_by_clauses).offset(offset).limit(limit) - ): # noqa + ): response['numberReturned'] += 1 response['features'].append( self._sqlalchemy_to_feature(item, crs_transform_out) @@ -311,7 +311,7 @@ def _column_format_to_json_schema_format(column_type): 'type': _column_type_to_json_schema_type(column.type), 'format': _column_format_to_json_schema_format( column.type - ), + ) } return self._fields @@ -608,7 +608,7 @@ def get_engine( database: str, user: str, password: str, - **connection_options, + **connection_options ): """Create SQL Alchemy engine.""" conn_str = URL.create( @@ -633,7 +633,7 @@ def get_table_model( table_name: str, id_field: str, db_search_path: tuple[str], - engine, + engine ): """Reflect table.""" metadata = MetaData() diff --git a/tests/test_mysql_provider.py b/tests/test_mysql_provider.py index 86541af10..0f470d750 100644 --- a/tests/test_mysql_provider.py +++ b/tests/test_mysql_provider.py @@ -112,7 +112,7 @@ def test_fields(config): 'locationID', 'locationName', 'description', - 'created_at', + 'created_at' ] for field in expectedFields: assert field in fields From fd3c94f0e303e2fbba3cf70991caae8017bb2525 Mon Sep 17 00:00:00 2001 From: Colton Loftus <70598503+C-Loftus@users.noreply.github.com> Date: Mon, 5 May 2025 16:47:47 -0400 Subject: [PATCH 4/5] Update pygeoapi/provider/sql.py Co-authored-by: Benjamin Webb <40066515+webb-ben@users.noreply.github.com> --- pygeoapi/provider/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygeoapi/provider/sql.py b/pygeoapi/provider/sql.py index 9859fffa3..d29ddf603 100644 --- a/pygeoapi/provider/sql.py +++ b/pygeoapi/provider/sql.py @@ -231,7 
+231,7 @@ def query( 'type': 'FeatureCollection', 'features': [], 'numberMatched': matched, - 'numberReturned': 0, + 'numberReturned': 0 } if resulttype == 'hits' or not results: From 1c39869c55dabc0f4b0348abed0a74ea48f99ac6 Mon Sep 17 00:00:00 2001 From: Colton Loftus <70598503+C-Loftus@users.noreply.github.com> Date: Mon, 5 May 2025 21:39:56 -0400 Subject: [PATCH 5/5] Update pygeoapi/provider/sql.py Co-authored-by: Benjamin Webb <40066515+webb-ben@users.noreply.github.com> --- pygeoapi/provider/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygeoapi/provider/sql.py b/pygeoapi/provider/sql.py index d29ddf603..864d5be0c 100644 --- a/pygeoapi/provider/sql.py +++ b/pygeoapi/provider/sql.py @@ -154,7 +154,7 @@ def __init__( self.db_name, self.db_user, self._db_password, - **{**(self.db_options or {}), **(extra_conn_args or {})} + **(self.db_options or {}) | (extra_conn_args or {}) ) self.table_model = get_table_model( self.table, self.id_field, self.db_search_path, self._engine