that's too much!
8 binary files not shown.
@@ -0,0 +1,98 @@
"""
Script to create the data and write legacy storage (pickle) files.

Based on pandas' generate_legacy_storage_files.py script.

To use this script, create an environment for which you want to
generate pickles, activate the environment, and run this script as:

$ python geopandas/geopandas/io/tests/generate_legacy_storage_files.py \
    geopandas/geopandas/io/tests/data/pickle/ pickle

This script generates a storage file for the current arch, system,
and python version.

The idea here is you are using the *current* version of
generate_legacy_storage_files with an *older* version of geopandas to
generate a pickle file. We will then check this file into a current
branch, and test using test_pickle.py. This will load the *older*
pickles and test versus the current data that is generated
(with master). These are then compared.
"""
import os
import pickle
import platform
import sys

import pandas as pd

import geopandas
from shapely.geometry import Point


def create_pickle_data():
    """Create the pickle data."""
    # custom geometry column name
    gdf_the_geom = geopandas.GeoDataFrame(
        {"a": [1, 2, 3], "the_geom": [Point(1, 1), Point(2, 2), Point(3, 3)]},
        geometry="the_geom",
    )

    # with crs
    gdf_crs = geopandas.GeoDataFrame(
        {"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]},
        crs="EPSG:4326",
    )

    return {"gdf_the_geom": gdf_the_geom, "gdf_crs": gdf_crs}


def platform_name():
    return "_".join(
        [
            str(geopandas.__version__),
            "pd-" + str(pd.__version__),
            "py-" + str(platform.python_version()),
            str(platform.machine()),
            str(platform.system().lower()),
        ]
    )


def write_legacy_pickles(output_dir):
    print(
        "This script generates a storage file for the current arch, system, "
        "and python version"
    )
    print("geopandas version: {}".format(geopandas.__version__))
    print("  output dir    : {}".format(output_dir))
    print("  storage format: pickle")

    pth = "{}.pickle".format(platform_name())

    with open(os.path.join(output_dir, pth), "wb") as fh:
        pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL)

    print("created pickle file: {}".format(pth))


def main():
    if len(sys.argv) != 3:
        sys.exit(
            "Specify output directory and storage type: generate_legacy_"
            "storage_files.py <output_dir> <storage_type>"
        )

    output_dir = str(sys.argv[1])
    storage_type = str(sys.argv[2])

    if storage_type == "pickle":
        write_legacy_pickles(output_dir=output_dir)
    else:
        sys.exit("storage_type must be one of {'pickle'}")


if __name__ == "__main__":
    main()
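For context, a minimal sketch of how a generated legacy pickle is consumed on the test side, mirroring the loop in test_pickle.py further below. The file name here is illustrative only, following the platform_name() pattern:

import pandas as pd
from geopandas.testing import assert_geodataframe_equal

# load a pickle written by an *older* geopandas version (illustrative file name)
legacy = pd.read_pickle("data/pickle/0.8.0_pd-1.0.5_py-3.8.5_x86_64_linux.pickle")
# regenerate the same objects with the *current* geopandas version
current = create_pickle_data()
for name, value in legacy.items():
    assert_geodataframe_equal(value, current[name])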
@@ -0,0 +1,914 @@
from __future__ import absolute_import

from itertools import product
import json
from packaging.version import Version
import os
import pathlib

import pytest
from pandas import DataFrame, read_parquet as pd_read_parquet
from pandas.testing import assert_frame_equal
import numpy as np
import pyproj
from shapely.geometry import box, Point, MultiPolygon


import geopandas
import geopandas._compat as compat
from geopandas import GeoDataFrame, read_file, read_parquet, read_feather
from geopandas.array import to_wkb
from geopandas.datasets import get_path
from geopandas.io.arrow import (
    SUPPORTED_VERSIONS,
    _create_metadata,
    _decode_metadata,
    _encode_metadata,
    _geopandas_to_arrow,
    _get_filesystem_path,
    _remove_id_from_member_of_ensembles,
    _validate_dataframe,
    _validate_metadata,
    METADATA_VERSION,
)
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from geopandas.tests.util import mock


DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"


# Skip all tests in this module if pyarrow is not available
pyarrow = pytest.importorskip("pyarrow")


@pytest.fixture(
    params=[
        "parquet",
        pytest.param(
            "feather",
            marks=pytest.mark.skipif(
                Version(pyarrow.__version__) < Version("0.17.0"),
                reason="needs pyarrow >= 0.17",
            ),
        ),
    ]
)
def file_format(request):
    if request.param == "parquet":
        return read_parquet, GeoDataFrame.to_parquet
    elif request.param == "feather":
        return read_feather, GeoDataFrame.to_feather


def test_create_metadata():
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))
    metadata = _create_metadata(df)

    assert isinstance(metadata, dict)
    assert metadata["version"] == METADATA_VERSION
    assert metadata["primary_column"] == "geometry"
    assert "geometry" in metadata["columns"]
    crs_expected = df.crs.to_json_dict()
    _remove_id_from_member_of_ensembles(crs_expected)
    assert metadata["columns"]["geometry"]["crs"] == crs_expected
    assert metadata["columns"]["geometry"]["encoding"] == "WKB"
    assert metadata["columns"]["geometry"]["geometry_types"] == [
        "MultiPolygon",
        "Polygon",
    ]

    assert np.array_equal(
        metadata["columns"]["geometry"]["bbox"], df.geometry.total_bounds
    )

    assert metadata["creator"]["library"] == "geopandas"
    assert metadata["creator"]["version"] == geopandas.__version__


def test_crs_metadata_datum_ensemble():
    # compatibility for older PROJ versions using PROJJSON with datum ensembles
    # https://github.com/geopandas/geopandas/pull/2453
    crs = pyproj.CRS("EPSG:4326")
    crs_json = crs.to_json_dict()
    check_ensemble = False
    if "datum_ensemble" in crs_json:
        # older versions of PROJ don't yet have datum ensembles
        check_ensemble = True
        assert "id" in crs_json["datum_ensemble"]["members"][0]
    _remove_id_from_member_of_ensembles(crs_json)
    if check_ensemble:
        assert "id" not in crs_json["datum_ensemble"]["members"][0]
    # ensure roundtrip still results in an equivalent CRS
    assert pyproj.CRS(crs_json) == crs


def test_write_metadata_invalid_spec_version():
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="EPSG:4326")
    with pytest.raises(ValueError, match="schema_version must be one of"):
        _create_metadata(gdf, schema_version="invalid")


def test_encode_metadata():
    metadata = {"a": "b"}

    expected = b'{"a": "b"}'
    assert _encode_metadata(metadata) == expected


def test_decode_metadata():
    metadata_str = b'{"a": "b"}'

    expected = {"a": "b"}
    assert _decode_metadata(metadata_str) == expected

    assert _decode_metadata(None) is None


def test_validate_dataframe():
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    # valid: should not raise ValueError
    _validate_dataframe(df)
    _validate_dataframe(df.set_index("iso_a3"))

    # add column with non-string type
    df[0] = 1

    # invalid: should raise ValueError
    with pytest.raises(ValueError):
        _validate_dataframe(df)

    with pytest.raises(ValueError):
        _validate_dataframe(df.set_index(0))

    # not a DataFrame: should raise ValueError
    with pytest.raises(ValueError):
        _validate_dataframe("not a dataframe")


def test_validate_metadata_valid():
    _validate_metadata(
        {
            "primary_column": "geometry",
            "columns": {"geometry": {"crs": None, "encoding": "WKB"}},
            "schema_version": "0.1.0",
        }
    )

    _validate_metadata(
        {
            "primary_column": "geometry",
            "columns": {"geometry": {"crs": None, "encoding": "WKB"}},
            "version": "<version>",
        }
    )

    _validate_metadata(
        {
            "primary_column": "geometry",
            "columns": {
                "geometry": {
                    "crs": {
                        # truncated PROJJSON for testing, as PROJJSON contents
                        # are not validated here
                        "id": {"authority": "EPSG", "code": 4326},
                    },
                    "encoding": "WKB",
                }
            },
            "version": "0.4.0",
        }
    )


@pytest.mark.parametrize(
    "metadata,error",
    [
        (None, "Missing or malformed geo metadata in Parquet/Feather file"),
        ({}, "Missing or malformed geo metadata in Parquet/Feather file"),
        # missing "version" key:
        (
            {"primary_column": "foo", "columns": None},
            "'geo' metadata in Parquet/Feather file is missing required key",
        ),
        # missing "columns" key:
        (
            {"primary_column": "foo", "version": "<version>"},
            "'geo' metadata in Parquet/Feather file is missing required key:",
        ),
        # missing "primary_column"
        (
            {"columns": [], "version": "<version>"},
            "'geo' metadata in Parquet/Feather file is missing required key:",
        ),
        (
            {"primary_column": "foo", "columns": [], "version": "<version>"},
            "'columns' in 'geo' metadata must be a dict",
        ),
        # missing "encoding" for column
        (
            {"primary_column": "foo", "columns": {"foo": {}}, "version": "<version>"},
            (
                "'geo' metadata in Parquet/Feather file is missing required key "
                "'encoding' for column 'foo'"
            ),
        ),
        # invalid column encoding
        (
            {
                "primary_column": "foo",
                "columns": {"foo": {"crs": None, "encoding": None}},
                "version": "<version>",
            },
            "Only WKB geometry encoding is supported",
        ),
        (
            {
                "primary_column": "foo",
                "columns": {"foo": {"crs": None, "encoding": "BKW"}},
                "version": "<version>",
            },
            "Only WKB geometry encoding is supported",
        ),
    ],
)
def test_validate_metadata_invalid(metadata, error):
    with pytest.raises(ValueError, match=error):
        _validate_metadata(metadata)


def test_validate_metadata_edges():
    metadata = {
        "primary_column": "geometry",
        "columns": {"geometry": {"crs": None, "encoding": "WKB", "edges": "spherical"}},
        "version": "1.0.0-beta.1",
    }
    with pytest.warns(
        UserWarning,
        match="The geo metadata indicate that column 'geometry' has spherical edges",
    ):
        _validate_metadata(metadata)
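For reference, a minimal 'geo' metadata payload that satisfies _validate_metadata, as inferred from the valid and invalid cases above. This illustrates what the tests accept; it is not a normative schema:

minimal_geo_metadata = {
    "version": "0.4.0",  # older payloads may carry "schema_version" instead
    "primary_column": "geometry",
    "columns": {
        "geometry": {
            "encoding": "WKB",  # the only encoding the reader accepts
            "crs": None,        # or a PROJJSON dict
        }
    },
}
_validate_metadata(minimal_geo_metadata)  # should not raise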

def test_to_parquet_fails_on_invalid_engine(tmpdir):
    df = GeoDataFrame(data=[[1, 2, 3]], columns=["a", "b", "a"], geometry=[Point(1, 1)])

    with pytest.raises(
        ValueError,
        match=(
            "GeoPandas only supports using pyarrow as the engine for "
            "to_parquet: 'fastparquet' passed instead."
        ),
    ):
        df.to_parquet(tmpdir / "test.parquet", engine="fastparquet")


@mock.patch("geopandas.io.arrow._to_parquet")
def test_to_parquet_does_not_pass_engine_along(mock_to_parquet):
    df = GeoDataFrame(data=[[1, 2, 3]], columns=["a", "b", "a"], geometry=[Point(1, 1)])
    df.to_parquet("", engine="pyarrow")
    # assert that the engine keyword is not passed through to _to_parquet (and thus
    # parquet.write_table)
    mock_to_parquet.assert_called_with(
        df, "", compression="snappy", index=None, schema_version=None
    )


# TEMPORARY: used to determine if pyarrow fails for roundtripping pandas data
# without geometries
def test_pandas_parquet_roundtrip1(tmpdir):
    df = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename)

    pq_df = pd_read_parquet(filename)

    assert_frame_equal(df, pq_df)


@pytest.mark.parametrize(
    "test_dataset", ["naturalearth_lowres", "naturalearth_cities", "nybb"]
)
def test_pandas_parquet_roundtrip2(test_dataset, tmpdir):
    df = DataFrame(read_file(get_path(test_dataset)).drop(columns=["geometry"]))

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename)

    pq_df = pd_read_parquet(filename)

    assert_frame_equal(df, pq_df)


@pytest.mark.parametrize(
    "test_dataset", ["naturalearth_lowres", "naturalearth_cities", "nybb"]
)
def test_roundtrip(tmpdir, file_format, test_dataset):
    """Writing to parquet should not raise errors, and should not alter the
    original GeoDataFrame.
    """
    reader, writer = file_format

    df = read_file(get_path(test_dataset))
    orig = df.copy()

    filename = os.path.join(str(tmpdir), "test.pq")

    writer(df, filename)

    assert os.path.exists(filename)

    # make sure that the original data frame is unaltered
    assert_geodataframe_equal(df, orig)

    # make sure that we can roundtrip the data frame
    pq_df = reader(filename)

    assert isinstance(pq_df, GeoDataFrame)
    assert_geodataframe_equal(df, pq_df)


def test_index(tmpdir, file_format):
    """Setting index=`True` should preserve the index in the output, and
    setting index=`False` should drop the index from the output.
    """
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset)).set_index("iso_a3")

    filename = os.path.join(str(tmpdir), "test_with_index.pq")
    writer(df, filename, index=True)
    pq_df = reader(filename)
    assert_geodataframe_equal(df, pq_df)

    filename = os.path.join(str(tmpdir), "drop_index.pq")
    writer(df, filename, index=False)
    pq_df = reader(filename)
    assert_geodataframe_equal(df.reset_index(drop=True), pq_df)


@pytest.mark.parametrize("compression", ["snappy", "gzip", "brotli", None])
def test_parquet_compression(compression, tmpdir):
    """Using compression options should not raise errors, and should
    return an identical GeoDataFrame.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename, compression=compression)
    pq_df = read_parquet(filename)

    assert isinstance(pq_df, GeoDataFrame)
    assert_geodataframe_equal(df, pq_df)


@pytest.mark.skipif(
    Version(pyarrow.__version__) < Version("0.17.0"),
    reason="Feather only supported for pyarrow >= 0.17",
)
@pytest.mark.parametrize("compression", ["uncompressed", "lz4", "zstd"])
def test_feather_compression(compression, tmpdir):
    """Using compression options should not raise errors, and should
    return an identical GeoDataFrame.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.feather")
    df.to_feather(filename, compression=compression)
    pq_df = read_feather(filename)

    assert isinstance(pq_df, GeoDataFrame)
    assert_geodataframe_equal(df, pq_df)


def test_parquet_multiple_geom_cols(tmpdir, file_format):
    """If multiple geometry columns are present when written to parquet,
    they should all be returned as such when read from parquet.
    """
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))
    df["geom2"] = df.geometry.copy()

    filename = os.path.join(str(tmpdir), "test.pq")
    writer(df, filename)

    assert os.path.exists(filename)

    pq_df = reader(filename)

    assert isinstance(pq_df, GeoDataFrame)
    assert_geodataframe_equal(df, pq_df)

    assert_geoseries_equal(df.geom2, pq_df.geom2, check_geom_type=True)

def test_parquet_missing_metadata(tmpdir):
    """Missing geo metadata, such as from a parquet file created
    from a pandas DataFrame, will raise a ValueError.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    # convert to DataFrame
    df = DataFrame(df)

    # convert the geometry column so we can extract it later
    df["geometry"] = to_wkb(df["geometry"].values)

    filename = os.path.join(str(tmpdir), "test.pq")

    # use pandas to_parquet (no geo metadata)
    df.to_parquet(filename)

    # missing metadata will raise ValueError
    with pytest.raises(
        ValueError, match="Missing geo metadata in Parquet/Feather file."
    ):
        read_parquet(filename)


def test_parquet_missing_metadata2(tmpdir):
    """Missing geo metadata, such as from a parquet file created
    from a pyarrow Table (which will also not contain pandas metadata),
    will raise a ValueError.
    """
    import pyarrow.parquet as pq

    table = pyarrow.table({"a": [1, 2, 3]})
    filename = os.path.join(str(tmpdir), "test.pq")

    # use pyarrow.parquet write_table (no geo metadata, but also no pandas metadata)
    pq.write_table(table, filename)

    # missing metadata will raise ValueError
    with pytest.raises(
        ValueError, match="Missing geo metadata in Parquet/Feather file."
    ):
        read_parquet(filename)


@pytest.mark.parametrize(
    "geo_meta,error",
    [
        ({"geo": b""}, "Missing or malformed geo metadata in Parquet/Feather file"),
        (
            {"geo": _encode_metadata({})},
            "Missing or malformed geo metadata in Parquet/Feather file",
        ),
        (
            {"geo": _encode_metadata({"foo": "bar"})},
            "'geo' metadata in Parquet/Feather file is missing required key",
        ),
    ],
)
def test_parquet_invalid_metadata(tmpdir, geo_meta, error):
    """Geo metadata with missing required fields will raise a ValueError.

    This requires writing the parquet file directly below, so that we can
    control the metadata that is written for this test.
    """
    from pyarrow import parquet, Table

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    # convert to DataFrame and encode geometry to WKB
    df = DataFrame(df)
    df["geometry"] = to_wkb(df["geometry"].values)

    table = Table.from_pandas(df)
    metadata = table.schema.metadata
    metadata.update(geo_meta)
    table = table.replace_schema_metadata(metadata)

    filename = os.path.join(str(tmpdir), "test.pq")
    parquet.write_table(table, filename)

    with pytest.raises(ValueError, match=error):
        read_parquet(filename)


def test_subset_columns(tmpdir, file_format):
    """Reading a subset of columns should correctly decode the selected
    geometry columns.
    """
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.pq")
    writer(df, filename)
    pq_df = reader(filename, columns=["name", "geometry"])

    assert_geodataframe_equal(df[["name", "geometry"]], pq_df)

    with pytest.raises(
        ValueError, match="No geometry columns are included in the columns read"
    ):
        reader(filename, columns=["name"])


def test_promote_secondary_geometry(tmpdir, file_format):
    """Reading a subset of columns that does not include the primary geometry
    column should promote the first geometry column present.
    """
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))
    df["geom2"] = df.geometry.copy()

    filename = os.path.join(str(tmpdir), "test.pq")
    writer(df, filename)
    pq_df = reader(filename, columns=["name", "geom2"])

    assert_geodataframe_equal(df.set_geometry("geom2")[["name", "geom2"]], pq_df)

    df["geom3"] = df.geometry.copy()

    writer(df, filename)
    with pytest.warns(
        UserWarning,
        match="Multiple non-primary geometry columns read from Parquet/Feather file.",
    ):
        pq_df = reader(filename, columns=["name", "geom2", "geom3"])

    assert_geodataframe_equal(
        df.set_geometry("geom2")[["name", "geom2", "geom3"]], pq_df
    )


def test_columns_no_geometry(tmpdir, file_format):
    """Reading a parquet file that is missing all of the geometry columns
    should raise a ValueError."""
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.pq")
    writer(df, filename)

    with pytest.raises(ValueError):
        reader(filename, columns=["name"])


def test_missing_crs(tmpdir, file_format):
    """If the CRS is `None`, it should be properly handled
    and remain `None` when read back from parquet.
    """
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"

    df = read_file(get_path(test_dataset))
    df.crs = None

    filename = os.path.join(str(tmpdir), "test.pq")
    writer(df, filename)
    pq_df = reader(filename)

    assert pq_df.crs is None

    assert_geodataframe_equal(df, pq_df, check_crs=True)


def test_default_geo_col_writes(tmp_path):
    # edge case: a geometry column name of None writes successfully
    df = GeoDataFrame({"a": [1, 2]})
    df.to_parquet(tmp_path / "test.pq")
    # cannot be round-tripped as a GeoDataFrame due to the invalid geometry column
    pq_df = pd_read_parquet(tmp_path / "test.pq")
    assert_frame_equal(df, pq_df)


@pytest.mark.skipif(
    Version(pyarrow.__version__) >= Version("0.17.0"),
    reason="ImportError is only raised for pyarrow < 0.17",
)
def test_feather_arrow_version(tmpdir):
    df = read_file(get_path("naturalearth_lowres"))
    filename = os.path.join(str(tmpdir), "test.feather")

    with pytest.raises(
        ImportError, match="pyarrow >= 0.17 required for Feather support"
    ):
        df.to_feather(filename)

def test_fsspec_url():
    fsspec = pytest.importorskip("fsspec")
    import fsspec.implementations.memory

    class MyMemoryFileSystem(fsspec.implementations.memory.MemoryFileSystem):
        # Simple fsspec filesystem that adds a required keyword.
        # Attempting to use this filesystem without the keyword will raise an exception.
        def __init__(self, is_set, *args, **kwargs):
            self.is_set = is_set
            super().__init__(*args, **kwargs)

    fsspec.register_implementation("memory", MyMemoryFileSystem, clobber=True)
    memfs = MyMemoryFileSystem(is_set=True)

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    with memfs.open("data.parquet", "wb") as f:
        df.to_parquet(f)

    result = read_parquet("memory://data.parquet", storage_options={"is_set": True})
    assert_geodataframe_equal(result, df)

    result = read_parquet("memory://data.parquet", filesystem=memfs)
    assert_geodataframe_equal(result, df)

    # reset fsspec registry
    fsspec.register_implementation(
        "memory", fsspec.implementations.memory.MemoryFileSystem, clobber=True
    )


def test_non_fsspec_url_with_storage_options_raises():
    with pytest.raises(ValueError, match="storage_options"):
        test_dataset = "naturalearth_lowres"
        read_parquet(get_path(test_dataset), storage_options={"foo": "bar"})


@pytest.mark.skipif(
    Version(pyarrow.__version__) < Version("5.0.0"),
    reason="pyarrow.fs requires pyarrow>=5.0.0",
)
def test_prefers_pyarrow_fs():
    filesystem, _ = _get_filesystem_path("file:///data.parquet")
    assert isinstance(filesystem, pyarrow.fs.LocalFileSystem)


def test_write_read_parquet_expand_user():
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
    test_file = "~/test_file.parquet"
    gdf.to_parquet(test_file)
    pq_df = geopandas.read_parquet(test_file)
    assert_geodataframe_equal(gdf, pq_df, check_crs=True)
    os.remove(os.path.expanduser(test_file))


def test_write_read_feather_expand_user():
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
    test_file = "~/test_file.feather"
    gdf.to_feather(test_file)
    f_df = geopandas.read_feather(test_file)
    assert_geodataframe_equal(gdf, f_df, check_crs=True)
    os.remove(os.path.expanduser(test_file))


@pytest.mark.parametrize("geometry", [[], [None]])
def test_write_empty_bbox(tmpdir, geometry):
    # empty dataframe or all missing geometries -> avoid bbox with NaNs
    gdf = geopandas.GeoDataFrame({"col": [1] * len(geometry)}, geometry=geometry)
    gdf.to_parquet(tmpdir / "test.parquet")

    from pyarrow.parquet import read_table

    table = read_table(tmpdir / "test.parquet")
    metadata = json.loads(table.schema.metadata[b"geo"])
    assert "encoding" in metadata["columns"]["geometry"]
    assert "bbox" not in metadata["columns"]["geometry"]


@pytest.mark.parametrize("format", ["feather", "parquet"])
def test_write_read_default_crs(tmpdir, format):
    if format == "feather":
        from pyarrow.feather import write_feather as write
    else:
        from pyarrow.parquet import write_table as write

    filename = os.path.join(str(tmpdir), f"test.{format}")
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)])
    table = _geopandas_to_arrow(gdf)

    # update the geo metadata to strip the 'crs' entry
    metadata = table.schema.metadata
    geo_metadata = _decode_metadata(metadata[b"geo"])
    del geo_metadata["columns"]["geometry"]["crs"]
    metadata.update({b"geo": _encode_metadata(geo_metadata)})
    table = table.replace_schema_metadata(metadata)

    write(table, filename)

    read = getattr(geopandas, f"read_{format}")
    df = read(filename)
    assert df.crs.equals(pyproj.CRS("OGC:CRS84"))


def test_write_iso_wkb(tmpdir):
    gdf = geopandas.GeoDataFrame(
        geometry=geopandas.GeoSeries.from_wkt(["POINT Z (1 2 3)"])
    )
    if compat.USE_SHAPELY_20:
        gdf.to_parquet(tmpdir / "test.parquet")
    else:
        with pytest.warns(UserWarning, match="The GeoDataFrame contains 3D geometries"):
            gdf.to_parquet(tmpdir / "test.parquet")

    from pyarrow.parquet import read_table

    table = read_table(tmpdir / "test.parquet")
    wkb = table["geometry"][0].as_py().hex()

    if compat.USE_SHAPELY_20:
        # correct ISO flavor
        assert wkb == "01e9030000000000000000f03f00000000000000400000000000000840"
    else:
        assert wkb == "0101000080000000000000f03f00000000000000400000000000000840"


@pytest.mark.parametrize(
    "format,schema_version",
    product(["feather", "parquet"], [None] + SUPPORTED_VERSIONS),
)
def test_write_spec_version(tmpdir, format, schema_version):
    if format == "feather":
        from pyarrow.feather import read_table
    else:
        from pyarrow.parquet import read_table

    filename = os.path.join(str(tmpdir), f"test.{format}")
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="EPSG:4326")
    write = getattr(gdf, f"to_{format}")
    write(filename, schema_version=schema_version)

    # ensure that we can roundtrip the data regardless of version
    read = getattr(geopandas, f"read_{format}")
    df = read(filename)
    assert_geodataframe_equal(df, gdf)

    # verify that the correct version is written in the metadata
    schema_version = schema_version or METADATA_VERSION
    table = read_table(filename)
    metadata = json.loads(table.schema.metadata[b"geo"])
    assert metadata["version"] == schema_version

    # verify that the CRS is correctly handled between versions
    if schema_version == "0.1.0":
        assert metadata["columns"]["geometry"]["crs"] == gdf.crs.to_wkt()
    else:
        crs_expected = gdf.crs.to_json_dict()
        _remove_id_from_member_of_ensembles(crs_expected)
        assert metadata["columns"]["geometry"]["crs"] == crs_expected

    # verify that geometry_type(s) is correctly handled between versions
    if Version(schema_version) <= Version("0.4.0"):
        assert "geometry_type" in metadata["columns"]["geometry"]
        assert metadata["columns"]["geometry"]["geometry_type"] == "Polygon"
    else:
        assert "geometry_types" in metadata["columns"]["geometry"]
        assert metadata["columns"]["geometry"]["geometry_types"] == ["Polygon"]


@pytest.mark.parametrize(
    "format,version", product(["feather", "parquet"], [None] + SUPPORTED_VERSIONS)
)
def test_write_deprecated_version_parameter(tmpdir, format, version):
    if format == "feather":
        from pyarrow.feather import read_table

        version = version or 2
    else:
        from pyarrow.parquet import read_table

        version = version or "2.6"

    filename = os.path.join(str(tmpdir), f"test.{format}")
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="EPSG:4326")
    write = getattr(gdf, f"to_{format}")

    if version in SUPPORTED_VERSIONS:
        with pytest.warns(
            FutureWarning,
            match="the `version` parameter has been replaced with `schema_version`",
        ):
            write(filename, version=version)
    else:
        # no warning raised if not one of the captured versions
        write(filename, version=version)

    table = read_table(filename)
    metadata = json.loads(table.schema.metadata[b"geo"])

    if version in SUPPORTED_VERSIONS:
        # version is captured as a schema_version parameter
        assert metadata["version"] == version
    else:
        # version is passed through to the underlying writer
        assert metadata["version"] == METADATA_VERSION


@pytest.mark.parametrize("version", ["0.1.0", "0.4.0", "1.0.0-beta.1"])
def test_read_versioned_file(version):
    """
    Verify that files for different metadata spec versions can be read,
    created for each supported version with:

    # small dummy test dataset (not naturalearth_lowres, as this can change over time)
    from shapely.geometry import box, MultiPolygon
    df = geopandas.GeoDataFrame(
        {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]},
        geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5, 5)],
        crs="EPSG:4326",
    )
    df.to_feather(DATA_PATH / 'arrow' / f'test_data_v{METADATA_VERSION}.feather')
    df.to_parquet(DATA_PATH / 'arrow' / f'test_data_v{METADATA_VERSION}.parquet')
    """
    expected = geopandas.GeoDataFrame(
        {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]},
        geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5, 5)],
        crs="EPSG:4326",
    )

    df = geopandas.read_feather(DATA_PATH / "arrow" / f"test_data_v{version}.feather")
    assert_geodataframe_equal(df, expected, check_crs=True)

    df = geopandas.read_parquet(DATA_PATH / "arrow" / f"test_data_v{version}.parquet")
    assert_geodataframe_equal(df, expected, check_crs=True)


def test_read_gdal_files():
    """
    Verify that files written by GDAL can be read by geopandas.

    Since it is currently not yet straightforward to install GDAL with
    Parquet/Arrow enabled in our conda setup, we are testing with some
    generated files included in the repo (using GDAL 3.5.0):

    # small dummy test dataset (not naturalearth_lowres, as this can change over time)
    from shapely.geometry import box, MultiPolygon
    df = geopandas.GeoDataFrame(
        {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]},
        geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5, 5)],
        crs="EPSG:4326",
    )
    df.to_file("test_data.gpkg", GEOMETRY_NAME="geometry")

    and then the gpkg file is converted to Parquet/Arrow with:
    $ ogr2ogr -f Parquet -lco FID= test_data_gdal350.parquet test_data.gpkg
    $ ogr2ogr -f Arrow -lco FID= -lco GEOMETRY_ENCODING=WKB test_data_gdal350.arrow test_data.gpkg
    """  # noqa: E501
    expected = geopandas.GeoDataFrame(
        {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]},
        geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5, 5)],
        crs="EPSG:4326",
    )

    df = geopandas.read_parquet(DATA_PATH / "arrow" / "test_data_gdal350.parquet")
    assert_geodataframe_equal(df, expected, check_crs=True)

    df = geopandas.read_feather(DATA_PATH / "arrow" / "test_data_gdal350.arrow")
    assert_geodataframe_equal(df, expected, check_crs=True)


def test_parquet_read_partitioned_dataset(tmpdir):
    # we don't yet explicitly support this (in writing), but for Parquet it
    # works for reading (by relying on pyarrow.read_table)
    df = read_file(get_path("naturalearth_lowres"))

    # manually create a partitioned dataset
    basedir = tmpdir / "partitioned_dataset"
    basedir.mkdir()
    df[:100].to_parquet(basedir / "data1.parquet")
    df[100:].to_parquet(basedir / "data2.parquet")

    result = read_parquet(basedir)
    assert_geodataframe_equal(result, df)


def test_parquet_read_partitioned_dataset_fsspec(tmpdir):
    fsspec = pytest.importorskip("fsspec")

    df = read_file(get_path("naturalearth_lowres"))

    # manually create a partitioned dataset
    memfs = fsspec.filesystem("memory")
    memfs.mkdir("partitioned_dataset")
    with memfs.open("partitioned_dataset/data1.parquet", "wb") as f:
        df[:100].to_parquet(f)
    with memfs.open("partitioned_dataset/data2.parquet", "wb") as f:
        df[100:].to_parquet(f)

    result = read_parquet("memory://partitioned_dataset")
    assert_geodataframe_equal(result, df)
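The partitioned-dataset tests above note that reading relies on pyarrow; a minimal sketch of the equivalent direct pyarrow call (the directory path is illustrative):

import pyarrow.parquet as pq

# pyarrow treats a directory of parquet files as a single dataset
table = pq.read_table("partitioned_dataset/")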
.venv/lib/python3.12/site-packages/geopandas/io/tests/test_file.py (new file, 1178 lines)
File diff suppressed because it is too large.
@@ -0,0 +1,307 @@
import os

from shapely.geometry import (
    LineString,
    MultiLineString,
    MultiPoint,
    MultiPolygon,
    Point,
    Polygon,
)

import geopandas
from geopandas import GeoDataFrame

from geopandas.testing import assert_geodataframe_equal
import pytest

from .test_file import FIONA_MARK, PYOGRIO_MARK


# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
city_hall_boundaries = Polygon(
    (
        (-73.5541107525234, 45.5091983609661),
        (-73.5546126200639, 45.5086813829106),
        (-73.5540185061397, 45.5084409343852),
        (-73.5539986525799, 45.5084323044531),
        (-73.5535801792994, 45.5089539203786),
        (-73.5541107525234, 45.5091983609661),
    )
)
vauquelin_place = Polygon(
    (
        (-73.5542465586147, 45.5081555487952),
        (-73.5540185061397, 45.5084409343852),
        (-73.5546126200639, 45.5086813829106),
        (-73.5548825850032, 45.5084033554357),
        (-73.5542465586147, 45.5081555487952),
    )
)

city_hall_walls = [
    LineString(
        (
            (-73.5541107525234, 45.5091983609661),
            (-73.5546126200639, 45.5086813829106),
            (-73.5540185061397, 45.5084409343852),
        )
    ),
    LineString(
        (
            (-73.5539986525799, 45.5084323044531),
            (-73.5535801792994, 45.5089539203786),
            (-73.5541107525234, 45.5091983609661),
        )
    ),
]

city_hall_entrance = Point(-73.553785, 45.508722)
city_hall_balcony = Point(-73.554138, 45.509080)
city_hall_council_chamber = Point(-73.554246, 45.508931)

point_3D = Point(-73.553785, 45.508722, 300)


# *****************************************
# TEST TOOLING


class _ExpectedError:
    def __init__(self, error_type, error_message_match):
        self.type = error_type
        self.match = error_message_match


class _ExpectedErrorBuilder:
    def __init__(self, composite_key):
        self.composite_key = composite_key

    def to_raise(self, error_type, error_match):
        _expected_exceptions[self.composite_key] = _ExpectedError(
            error_type, error_match
        )


def _expect_writing(gdf, ogr_driver):
    return _ExpectedErrorBuilder(_composite_key(gdf, ogr_driver))


def _composite_key(gdf, ogr_driver):
    return frozenset([id(gdf), ogr_driver])


def _expected_error_on(gdf, ogr_driver):
    composite_key = _composite_key(gdf, ogr_driver)
    return _expected_exceptions.get(composite_key, None)


# *****************************************
# TEST CASES
_geodataframes_to_write = []
_expected_exceptions = {}
_CRS = "epsg:4326"

# ------------------
# gdf with Points
gdf = GeoDataFrame(
    {"a": [1, 2]}, crs=_CRS, geometry=[city_hall_entrance, city_hall_balcony]
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with MultiPoints
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[
        MultiPoint([city_hall_balcony, city_hall_council_chamber]),
        MultiPoint([city_hall_entrance, city_hall_balcony, city_hall_council_chamber]),
    ],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with Points and MultiPoints
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[MultiPoint([city_hall_entrance, city_hall_balcony]), city_hall_balcony],
)
_geodataframes_to_write.append(gdf)
# The 'ESRI Shapefile' driver supports writing LineString/MultiLineString and
# Polygon/MultiPolygon, but does not mention mixed Point/MultiPoint;
# see https://www.gdal.org/drv_shapefile.html
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")

# ------------------
# gdf with LineStrings
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=city_hall_walls)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with MultiLineStrings
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[MultiLineString(city_hall_walls), MultiLineString(city_hall_walls)],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with LineStrings and MultiLineStrings
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with Polygons
gdf = GeoDataFrame(
    {"a": [1, 2]}, crs=_CRS, geometry=[city_hall_boundaries, vauquelin_place]
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with MultiPolygon
gdf = GeoDataFrame(
    {"a": [1]},
    crs=_CRS,
    geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with Polygon and MultiPolygon
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_boundaries,
    ],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with null geometry and Point
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, city_hall_entrance])
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with null geometry and 3D Point
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, point_3D])
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with null geometries only
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, None])
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with all shape types mixed together
gdf = GeoDataFrame(
    {"a": [1, 2, 3, 4, 5, 6]},
    crs=_CRS,
    geometry=[
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_entrance,
        MultiLineString(city_hall_walls),
        city_hall_walls[0],
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
    ],
)
_geodataframes_to_write.append(gdf)
# Not supported by the 'ESRI Shapefile' driver
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")

# ------------------
# gdf with all 2D shape types and a 3D Point mixed together
gdf = GeoDataFrame(
    {"a": [1, 2, 3, 4, 5, 6, 7]},
    crs=_CRS,
    geometry=[
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_entrance,
        MultiLineString(city_hall_walls),
        city_hall_walls[0],
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
        point_3D,
    ],
)
_geodataframes_to_write.append(gdf)
# Not supported by the 'ESRI Shapefile' driver
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")


@pytest.fixture(params=_geodataframes_to_write)
def geodataframe(request):
    return request.param


@pytest.fixture(params=["GeoJSON", "ESRI Shapefile", "GPKG", "SQLite"])
def ogr_driver(request):
    return request.param


@pytest.fixture(
    params=[
        pytest.param("fiona", marks=FIONA_MARK),
        pytest.param("pyogrio", marks=PYOGRIO_MARK),
    ]
)
def engine(request):
    return request.param


def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
    output_file = os.path.join(str(tmpdir), "output_file")
    write_kwargs = {}
    if ogr_driver == "SQLite":
        write_kwargs["spatialite"] = True

        # This if statement can be removed once the minimal fiona version is >= 1.8.20
        if engine == "fiona":
            import fiona
            from packaging.version import Version

            if Version(fiona.__version__) < Version("1.8.20"):
                pytest.skip("SQLite driver only available from version 1.8.20")

        # If there are only 3D Points, geometry_type needs to be specified for
        # spatialite at the moment. This if can be removed once the following PR
        # is released: https://github.com/geopandas/pyogrio/pull/223
        if (
            engine == "pyogrio"
            and len(geodataframe) == 2
            and geodataframe.geometry[0] is None
            and geodataframe.geometry[1] is not None
            and geodataframe.geometry[1].has_z
        ):
            write_kwargs["geometry_type"] = "Point Z"

    expected_error = _expected_error_on(geodataframe, ogr_driver)
    if expected_error:
        with pytest.raises(
            RuntimeError, match="Failed to write record|Could not add feature to layer"
        ):
            geodataframe.to_file(
                output_file, driver=ogr_driver, engine=engine, **write_kwargs
            )
    else:
        geodataframe.to_file(
            output_file, driver=ogr_driver, engine=engine, **write_kwargs
        )

        reloaded = geopandas.read_file(output_file, engine=engine)

        if ogr_driver == "GeoJSON" and engine == "pyogrio":
            # For GeoJSON files, the int64 column comes back as int32
            reloaded["a"] = reloaded["a"].astype("int64")

        assert_geodataframe_equal(geodataframe, reloaded, check_column_type="equiv")
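One note on the tooling in this file: _composite_key builds frozenset([id(gdf), ogr_driver]), so expected failures are keyed by object identity rather than by value. A minimal illustration of that behavior (the names here are for demonstration only):

from shapely.geometry import Point
from geopandas import GeoDataFrame

gdf_a = GeoDataFrame({"a": [1]}, geometry=[Point(0, 0)])
gdf_b = GeoDataFrame({"a": [1]}, geometry=[Point(0, 0)])  # equal in value, distinct object

# distinct objects have distinct ids, so they get independent expectations
assert frozenset([id(gdf_a), "GPKG"]) != frozenset([id(gdf_b), "GPKG"])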
@@ -0,0 +1,304 @@
from collections import OrderedDict

from shapely.geometry import (
    LineString,
    MultiLineString,
    MultiPoint,
    MultiPolygon,
    Point,
    Polygon,
)

import pandas as pd
import pytest
import numpy as np
from geopandas import GeoDataFrame
from geopandas.io.file import infer_schema

# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
city_hall_boundaries = Polygon(
    (
        (-73.5541107525234, 45.5091983609661),
        (-73.5546126200639, 45.5086813829106),
        (-73.5540185061397, 45.5084409343852),
        (-73.5539986525799, 45.5084323044531),
        (-73.5535801792994, 45.5089539203786),
        (-73.5541107525234, 45.5091983609661),
    )
)
vauquelin_place = Polygon(
    (
        (-73.5542465586147, 45.5081555487952),
        (-73.5540185061397, 45.5084409343852),
        (-73.5546126200639, 45.5086813829106),
        (-73.5548825850032, 45.5084033554357),
        (-73.5542465586147, 45.5081555487952),
    )
)

city_hall_walls = [
    LineString(
        (
            (-73.5541107525234, 45.5091983609661),
            (-73.5546126200639, 45.5086813829106),
            (-73.5540185061397, 45.5084409343852),
        )
    ),
    LineString(
        (
            (-73.5539986525799, 45.5084323044531),
            (-73.5535801792994, 45.5089539203786),
            (-73.5541107525234, 45.5091983609661),
        )
    ),
]

city_hall_entrance = Point(-73.553785, 45.508722)
city_hall_balcony = Point(-73.554138, 45.509080)
city_hall_council_chamber = Point(-73.554246, 45.508931)

point_3D = Point(-73.553785, 45.508722, 300)
linestring_3D = LineString(
    (
        (-73.5541107525234, 45.5091983609661, 300),
        (-73.5546126200639, 45.5086813829106, 300),
        (-73.5540185061397, 45.5084409343852, 300),
    )
)
polygon_3D = Polygon(
    (
        (-73.5541107525234, 45.5091983609661, 300),
        (-73.5535801792994, 45.5089539203786, 300),
        (-73.5541107525234, 45.5091983609661, 300),
    )
)


def test_infer_schema_only_points():
    df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])

    assert infer_schema(df) == {"geometry": "Point", "properties": OrderedDict()}


def test_infer_schema_points_and_multipoints():
    df = GeoDataFrame(
        geometry=[
            MultiPoint([city_hall_entrance, city_hall_balcony]),
            city_hall_balcony,
        ]
    )

    assert infer_schema(df) == {
        "geometry": ["MultiPoint", "Point"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_multipoints():
    df = GeoDataFrame(
        geometry=[
            MultiPoint(
                [city_hall_entrance, city_hall_balcony, city_hall_council_chamber]
            )
        ]
    )

    assert infer_schema(df) == {"geometry": "MultiPoint", "properties": OrderedDict()}


def test_infer_schema_only_linestrings():
    df = GeoDataFrame(geometry=city_hall_walls)

    assert infer_schema(df) == {"geometry": "LineString", "properties": OrderedDict()}


def test_infer_schema_linestrings_and_multilinestrings():
    df = GeoDataFrame(geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]])

    assert infer_schema(df) == {
        "geometry": ["MultiLineString", "LineString"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_multilinestrings():
    df = GeoDataFrame(geometry=[MultiLineString(city_hall_walls)])

    assert infer_schema(df) == {
        "geometry": "MultiLineString",
        "properties": OrderedDict(),
    }


def test_infer_schema_only_polygons():
    df = GeoDataFrame(geometry=[city_hall_boundaries, vauquelin_place])

    assert infer_schema(df) == {"geometry": "Polygon", "properties": OrderedDict()}


def test_infer_schema_polygons_and_multipolygons():
    df = GeoDataFrame(
        geometry=[
            MultiPolygon((city_hall_boundaries, vauquelin_place)),
            city_hall_boundaries,
        ]
    )

    assert infer_schema(df) == {
        "geometry": ["MultiPolygon", "Polygon"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_multipolygons():
    df = GeoDataFrame(geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))])

    assert infer_schema(df) == {"geometry": "MultiPolygon", "properties": OrderedDict()}


def test_infer_schema_multiple_shape_types():
    df = GeoDataFrame(
        geometry=[
            MultiPolygon((city_hall_boundaries, vauquelin_place)),
            city_hall_boundaries,
            MultiLineString(city_hall_walls),
            city_hall_walls[0],
            MultiPoint([city_hall_entrance, city_hall_balcony]),
            city_hall_balcony,
        ]
    )

    assert infer_schema(df) == {
        "geometry": [
            "MultiPolygon",
            "Polygon",
            "MultiLineString",
            "LineString",
            "MultiPoint",
            "Point",
        ],
        "properties": OrderedDict(),
    }


def test_infer_schema_mixed_3D_shape_type():
    df = GeoDataFrame(
        geometry=[
            MultiPolygon((city_hall_boundaries, vauquelin_place)),
            city_hall_boundaries,
            MultiLineString(city_hall_walls),
            city_hall_walls[0],
            MultiPoint([city_hall_entrance, city_hall_balcony]),
            city_hall_balcony,
            point_3D,
        ]
    )

    assert infer_schema(df) == {
        "geometry": [
            "3D Point",
            "MultiPolygon",
            "Polygon",
            "MultiLineString",
            "LineString",
            "MultiPoint",
            "Point",
        ],
        "properties": OrderedDict(),
    }


def test_infer_schema_mixed_3D_Point():
    df = GeoDataFrame(geometry=[city_hall_balcony, point_3D])

    assert infer_schema(df) == {
        "geometry": ["3D Point", "Point"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_3D_Points():
    df = GeoDataFrame(geometry=[point_3D, point_3D])

    assert infer_schema(df) == {"geometry": "3D Point", "properties": OrderedDict()}


def test_infer_schema_mixed_3D_linestring():
    df = GeoDataFrame(geometry=[city_hall_walls[0], linestring_3D])

    assert infer_schema(df) == {
        "geometry": ["3D LineString", "LineString"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_3D_linestrings():
    df = GeoDataFrame(geometry=[linestring_3D, linestring_3D])

    assert infer_schema(df) == {
        "geometry": "3D LineString",
        "properties": OrderedDict(),
    }


def test_infer_schema_mixed_3D_Polygon():
    df = GeoDataFrame(geometry=[city_hall_boundaries, polygon_3D])

    assert infer_schema(df) == {
        "geometry": ["3D Polygon", "Polygon"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_3D_Polygons():
    df = GeoDataFrame(geometry=[polygon_3D, polygon_3D])

    assert infer_schema(df) == {"geometry": "3D Polygon", "properties": OrderedDict()}


def test_infer_schema_null_geometry_and_2D_point():
    df = GeoDataFrame(geometry=[None, city_hall_entrance])

    # the None geometry type is omitted
    assert infer_schema(df) == {"geometry": "Point", "properties": OrderedDict()}


def test_infer_schema_null_geometry_and_3D_point():
    df = GeoDataFrame(geometry=[None, point_3D])

    # the None geometry type is omitted
    assert infer_schema(df) == {"geometry": "3D Point", "properties": OrderedDict()}


def test_infer_schema_null_geometry_all():
    df = GeoDataFrame(geometry=[None, None])

    # the None geometry type is then replaced by 'Unknown'
    # (the default geometry type supported by Fiona)
    assert infer_schema(df) == {"geometry": "Unknown", "properties": OrderedDict()}


@pytest.mark.parametrize(
    "array_data,dtype", [([1, 2**31 - 1], np.int32), ([1, np.nan], pd.Int32Dtype())]
)
def test_infer_schema_int32(array_data, dtype):
    int32col = pd.array(data=array_data, dtype=dtype)
    df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
    df["int32_column"] = int32col

    assert infer_schema(df) == {
        "geometry": "Point",
        "properties": OrderedDict([("int32_column", "int32")]),
    }


def test_infer_schema_int64():
    int64col = pd.array([1, np.nan], dtype=pd.Int64Dtype())
    df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
    df["int64_column"] = int64col

    assert infer_schema(df) == {
        "geometry": "Point",
        "properties": OrderedDict([("int64_column", "int")]),
    }
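Since infer_schema returns a Fiona-style schema mapping, its output can be fed straight back into fiona when writing. A minimal sketch, assuming a recent fiona is installed; the path and driver are illustrative:

import fiona
from shapely.geometry import mapping, Point
from geopandas import GeoDataFrame
from geopandas.io.file import infer_schema

df = GeoDataFrame({"a": [1]}, geometry=[Point(0, 0)], crs="EPSG:4326")
schema = infer_schema(df)  # e.g. {'geometry': 'Point', 'properties': OrderedDict([('a', 'int')])}
with fiona.open("out.geojson", "w", driver="GeoJSON", schema=schema, crs="EPSG:4326") as sink:
    # fiona records carry a GeoJSON-like geometry plus the properties dict
    sink.write({"geometry": mapping(df.geometry.iloc[0]), "properties": {"a": 1}})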
@@ -0,0 +1,110 @@
"""
See generate_legacy_storage_files.py for the creation of the legacy files.

"""
from contextlib import contextmanager
import glob
import os
import pathlib

import pandas as pd

import pytest
from geopandas.testing import assert_geodataframe_equal
from geopandas import _compat as compat
import geopandas
from shapely.geometry import Point

DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"


@pytest.fixture(scope="module")
def current_pickle_data():
    # our current version pickle data
    from .generate_legacy_storage_files import create_pickle_data

    return create_pickle_data()


files = glob.glob(str(DATA_PATH / "pickle" / "*.pickle"))


@pytest.fixture(params=files, ids=[p.split("/")[-1] for p in files])
def legacy_pickle(request):
    return request.param


@contextmanager
def with_use_pygeos(option):
    orig = geopandas.options.use_pygeos
    geopandas.options.use_pygeos = option
    try:
        yield
    finally:
        geopandas.options.use_pygeos = orig
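
A quick usage sketch (an aside, not part of the diff) of the helper above: the option is flipped only inside the block and restored even if the body raises.

# Hypothetical usage of with_use_pygeos; setting the option to True
# additionally requires pygeos to be installed.
with with_use_pygeos(False):
    assert geopandas.options.use_pygeos is False
# outside the block the original setting is back in place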
@pytest.mark.skipif(
    compat.USE_SHAPELY_20 or compat.USE_PYGEOS,
    reason=(
        "shapely 2.0/pygeos-based unpickling currently only works for "
        "shapely-2.0/pygeos-written files"
    ),
)
def test_legacy_pickles(current_pickle_data, legacy_pickle):
    result = pd.read_pickle(legacy_pickle)

    for name, value in result.items():
        expected = current_pickle_data[name]
        assert_geodataframe_equal(value, expected)


def test_round_trip_current(tmpdir, current_pickle_data):
    data = current_pickle_data

    for name, value in data.items():
        path = str(tmpdir / "{}.pickle".format(name))
        value.to_pickle(path)
        result = pd.read_pickle(path)
        assert_geodataframe_equal(result, value)
        assert isinstance(result.has_sindex, bool)


def _create_gdf():
    return geopandas.GeoDataFrame(
        {"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]},
        crs="EPSG:4326",
    )


@pytest.mark.skipif(not compat.HAS_PYGEOS, reason="requires pygeos to test #1745")
def test_pygeos_switch(tmpdir):
    # writing and reading with pygeos disabled
    with with_use_pygeos(False):
        gdf = _create_gdf()
        path = str(tmpdir / "gdf_crs1.pickle")
        gdf.to_pickle(path)
        result = pd.read_pickle(path)
        assert_geodataframe_equal(result, gdf)

    # writing without pygeos, reading with pygeos
    with with_use_pygeos(False):
        gdf = _create_gdf()
        path = str(tmpdir / "gdf_crs1.pickle")
        gdf.to_pickle(path)

    with with_use_pygeos(True):
        result = pd.read_pickle(path)
        gdf = _create_gdf()
        assert_geodataframe_equal(result, gdf)

    # writing with pygeos, reading without pygeos
    with with_use_pygeos(True):
        gdf = _create_gdf()
        path = str(tmpdir / "gdf_crs1.pickle")
        gdf.to_pickle(path)

    with with_use_pygeos(False):
        result = pd.read_pickle(path)
        gdf = _create_gdf()
        assert_geodataframe_equal(result, gdf)
@@ -0,0 +1,752 @@
"""
Tests here include reading/writing to different types of spatial databases.
The spatial database tests may not work without additional system
configuration. postGIS tests require a test database to have been setup;
see geopandas.tests.util for more information.
"""
import os
import warnings

import pandas as pd

import geopandas
from geopandas import GeoDataFrame, read_file, read_postgis

import geopandas._compat as compat
from geopandas.io.sql import _get_conn as get_conn, _write_postgis as write_postgis
from geopandas.tests.util import create_postgis, create_spatialite, validate_boro_df
import pytest

try:
    from sqlalchemy import text
except ImportError:
    # Avoid local imports for text in all sqlalchemy tests;
    # all tests using text use engine_postgis, which ensures sqlalchemy is available
    text = str


@pytest.fixture
def df_nybb():
    nybb_path = geopandas.datasets.get_path("nybb")
    df = read_file(nybb_path)
    return df


@pytest.fixture()
def connection_postgis():
    """
    Initiates a connection to a postGIS database that must already exist.
    See create_postgis for more information.
    """
    psycopg2 = pytest.importorskip("psycopg2")
    from psycopg2 import OperationalError

    dbname = "test_geopandas"
    user = os.environ.get("PGUSER")
    password = os.environ.get("PGPASSWORD")
    host = os.environ.get("PGHOST")
    port = os.environ.get("PGPORT")
    try:
        con = psycopg2.connect(
            dbname=dbname, user=user, password=password, host=host, port=port
        )
    except OperationalError:
        pytest.skip("Cannot connect with postgresql database")
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", message="pandas only supports SQLAlchemy connectable.*"
        )
        yield con
    con.close()
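
The fixtures in this file read their connection settings from the environment; a minimal sketch (hypothetical values, an aside rather than part of the diff) of configuring them before running the suite:

# Hypothetical local setup for the postGIS fixtures; the "test_geopandas"
# database itself must already exist and have PostGIS enabled.
import os

os.environ["PGUSER"] = "postgres"        # hypothetical credentials
os.environ["PGPASSWORD"] = "postgres"
os.environ["PGHOST"] = "localhost"
os.environ["PGPORT"] = "5432"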
@pytest.fixture()
def engine_postgis():
    """
    Initiates a connection engine to a postGIS database that must already exist.
    """
    sqlalchemy = pytest.importorskip("sqlalchemy")
    from sqlalchemy.engine.url import URL

    user = os.environ.get("PGUSER")
    password = os.environ.get("PGPASSWORD")
    host = os.environ.get("PGHOST")
    port = os.environ.get("PGPORT")
    dbname = "test_geopandas"

    try:
        con = sqlalchemy.create_engine(
            URL.create(
                drivername="postgresql+psycopg2",
                username=user,
                database=dbname,
                password=password,
                host=host,
                port=port,
            )
        )
        con.connect()
    except Exception:
        pytest.skip("Cannot connect with postgresql database")

    yield con
    con.dispose()
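
For reference (an assumption about SQLAlchemy's URL rendering, not something the diff itself states), the URL.create(...) call above is equivalent to a plain DSN string:

# postgresql+psycopg2://<PGUSER>:<PGPASSWORD>@<PGHOST>:<PGPORT>/test_geopandas
# e.g. (hypothetical values):
# engine = sqlalchemy.create_engine(
#     "postgresql+psycopg2://postgres:postgres@localhost:5432/test_geopandas"
# )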
@pytest.fixture()
def connection_spatialite():
    """
    Return a memory-based SQLite3 connection with SpatiaLite enabled & initialized.

    `The sqlite3 module must be built with loadable extension support
    <https://docs.python.org/3/library/sqlite3.html#f1>`_ and
    `SpatiaLite <https://www.gaia-gis.it/fossil/libspatialite/index>`_
    must be available on the system as a SQLite module.
    Packages available on Anaconda meet requirements.

    Exceptions
    ----------
    ``AttributeError`` on missing support for loadable SQLite extensions
    ``sqlite3.OperationalError`` on missing SpatiaLite
    """
    sqlite3 = pytest.importorskip("sqlite3")
    try:
        with sqlite3.connect(":memory:") as con:
            con.enable_load_extension(True)
            con.load_extension("mod_spatialite")
            con.execute("SELECT InitSpatialMetaData(TRUE)")
    except Exception:
        con.close()
        pytest.skip("Cannot setup spatialite database")

    yield con
    con.close()


def drop_table_if_exists(conn_or_engine, table):
    sqlalchemy = pytest.importorskip("sqlalchemy")

    if sqlalchemy.inspect(conn_or_engine).has_table(table):
        metadata = sqlalchemy.MetaData()
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore", message="Did not recognize type 'geometry' of column.*"
            )
            metadata.reflect(conn_or_engine)
        table = metadata.tables.get(table)
        if table is not None:
            table.drop(conn_or_engine, checkfirst=True)


@pytest.fixture
def df_mixed_single_and_multi():
    from shapely.geometry import Point, LineString, MultiLineString

    df = geopandas.GeoDataFrame(
        {
            "geometry": [
                LineString([(0, 0), (1, 1)]),
                MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
                Point(0, 1),
            ]
        },
        crs="epsg:4326",
    )
    return df


@pytest.fixture
def df_geom_collection():
    from shapely.geometry import Point, LineString, Polygon, GeometryCollection

    df = geopandas.GeoDataFrame(
        {
            "geometry": [
                GeometryCollection(
                    [
                        Polygon([(0, 0), (1, 1), (0, 1)]),
                        LineString([(0, 0), (1, 1)]),
                        Point(0, 0),
                    ]
                )
            ]
        },
        crs="epsg:4326",
    )
    return df


@pytest.fixture
def df_linear_ring():
    from shapely.geometry import LinearRing

    df = geopandas.GeoDataFrame(
        {"geometry": [LinearRing(((0, 0), (0, 1), (1, 1), (1, 0)))]}, crs="epsg:4326"
    )
    return df


@pytest.fixture
def df_3D_geoms():
    from shapely.geometry import Point, LineString, Polygon

    df = geopandas.GeoDataFrame(
        {
            "geometry": [
                LineString([(0, 0, 0), (1, 1, 1)]),
                Polygon([(0, 0, 0), (1, 1, 1), (0, 1, 1)]),
                Point(0, 1, 2),
            ]
        },
        crs="epsg:4326",
    )
    return df


class TestIO:
    def test_get_conn(self, engine_postgis):
        Connection = pytest.importorskip("sqlalchemy.engine.base").Connection

        engine = engine_postgis
        with get_conn(engine) as output:
            assert isinstance(output, Connection)
        with engine.connect() as conn:
            with get_conn(conn) as output:
                assert isinstance(output, Connection)
        with pytest.raises(ValueError):
            with get_conn(object()):
                pass

    def test_read_postgis_default(self, connection_postgis, df_nybb):
        con = connection_postgis
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con)

        validate_boro_df(df)
        # no crs defined on the created geodatabase, and none specified
        # by user; should not be set to 0, as from get_srid failure
        assert df.crs is None

    def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb):
        con = connection_postgis
        geom_col = "the_geom"
        create_postgis(con, df_nybb, geom_col=geom_col)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con, geom_col=geom_col)

        validate_boro_df(df)

    def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb):
        """Tests that a SELECT {geom} AS {some_other_geom} works."""
        con = connection_postgis
        orig_geom = "geom"
        out_geom = "the_geom"
        create_postgis(con, df_nybb, geom_col=orig_geom)

        sql = """SELECT borocode, boroname, shape_leng, shape_area,
        {} as {} FROM nybb;""".format(
            orig_geom, out_geom
        )
        df = read_postgis(sql, con, geom_col=out_geom)

        validate_boro_df(df)

    def test_read_postgis_get_srid(self, connection_postgis, df_nybb):
        """Tests that an SRID can be read from a geodatabase (GH #451)."""
        con = connection_postgis
        crs = "epsg:4269"
        df_reproj = df_nybb.to_crs(crs)
        create_postgis(con, df_reproj, srid=4269)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con)

        validate_boro_df(df)
        assert df.crs == crs

    def test_read_postgis_override_srid(self, connection_postgis, df_nybb):
        """Tests that a user specified CRS overrides the geodatabase SRID."""
        con = connection_postgis
        orig_crs = df_nybb.crs
        create_postgis(con, df_nybb, srid=4269)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con, crs=orig_crs)

        validate_boro_df(df)
        assert df.crs == orig_crs

    def test_from_postgis_default(self, connection_postgis, df_nybb):
        con = connection_postgis
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        df = GeoDataFrame.from_postgis(sql, con)

        validate_boro_df(df, case_sensitive=False)

    def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb):
        con = connection_postgis
        geom_col = "the_geom"
        create_postgis(con, df_nybb, geom_col=geom_col)

        sql = "SELECT * FROM nybb;"
        df = GeoDataFrame.from_postgis(sql, con, geom_col=geom_col)

        validate_boro_df(df, case_sensitive=False)

    def test_read_postgis_null_geom(self, connection_spatialite, df_nybb):
        """Tests that geometry with NULL is accepted."""
        con = connection_spatialite
        geom_col = df_nybb.geometry.name
        df_nybb.geometry.iat[0] = None
        create_spatialite(con, df_nybb)
        sql = (
            "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
            'AsEWKB("{0}") AS "{0}" FROM nybb'.format(geom_col)
        )
        df = read_postgis(sql, con, geom_col=geom_col)
        validate_boro_df(df)

    def test_read_postgis_binary(self, connection_spatialite, df_nybb):
        """Tests that geometry read as binary is accepted."""
        con = connection_spatialite
        geom_col = df_nybb.geometry.name
        create_spatialite(con, df_nybb)
        sql = (
            "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
            'ST_AsBinary("{0}") AS "{0}" FROM nybb'.format(geom_col)
        )
        df = read_postgis(sql, con, geom_col=geom_col)
        validate_boro_df(df)

    def test_read_postgis_chunksize(self, connection_postgis, df_nybb):
        """Test chunksize argument"""
        chunksize = 2
        con = connection_postgis
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        df = pd.concat(read_postgis(sql, con, chunksize=chunksize))

        validate_boro_df(df)
        # no crs defined on the created geodatabase, and none specified
        # by user; should not be set to 0, as from get_srid failure
        assert df.crs is None
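
A note on the chunksize test above (an aside, not part of the diff): with chunksize set, read_postgis returns an iterator of GeoDataFrame chunks rather than a single frame, which the test re-assembles with pd.concat.

    # Sketch: consuming the chunked result lazily instead of concatenating.
    #
    #   for chunk in read_postgis("SELECT * FROM nybb;", con, chunksize=2):
    #       process(chunk)  # hypothetical per-chunk handler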
    def test_read_postgis_privacy(self, connection_postgis, df_nybb):
        con = connection_postgis
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        with pytest.warns(FutureWarning):
            geopandas.io.sql.read_postgis(sql, con)

    def test_write_postgis_default(self, engine_postgis, df_nybb):
        """Tests that GeoDataFrame can be written to PostGIS with defaults."""
        engine = engine_postgis
        table = "nybb"

        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        # Write to db
        write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)
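
For orientation (an aside, not part of the diff): the private _write_postgis helper exercised throughout this class backs geopandas' public API, which a user would reach roughly as follows.

    # Hypothetical user-facing equivalent of the write/read cycle above:
    #   df_nybb.to_postgis("nybb", engine, if_exists="replace")
    #   df = geopandas.read_postgis(
    #       "SELECT * FROM nybb;", engine, geom_col="geometry"
    #   )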
    def test_write_postgis_uppercase_tablename(self, engine_postgis, df_nybb):
        """Tests writing GeoDataFrame to PostGIS with uppercase tablename."""
        engine = engine_postgis
        table = "aTestTable"

        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        # Write to db
        write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
        # Validate
        sql = text('SELECT * FROM "{table}";'.format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    def test_write_postgis_sqlalchemy_connection(self, engine_postgis, df_nybb):
        """Tests that GeoDataFrame can be written through an open SQLAlchemy connection."""
        with engine_postgis.begin() as con:
            table = "nybb_con"

            # If table exists, delete it before trying to write with defaults
            drop_table_if_exists(con, table)

            # Write to db
            write_postgis(df_nybb, con=con, name=table, if_exists="fail")
            # Validate
            sql = text("SELECT * FROM {table};".format(table=table))
            df = read_postgis(sql, con, geom_col="geometry")
            validate_boro_df(df)

    def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that uploading the same table raises an error when if_exists='fail'.
        """
        engine = engine_postgis

        table = "nybb"

        # Ensure table exists
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        with pytest.raises(ValueError, match="already exists"):
            write_postgis(df_nybb, con=engine, name=table, if_exists="fail")

    def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that replacing an existing table works when if_exists='replace'.
        """
        engine = engine_postgis

        table = "nybb"

        # Ensure table exists
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Overwrite
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that appending to an existing table produces correct results when
        if_exists='append'.
        """
        engine = engine_postgis

        table = "nybb"

        orig_rows, orig_cols = df_nybb.shape
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        write_postgis(df_nybb, con=engine, name=table, if_exists="append")
        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        new_rows, new_cols = df.shape

        # There should be twice as many rows in the new table
        assert new_rows == orig_rows * 2, (
            "There should be {target} rows, found: {current}".format(
                target=orig_rows * 2, current=new_rows
            )
        )
        # Number of columns should stay the same
        assert new_cols == orig_cols, (
            "There should be {target} columns, found: {current}".format(
                target=orig_cols, current=new_cols
            )
        )

    def test_write_postgis_without_crs(self, engine_postgis, df_nybb):
        """
        Tests that GeoDataFrame can be written to PostGIS without CRS information.
        """
        engine = engine_postgis

        table = "nybb"

        # Write to db
        df_nybb.crs = None
        with pytest.warns(UserWarning, match="Could not parse CRS from the GeoDataF"):
            write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Validate that srid is 0
        sql = text(
            "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
                schema="public", table=table, geom_col="geometry"
            )
        )
        with engine.connect() as conn:
            target_srid = conn.execute(sql).fetchone()[0]
        assert target_srid == 0, "SRID should be 0, found %s" % target_srid

    def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb):
        """
        Tests that GeoDataFrame can be written to PostGIS with ESRI Authority
        CRS information (GH #2414).
        """
        engine = engine_postgis

        table = "nybb"

        # Write to db
        df_nybb_esri = df_nybb.to_crs("ESRI:102003")
        write_postgis(df_nybb_esri, con=engine, name=table, if_exists="replace")
        # Validate that srid is 102003
        sql = text(
            "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
                schema="public", table=table, geom_col="geometry"
            )
        )
        with engine.connect() as conn:
            target_srid = conn.execute(sql).fetchone()[0]
        assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid

    def test_write_postgis_geometry_collection(
        self, engine_postgis, df_geom_collection
    ):
        """
        Tests that writing a GeometryCollection of mixed geometry types is possible.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_geom_collection, con=engine, name=table, if_exists="replace")

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            geom_type = conn.execute(sql).fetchone()[0]
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")

        assert geom_type.upper() == "GEOMETRYCOLLECTION"
        assert df.geom_type.unique()[0] == "GeometryCollection"

    def test_write_postgis_mixed_geometry_types(
        self, engine_postgis, df_mixed_single_and_multi
    ):
        """
        Tests that writing a mix of single and multi geometries is possible.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(
            df_mixed_single_and_multi, con=engine, name=table, if_exists="replace"
        )

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            res = conn.execute(sql).fetchall()
        assert res[0][0].upper() == "LINESTRING"
        assert res[1][0].upper() == "MULTILINESTRING"
        assert res[2][0].upper() == "POINT"

    def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring):
        """
        Tests that writing a LinearRing works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_linear_ring, con=engine, name=table, if_exists="replace")

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            geom_type = conn.execute(sql).fetchone()[0]

        assert geom_type.upper() == "LINESTRING"

    def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi):
        """
        Tests that writing a GeoDataFrame in chunks (chunksize=1) works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(
            df_mixed_single_and_multi,
            con=engine,
            name=table,
            if_exists="replace",
            chunksize=1,
        )
        # Validate row count
        sql = text("SELECT COUNT(geometry) FROM {table};".format(table=table))
        with engine.connect() as conn:
            row_cnt = conn.execute(sql).fetchone()[0]
        assert row_cnt == 3

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            res = conn.execute(sql).fetchall()
        assert res[0][0].upper() == "LINESTRING"
        assert res[1][0].upper() == "MULTILINESTRING"
        assert res[2][0].upper() == "POINT"

    def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb):
        """
        Tests writing data to an alternative schema.
        """
        engine = engine_postgis

        table = "nybb"
        schema_to_use = "test"
        sql = text("CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use))
        with engine.begin() as conn:
            conn.execute(sql)

        write_postgis(
            df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use
        )
        # Validate
        sql = text(
            "SELECT * FROM {schema}.{table};".format(schema=schema_to_use, table=table)
        )

        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    def test_write_postgis_to_different_schema_when_table_exists(
        self, engine_postgis, df_nybb
    ):
        """
        Tests writing data to an alternative schema when the table already exists.
        """
        engine = engine_postgis

        table = "nybb"
        schema_to_use = "test"
        sql = text("CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use))
        with engine.begin() as conn:
            conn.execute(sql)

        try:
            write_postgis(
                df_nybb, con=engine, name=table, if_exists="fail", schema=schema_to_use
            )
            # Validate
            sql = text(
                "SELECT * FROM {schema}.{table};".format(
                    schema=schema_to_use, table=table
                )
            )

            df = read_postgis(sql, engine, geom_col="geometry")
            validate_boro_df(df)

        # Should raise a ValueError when table exists
        except ValueError:
            pass

        # Try with replace flag on
        write_postgis(
            df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use
        )
        # Validate
        sql = text(
            "SELECT * FROM {schema}.{table};".format(schema=schema_to_use, table=table)
        )

        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms):
        """
        Tests that writing geometries with 3 dimensions works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_3D_geoms, con=engine, name=table, if_exists="replace")

        # Check that all geometries have 3 dimensions
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        assert list(df.geometry.has_z) == [True, True, True]

    def test_row_order(self, engine_postgis, df_nybb):
        """
        Tests that the row order in the db table follows the order of the original frame.
        """
        engine = engine_postgis

        table = "row_order_test"
        correct_order = df_nybb["BoroCode"].tolist()

        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # Check that the row order matches
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        assert df["BoroCode"].tolist() == correct_order

    def test_append_before_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that insert works with if_exists='append' when the table does not exist yet.
        """
        engine = engine_postgis

        table = "nybb"
        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        write_postgis(df_nybb, con=engine, name=table, if_exists="append")

        # Validate the write
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    def test_append_with_different_crs(self, engine_postgis, df_nybb):
        """
        Tests that an error is raised if the table CRS differs from the frame's.
        """
        engine = engine_postgis

        table = "nybb"
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # Reproject
        df_nybb2 = df_nybb.to_crs(epsg=4326)

        # Should raise an error when appending
        with pytest.raises(ValueError, match="CRS of the target table"):
            write_postgis(df_nybb2, con=engine, name=table, if_exists="append")

    @pytest.mark.xfail(
        compat.PANDAS_GE_20 and not compat.PANDAS_GE_21,
        reason="Duplicate columns are dropped in read_sql with pandas 2.0.x",
    )
    def test_duplicate_geometry_column_fails(self, engine_postgis):
        """
        Tests that a ValueError is raised if an SQL query returns two geometry columns.
        """
        engine = engine_postgis

        sql = "select ST_MakePoint(0, 0) as geom, ST_MakePoint(0, 0) as geom;"

        with pytest.raises(ValueError):
            read_postgis(sql, engine, geom_col="geom")