I just gave everyone access to the code. You can also see the full code below:
# Jupyter notebook related
%reload_ext autoreload
%autoreload 2
%matplotlib inline
# Built-in modules
import os
import datetime
import itertools
from aenum import MultiValueEnum
from enum import Enum
# Basics of Python data handling and visualization
import numpy as np
#np.random.seed(42)
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import ListedColormap, BoundaryNorm
from shapely.geometry import Polygon
from tqdm.auto import tqdm
# Machine learning
import lightgbm as lgb
import joblib
from sklearn import metrics
from sklearn import preprocessing
from eolearn.core import (
    EOTask,
    EOPatch,
    EOWorkflow,
    linearly_connect_tasks,
    FeatureType,
    OverwritePermission,
    LoadTask,
    SaveTask,
    EOExecutor,
    MergeFeatureTask,
)
from eolearn.io import SentinelHubInputTask, VectorImportTask, ExportToTiffTask
from eolearn.geometry import VectorToRasterTask, ErosionTask
from eolearn.features import LinearInterpolationTask, SimpleFilterTask, NormalizedDifferenceIndexTask
from eolearn.ml_tools import FractionSamplingTask
from sentinelhub import UtmZoneSplitter, DataCollection, SHConfig, CRS
# Folder where data for running the notebook is stored
DATA_FOLDER = os.path.join(".", "example_data")
# Locations for collected data and intermediate results
EOPATCH_FOLDER = os.path.join(".", "eopatches")
EOPATCH_SAMPLES_FOLDER = os.path.join(".", "eopatches_sampled")
RESULTS_FOLDER = os.path.join(".", "results")
for folder in (DATA_FOLDER, EOPATCH_FOLDER, EOPATCH_SAMPLES_FOLDER, RESULTS_FOLDER):
    os.makedirs(folder, exist_ok=True)
# Load geojson file
aoi = gpd.read_file(os.path.join(DATA_FOLDER, 'new_coordonates', 'EO_Browser_2480_36','Kossou_2480_36_km2.geojson'))
# Plot the area of interest
aoi.plot()
plt.axis('off');
# Set the target UTM CRS and reproject the AOI
aoi_crs = CRS.UTM_30N
aoi = aoi.to_crs(epsg=aoi_crs.epsg)
aoi.plot()
plt.axis('off')
aoi = gpd.read_file(os.path.join(DATA_FOLDER, 'new_coordonates', 'EO_Browser_2480_36','Kossou_2480_36_km2.geojson'))
aoi = aoi.to_crs(epsg=aoi_crs.epsg)
# Geometry of the AOI (assuming a single polygon)
aoi_shape = aoi.geometry.values[0]
# Create a splitter to obtain a list of bboxes with 5 km sides
bbox_splitter = UtmZoneSplitter([aoi_shape], aoi.crs, 5000)
bbox_list = np.array(bbox_splitter.get_bbox_list())
info_list = np.array(bbox_splitter.get_info_list())
len(bbox_list)
# Prepare info of selected EOPatches
geometry = [Polygon(bbox.get_polygon()) for bbox in bbox_list]
idxs = [info["index"] for info in info_list]
idxs_x = [info["index_x"] for info in info_list]
idxs_y = [info["index_y"] for info in info_list]
bbox_gdf = gpd.GeoDataFrame({"index": idxs, "index_x": idxs_x, "index_y": idxs_y}, crs=aoi.crs, geometry=geometry)
# select a 5x5 area (id of center patch)
ID = 58
# Obtain surrounding 5x5 patches
patchIDs = []
for idx, (bbox, info) in enumerate(zip(bbox_list, info_list)):
    if abs(info["index_x"] - info_list[ID]["index_x"]) <= 2 and abs(info["index_y"] - info_list[ID]["index_y"]) <= 2:
        patchIDs.append(idx)
# Check that the final selection is 5x5
if len(patchIDs) != 5 * 5:
    print("Warning! Use a different central patch ID, this one is on the border.")
# Change the order of the patches (useful for plotting)
patchIDs = np.transpose(np.fliplr(np.array(patchIDs).reshape(5, 5))).ravel()
# Save the tiling grid to a GeoPackage file
shapefile_name = "grid_kossou_983_29_500x500.gpkg"
bbox_gdf.to_file(os.path.join(RESULTS_FOLDER, shapefile_name), driver="GPKG")
# Display bboxes over area
fig, ax = plt.subplots(figsize=(30, 30))
ax.set_title("Selected 5x5 tiles from kossou", fontsize=25)
aoi.plot(ax=ax, facecolor="w", edgecolor="b", alpha=0.5)
bbox_gdf.plot(ax=ax, facecolor="w", edgecolor="r", alpha=0.5)
for bbox, info in zip(bbox_list, info_list):
    geo = bbox.geometry
    ax.text(geo.centroid.x, geo.centroid.y, info["index"], ha="center", va="center")
# Mark bboxes of selected area
bbox_gdf[bbox_gdf.index.isin(patchIDs)].plot(ax=ax, facecolor="g", edgecolor="r", alpha=0.5)
plt.axis("off");
# Download data
# Sentinel Hub configuration (assumes credentials have already been set up in SHConfig)
config = SHConfig()
class SentinelHubValidDataTask(EOTask):
    """
    Combine the Sen2Cor classification map with `IS_DATA` to define a valid-data mask.
    The Sentinel Hub cloud mask is assumed to be found in eopatch.mask['CLM'].
    """

    def __init__(self, output_feature):
        self.output_feature = output_feature

    def execute(self, eopatch):
        eopatch[self.output_feature] = eopatch.mask["IS_DATA"].astype(bool) & (~eopatch.mask["CLM"].astype(bool))
        return eopatch
class AddValidCountTask(EOTask):
    """
    Count the number of valid observations in the time series and store the result in a timeless mask.
    """

    def __init__(self, count_what, feature_name):
        self.what = count_what
        self.name = feature_name

    def execute(self, eopatch):
        eopatch[FeatureType.MASK_TIMELESS, self.name] = np.count_nonzero(eopatch.mask[self.what], axis=0)
        return eopatch
# BAND DATA
# Add a request for S2 bands.
# Here we also do a simple filter of cloudy scenes (on tile level).
# The s2cloudless masks and probabilities are requested via additional data.
band_names = ["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B09", "B11", "B12"]
add_data = SentinelHubInputTask(
    bands_feature=(FeatureType.DATA, "BANDS"),
    bands=band_names,
    resolution=20,
    maxcc=0.8,
    time_difference=datetime.timedelta(minutes=120),
    data_collection=DataCollection.SENTINEL2_L1C,
    additional_data=[(FeatureType.MASK, "dataMask", "IS_DATA"), (FeatureType.MASK, "CLM"), (FeatureType.DATA, "CLP")],
    config=config,
    max_threads=5,
)
scl = SentinelHubInputTask(
    data_collection=DataCollection.SENTINEL2_L2A,
    bands=["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B09", "B11", "B12"],
    bands_feature=(FeatureType.DATA, "L2A_data"),
    additional_data=[(FeatureType.MASK, "SCL")],
    resolution=20,
    maxcc=0.8,
    time_difference=datetime.timedelta(minutes=120),
    config=config,
    max_threads=3,
)
# CALCULATING NEW FEATURES
# NDVI: (B08 - B04)/(B08 + B04)
# NDWI: (B03 - B08)/(B03 + B08)
# NDBI: (B11 - B08)/(B11 + B08)
ndvi = NormalizedDifferenceIndexTask(
    (FeatureType.DATA, "BANDS"), (FeatureType.DATA, "NDVI"), [band_names.index("B08"), band_names.index("B04")]
)
ndwi = NormalizedDifferenceIndexTask(
    (FeatureType.DATA, "BANDS"), (FeatureType.DATA, "NDWI"), [band_names.index("B03"), band_names.index("B08")]
)
ndbi = NormalizedDifferenceIndexTask(
    (FeatureType.DATA, "BANDS"), (FeatureType.DATA, "NDBI"), [band_names.index("B11"), band_names.index("B08")]
)
# VALIDITY MASK
# Validate pixels using SentinelHub's cloud detection mask and region of acquisition
add_sh_validmask = SentinelHubValidDataTask((FeatureType.MASK, "IS_VALID"))
# COUNTING VALID PIXELS
# Count the number of valid observations per pixel using valid data mask
add_valid_count = AddValidCountTask("IS_VALID", "VALID_COUNT")
# SAVING TO OUTPUT (if needed)
save = SaveTask(EOPATCH_FOLDER, overwrite_permission=OverwritePermission.OVERWRITE_PATCH)
class LULC(MultiValueEnum):
    """Enum class containing basic LULC types"""

    NO_DATA = "No Data", 0, "#ffffff"
    WATER = "Water", 1, "#069af3"
    FOREST = "Forest", 2, "#054907"
    BUILDUP = "BuildUp", 3, "#dc143c"
    BARRENLAND = "BarrenLand", 4, "#a6a6a6"
    SAVANNA = "Savanna", 5, "#564234"

    @property
    def id(self):
        return self.values[1]

    @property
    def color(self):
        return self.values[2]
# Reference colormap things
lulc_cmap = ListedColormap([x.color for x in LULC], name="lulc_cmap")
lulc_norm = BoundaryNorm([x - 0.5 for x in range(len(LULC) + 1)], lulc_cmap.N)
# Import reference data
fg = os.path.join(DATA_FOLDER, 'new_coordonates', 'EO_Browser_2480_36', 'kossou_classification_2480_36_sec.gpkg')
data = gpd.read_file(fg)
outfg = os.path.join(DATA_FOLDER, "Kossou_final_classification_ref_map_5_788_02.gpkg")
data.to_file(outfg, driver="GPKG")
vector_feature = FeatureType.VECTOR_TIMELESS, "LULC_REFERENCE"
vector_import_task = VectorImportTask(vector_feature, outfg)
rasterization_task = VectorToRasterTask(
    vector_feature,
    (FeatureType.MASK_TIMELESS, "LULC"),
    values_column="DN",
    raster_shape=(FeatureType.MASK, "IS_DATA"),
    raster_dtype=np.uint8,
)
# Define the workflow
workflow_nodes = linearly_connect_tasks(
    add_data, ndvi, ndwi, ndbi, add_sh_validmask, add_valid_count, scl, vector_import_task, rasterization_task, save
)
workflow = EOWorkflow(workflow_nodes)
# Let's visualize it
workflow.dependency_graph()
%%time
# Time interval for the SH request
time_interval = ["2022-01-01", "2022-03-31"]
# Define additional parameters of the workflow
input_node = workflow_nodes[0]
save_node = workflow_nodes[-1]
execution_args = []
for idx, bbox in enumerate(bbox_list[patchIDs]):
    execution_args.append(
        {
            input_node: {"bbox": bbox, "time_interval": time_interval},
            save_node: {"eopatch_folder": f"eopatch_{idx}"},
        }
    )
# Execute the workflow
executor = EOExecutor(workflow, execution_args, save_logs=True)
executor.run(workers=4)
executor.make_report()
failed_ids = executor.get_failed_executions()
if failed_ids:
    raise RuntimeError(
        f"Execution failed EOPatches with IDs:\n{failed_ids}\n"
        f"For more info check report at {executor.get_report_path()}"
    )
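# To sanity-check the rasterized reference map, the LULC colormap defined above can be used to
# plot one of the saved patches; a minimal sketch, assuming eopatch_0 was created successfully:
eopatch = EOPatch.load(os.path.join(EOPATCH_FOLDER, "eopatch_0"))
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(eopatch.mask_timeless["LULC"].squeeze(), cmap=lulc_cmap, norm=lulc_norm)
ax.set_title("Rasterized LULC reference map, eopatch_0")
ax.axis("off");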
I work on Linux. I also tried setting workers to 1. I get a long message that starts with:
DEBUG:fiona.ogrext:Setting field 0, type 'int64', to value 2
DEBUG:fiona.ogrext:Set field DN: 2
DEBUG:fiona.ogrext:Creating feature in layer: {'id': '285', 'type': 'Feature', 'properties': {'DN': 2}, 'geometry': {'type': 'Polygon', 'coordinates': (((214627.6881537891, 783158.8817835766), (214665.49466143537, 783158.6716159518), (214665.28583181952, 783121.1024123096), (214703.0923572732, 783120.8922824796), (214702.2572494616, 782970.6155117612), (214588.83731039264, 782971.2458665003), (214588.62847514445, 782933.6766452814), (214626.4351591771, 782933.4665089982), (214626.22636163695, 782895.8972980912), (214588.41964986757, 782896.1074244973), (214588.00202922802, 782820.9689842344), (214550.19525204902, 782821.1791187947), (214549.9864289536, 782783.6098894373), (214512.17961410008, 782783.8200420266), (214513.01507725683, 782934.0970015855), (214550.8217811713, 782933.8868094772), (214551.23951711645, 783009.0252721071), (214589.04615561216, 783008.8150881545), (214589.2550108031, 783046.3843102437), (214551.44840004796, 783046.5945040747), (214551.65729295224, 783084.1637364773), (214589.46387596527, 783083.9535327681), (214589.67275109893, 783121.5227557274), (214627.47929642873, 783121.3125700589), (214627.6881537891, 783158.8817835766)),)}}
DEBUG:fiona.ogrext:Looking up DN in {'DN': 'DN'}
DEBUG:fiona.ogrext:Normalizing schema type for key 'DN' in schema OrderedDict([('DN', 'int')]) to 'int64'
DEBUG:fiona.ogrext:Setting field 0, type 'int64', to value 2
DEBUG:fiona.ogrext:Set field DN: 2
and it ends with:
DEBUG:sentinelhub.download.sentinelhub_client:Sending POST request to https://services.sentinel-hub.com/api/v1/process. Hash of sent request is af823da65f9b52cf7eaaf178307cbfc0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): services.sentinel-hub.com:443
DEBUG:urllib3.connectionpool:https://services.sentinel-hub.com:443 "POST /api/v1/process HTTP/1.1" 200 None
And when I visualize the result with the following code:
EOPatch.load('./eopatches/eopatch_0/')
I get this:
DEBUG:fiona.env:Entering env context: <fiona.env.Env object at 0x7f83fc244490>
DEBUG:fiona.env:Starting outermost env
DEBUG:fiona.env:No GDAL environment exists
DEBUG:fiona.env:New GDAL environment <fiona._env.GDALEnv object at 0x7f83fc70ddd0> created
DEBUG:fiona._env:GDAL data found in package: path='/usr/local/lib/python3.7/dist-packages/fiona/gdal_data'.
DEBUG:fiona._env:PROJ data found in package: path='/usr/local/lib/python3.7/dist-packages/fiona/proj_data'.
DEBUG:fiona._env:Started GDALEnv: self=<fiona._env.GDALEnv object at 0x7f83fc70ddd0>.
DEBUG:fiona.env:Updated existing <fiona._env.GDALEnv object at 0x7f83fc70ddd0> with options {}
DEBUG:fiona.env:Entered env context: <fiona.env.Env object at 0x7f83fc244490>
WARNING:fiona._env:File /vsimem/1e7531905b294a89bfd239c2fb7fff1a has GPKG application_id, but non conformant file extension
DEBUG:fiona.ogrext:Got coordinate system
DEBUG:fiona.ogrext:OLC_FASTSETNEXTBYINDEX: 0
DEBUG:fiona.ogrext:OLC_FASTFEATURECOUNT: 1
DEBUG:fiona.ogrext:Next index: 0
....
DEBUG:fiona.ogrext:Next index: 283
DEBUG:fiona.collection:Flushed buffer
DEBUG:fiona.collection:Stopped session
DEBUG:fiona.env:Exiting env context: <fiona.env.Env object at 0x7f83fc244490>
DEBUG:fiona.env:Cleared existing <fiona._env.GDALEnv object at 0x7f83fc70ddd0> options
DEBUG:fiona._env:Stopping GDALEnv <fiona._env.GDALEnv object at 0x7f83fc70ddd0>.
DEBUG:fiona._env:Error handler popped.
DEBUG:fiona._env:Stopped GDALEnv <fiona._env.GDALEnv object at 0x7f83fc70ddd0>.
DEBUG:fiona.env:Exiting outermost env
DEBUG:fiona.env:Exited env context: <fiona.env.Env object at 0x7f83fc244490>
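I assume I can silence this DEBUG output by raising the logging level of the libraries involved, with something like the snippet below, but I would still like to know whether the output itself points to a problem:
import logging
# Raise the level of the noisy third-party loggers seen above (fiona, urllib3, sentinelhub)
for noisy_logger in ("fiona", "urllib3", "sentinelhub"):
    logging.getLogger(noisy_logger).setLevel(logging.WARNING)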
Is this normal? Thank you very much for your time and help.