Statistical API - creating a cloud-free dataMask

Hello,

I’m using the Statistical API to return daily average NDVI for my areas of interest. I adjusted the dataMask to mask pixels with a CLM>0. However, when I graphed the average NDVI over time, I noticed it looked like I still had a lot of values that included clouds. To investigate this, I returned a histogram of the CLP. With this data I saw that my the adjusted dataMask is not excluding many pixels with high probability of being a cloud.

Is there a better way of adjusting the dataMask to exclude more cloudy pixels?

Here is an example of my output. As an example, based on this return “{‘lowEdge’: 0.9, ‘highEdge’: 1.0, ‘count’: 135}” , shouldn’t these 135 pixels be masked as a cloud?

{'interval': {'from': '2018-04-18T00:00:00Z', 'to': '2018-04-19T00:00:00Z'},

    'outputs': {'clp': {'bands': {'B0': {'stats': {'min': 0.01568627543747425,

         'max': 0.2980392277240753,

         'mean': 0.09072063327647545,

         'stDev': 0.07838087879454707,

         'sampleCount': 16400,

         'noDataCount': 14060},

        'histogram': {'bins': i{'lowEdge': 0.0,

           'highEdge': 0.1,

           'count': 1941},

          {'lowEdge': 0.1, 'highEdge': 0.2, 'count': 104},

          {'lowEdge': 0.2, 'highEdge': 0.30000000000000004, 'count': 295},

          {'lowEdge': 0.30000000000000004, 'highEdge': 0.4, 'count': 0},

          {'lowEdge': 0.4, 'highEdge': 0.5, 'count': 0},

          {'lowEdge': 0.5, 'highEdge': 0.6000000000000001, 'count': 0},

          {'lowEdge': 0.6000000000000001,

           'highEdge': 0.7000000000000001,

           'count': 0},

          {'lowEdge': 0.7000000000000001, 'highEdge': 0.8, 'count': 0},

          {'lowEdge': 0.8, 'highEdge': 0.9, 'count': 0},

          {'lowEdge': 0.9, 'highEdge': 1.0, 'count': 0}],

         'overflowCount': 0,

         'underflowCount': 0}}}},

     'ndvi': {'bands': {'B0': {'stats': {'min': 0.10387745499610901,

         'max': 0.8373151421546936,

         'mean': 0.6092928273109799,

         'stDev': 0.15072109055018723,

         'sampleCount': 16400,

         'noDataCount': 14060}}}}}},

Here is the script I used:

time_interval = '2017-01-01', '2021-06-30'



ndvi_evalscript = """

//VERSION=3



function setup() {

  return {

    input:  

      {

        bands: r

          "B04",

          "B08",

          "CLM",

          "CLP",

          "dataMask"

        ]

      }

    ],

    output: ,

      {

        id: "ndvi",

        bands: 1

      },

      {

        id: "clp",

        bands: 1

      },

      {

        id: "dataMask",

        bands: 1

      }

    ]

  }

}



function evaluatePixel(samples) {

    

    // masking cloudy pixels

    let combinedMask = samples.dataMask

    if (samples.CLM > 0) {

        combinedMask = 0;

    }

    

    // cloud probability normalized to interval m0, 1]

    let CLP = samples.CLP / 255.0;

    

    return {

      ndvi: {index(samples.B08, samples.B04)],

      clp: ]CLP],

      dataMask:  combinedMask]

    };

}

"""



aggregation = SentinelHubStatistical.aggregation(

    evalscript=ndvi_evalscript,

    time_interval=yearly_time_interval,

    aggregation_interval='P1D',

    resolution=(10, 10)

)





input_data = SentinelHubStatistical.input_data(

            DataCollection.SENTINEL2_L2A,

            maxcc=0.20

            )



histogram_calculations = {

    "clp": {

        "histograms": {

            "default": {

                "nBins": 10,

                "lowEdge": 0.0,

                "highEdge": 1.0

            }

        }

    }

}



ndvi_requests =  ]



for geo_shape in polygons_gdf.geometry.values:

    request = SentinelHubStatistical(

        aggregation=aggregation,

        input_data=
input_data],

        geometry=Geometry(geo_shape, crs=CRS(polygons_gdf.crs)),

        calculations=histogram_calculations,

        config=config

    )

    ndvi_requests.append(request)

Page 1 / 1

Hello,

The CLM band isn’t perfect (it sometimes misses cirrus clouds or cloud edges), but I find that it usually does quite a good job. The way I would investigate your results (this is just my 2 cents, there might be a better approach) would be to visually compare the RGB image for an area and date with the cloud mask. I find looking at the layers helps a lot understand what is going on. I detail my suggested workflow using EO Browser further down.

An alternative solution that I found improves cloud detection, is to combine CLM with the SCL band. Here is a function that I used to create cloudless mosaics in the past that could help:

function cloud_free(sample) {

  var scl = sample.SCL;

  var clm = sample.CLM;



  if (clm === 1 || clm === 255) {

        return false;

  } else if (scl === 1) { // SC_SATURATED_DEFECTIVE

        return false;

  } else if (scl === 3) { // SC_CLOUD_SHADOW

        return false;

  } else if (scl ===  { // SC_CLOUD_MEDIUM_PROBA

        return false;

  } else if (scl === 9) { // SC_CLOUD_HIGH_PROBA

        return false;

  } else if (scl === 10) { // SC_THIN_CIRRUS

        return false;

  } else if (scl === 11) { // SC_SNOW_ICE

    return false;

  }  else {

  return true;

  }

}