from matplotlib import pyplot as plt
import zarr
import colorcet
import moraine.cli as mc
from moraine.utils_ import is_cuda_available
shp
Spatially Homogeneous Pixels Identification
shp_test
shp_test (rslc:str, pvalue:str, az_half_win:int, r_half_win:int, method:str=None, chunks:tuple[int,int]=None, cuda:bool=False, processes=None, n_workers=None, threads_per_worker=None, rmm_pool_size=0.9, **dask_cluster_arg)
SHP identification through hypothesis testing.
| | Type | Default | Details |
|---|---|---|---|
| rslc | str | | input: rslc stack |
| pvalue | str | | output: the p value of the test |
| az_half_win | int | | azimuth half window size |
| r_half_win | int | | range half window size |
| method | str | None | SHP identification method, optional. Default: ks |
| chunks | tuple | None | chunk size, optional. Default: the chunk size in rslc |
| cuda | bool | False | whether to use cuda for processing, False by default |
| processes | NoneType | None | use processes rather than threads for dask workers; the default is False for cpu, only applied if cuda==False |
| n_workers | NoneType | None | number of dask workers; the default is 1 for cpu, the number of GPUs for cuda |
| threads_per_worker | NoneType | None | number of threads per dask worker; the default is 2 for cpu, only applied if cuda==False |
| rmm_pool_size | float | 0.9 | set the rmm pool size, only applied when cuda==True |
| dask_cluster_arg | | | |
This function is a wrapper around the functions in decorrelation.shp that provides a file interface. Please refer to them for usage. It utilizes dask for parallel and distributed computation. Compared with the functions in decorrelation.shp, this function splits the dataset into several chunks, and the computation on these chunks can run in parallel on multiple GPUs.
The chunks argument is used to determine how many pixels in range and azimuth are in one chunk. The chunk size of the output pvalue is also set according to it.
rslc = './raw/rslc.zarr'
pvalue = './shp/pvalue.zarr'
logger = mc.get_logger()
shp_test(rslc,pvalue,az_half_win=5,r_half_win=5,method=None)
2024-08-14 16:57:43 - log_args - INFO - running function: shp_test
2024-08-14 16:57:43 - log_args - INFO - fetching args:
2024-08-14 16:57:43 - log_args - INFO - rslc = './raw/rslc.zarr'
2024-08-14 16:57:43 - log_args - INFO - pvalue = './shp/pvalue.zarr'
2024-08-14 16:57:43 - log_args - INFO - az_half_win = 5
2024-08-14 16:57:43 - log_args - INFO - r_half_win = 5
2024-08-14 16:57:43 - log_args - INFO - method = None
2024-08-14 16:57:43 - log_args - INFO - chunks = None
2024-08-14 16:57:43 - log_args - INFO - cuda = False
2024-08-14 16:57:43 - log_args - INFO - processes = None
2024-08-14 16:57:43 - log_args - INFO - n_workers = None
2024-08-14 16:57:43 - log_args - INFO - threads_per_worker = None
2024-08-14 16:57:43 - log_args - INFO - rmm_pool_size = 0.9
2024-08-14 16:57:43 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 16:57:43 - log_args - INFO - fetching args done.
2024-08-14 16:57:43 - shp_test - INFO - hypothetic test method: ks
2024-08-14 16:57:43 - zarr_info - INFO - ./raw/rslc.zarr zarray shape, chunks, dtype: (2500, 1834, 17), (1000, 1000, 1), complex64
2024-08-14 16:57:43 - shp_test - INFO - starting dask local cluster.
2024-08-14 16:57:44 - shp_test - INFO - dask local cluster started.
2024-08-14 16:57:44 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 16:57:44 - shp_test - INFO - azimuth half window size: 5; azimuth window size: 11
2024-08-14 16:57:44 - shp_test - INFO - range half window size: 5; range window size: 11
2024-08-14 16:57:44 - darr_info - INFO - rslc with overlap dask array shape, chunksize, dtype: (2520, 1844, 17), (1010, 1005, 17), complex64
2024-08-14 16:57:44 - shp_test - INFO - applying test on rmli stack.
2024-08-14 16:57:44 - shp_test - INFO - trim shared boundaries between p value chunks
2024-08-14 16:57:44 - darr_info - INFO - p value dask array shape, chunksize, dtype: (2500, 1834, 11, 11), (1000, 1000, 11, 11), float32
2024-08-14 16:57:44 - shp_test - INFO - saving p value.
2024-08-14 16:57:45 - zarr_info - INFO - ./shp/pvalue.zarr zarray shape, chunks, dtype: (2500, 1834, 11, 11), (1000, 1000, 1, 1), float32
2024-08-14 16:57:45 - shp_test - INFO - computing graph setted. doing all the computing.
2024-08-14 16:57:53 - shp_test - INFO - computing finished. | 7.6s
2024-08-14 16:57:53 - shp_test - INFO - dask cluster closed.
CPU times: user 2min 25s, sys: 9.68 s, total: 2min 35s
Wall time: 9.67 s
if is_cuda_available():
    shp_test(rslc,pvalue,az_half_win=5,r_half_win=5,cuda=True)
2024-08-14 16:57:53 - log_args - INFO - running function: shp_test
2024-08-14 16:57:53 - log_args - INFO - fetching args:
2024-08-14 16:57:53 - log_args - INFO - rslc = './raw/rslc.zarr'
2024-08-14 16:57:53 - log_args - INFO - pvalue = './shp/pvalue.zarr'
2024-08-14 16:57:53 - log_args - INFO - az_half_win = 5
2024-08-14 16:57:53 - log_args - INFO - r_half_win = 5
2024-08-14 16:57:53 - log_args - INFO - method = None
2024-08-14 16:57:53 - log_args - INFO - chunks = None
2024-08-14 16:57:53 - log_args - INFO - cuda = True
2024-08-14 16:57:53 - log_args - INFO - processes = None
2024-08-14 16:57:53 - log_args - INFO - n_workers = None
2024-08-14 16:57:53 - log_args - INFO - threads_per_worker = None
2024-08-14 16:57:53 - log_args - INFO - rmm_pool_size = 0.9
2024-08-14 16:57:53 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 16:57:53 - log_args - INFO - fetching args done.
2024-08-14 16:57:53 - shp_test - INFO - hypothetic test method: ks
2024-08-14 16:57:53 - zarr_info - INFO - ./raw/rslc.zarr zarray shape, chunks, dtype: (2500, 1834, 17), (1000, 1000, 1), complex64
2024-08-14 16:57:53 - shp_test - INFO - starting dask local cluster.
2024-08-14 16:57:59 - shp_test - INFO - dask local cluster started.
2024-08-14 16:57:59 - dask_cluster_info - INFO - dask cluster: LocalCUDACluster(dashboard_link='http://127.0.0.1:8787/status', workers=8, threads=8, memory=1.46 TiB)
2024-08-14 16:57:59 - shp_test - INFO - azimuth half window size: 5; azimuth window size: 11
2024-08-14 16:57:59 - shp_test - INFO - range half window size: 5; range window size: 11
2024-08-14 16:57:59 - darr_info - INFO - rslc with overlap dask array shape, chunksize, dtype: (2520, 1844, 17), (1010, 1005, 17), complex64
2024-08-14 16:57:59 - shp_test - INFO - applying test on rmli stack.
2024-08-14 16:57:59 - shp_test - INFO - trim shared boundaries between p value chunks
2024-08-14 16:57:59 - darr_info - INFO - p value dask array shape, chunksize, dtype: (2500, 1834, 11, 11), (1000, 1000, 11, 11), float32
2024-08-14 16:57:59 - shp_test - INFO - saving p value.
2024-08-14 16:58:00 - zarr_info - INFO - ./shp/pvalue.zarr zarray shape, chunks, dtype: (2500, 1834, 11, 11), (1000, 1000, 1, 1), float32
2024-08-14 16:58:00 - shp_test - INFO - computing graph setted. doing all the computing.
2024-08-14 16:58:03 - shp_test - INFO - computing finished. | 3.0s
2024-08-14 16:58:05 - shp_test - INFO - dask cluster closed.
CPU times: user 580 ms, sys: 1.67 s, total: 2.25 s
Wall time: 12.3 s
select_shp
select_shp (pvalue:str, is_shp:str, shp_num:str, p_max:float=0.05, chunks:tuple[int,int]=None, processes=False, n_workers=1, threads_per_worker=1, **dask_cluster_arg)
Select SHP based on pvalue of SHP test.
| | Type | Default | Details |
|---|---|---|---|
| pvalue | str | | input: pvalue of hypothesis test |
| is_shp | str | | output: bool array indicating the SHPs |
| shp_num | str | | output: integer array indicating number of SHPs |
| p_max | float | 0.05 | threshold of p value to select SHP, optional. Default: 0.05 |
| chunks | tuple | None | chunk size, optional. Default: the chunk size in pvalue |
| processes | bool | False | use processes rather than threads for dask workers, the default is False |
| n_workers | int | 1 | number of dask workers, the default is 1 |
| threads_per_worker | int | 1 | number of threads per dask worker |
| dask_cluster_arg | | | |
pvalue = './shp/pvalue.zarr'
is_shp = './shp/is_shp.zarr'
shp_num = './shp/num.zarr'
p_max = 0.05
select_shp(pvalue,is_shp,shp_num,p_max=p_max)
2024-08-14 16:58:05 - log_args - INFO - running function: select_shp
2024-08-14 16:58:05 - log_args - INFO - fetching args:
2024-08-14 16:58:05 - log_args - INFO - pvalue = './shp/pvalue.zarr'
2024-08-14 16:58:05 - log_args - INFO - is_shp = './shp/is_shp.zarr'
2024-08-14 16:58:05 - log_args - INFO - shp_num = './shp/num.zarr'
2024-08-14 16:58:05 - log_args - INFO - p_max = 0.05
2024-08-14 16:58:05 - log_args - INFO - chunks = None
2024-08-14 16:58:05 - log_args - INFO - processes = False
2024-08-14 16:58:05 - log_args - INFO - n_workers = 1
2024-08-14 16:58:05 - log_args - INFO - threads_per_worker = 1
2024-08-14 16:58:05 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 16:58:05 - log_args - INFO - fetching args done.
2024-08-14 16:58:05 - zarr_info - INFO - ./shp/pvalue.zarr zarray shape, chunks, dtype: (2500, 1834, 11, 11), (1000, 1000, 1, 1), float32
2024-08-14 16:58:05 - select_shp - INFO - starting dask cluster.
2024-08-14 16:58:05 - select_shp - INFO - dask cluster started.
2024-08-14 16:58:05 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 16:58:05 - darr_info - INFO - pvalue dask array shape, chunksize, dtype: (2500, 1834, 11, 11), (1000, 1000, 11, 11), float32
2024-08-14 16:58:05 - select_shp - INFO - selecting SHPs based on pvalue threshold: 0.05
2024-08-14 16:58:05 - darr_info - INFO - is_shp dask array shape, chunksize, dtype: (2500, 1834, 11, 11), (1000, 1000, 11, 11), bool
2024-08-14 16:58:05 - select_shp - INFO - calculate shp_num.
2024-08-14 16:58:05 - darr_info - INFO - shp_num dask array shape, chunksize, dtype: (2500, 1834), (1000, 1000), int32
2024-08-14 16:58:05 - select_shp - INFO - saving is_shp.
2024-08-14 16:58:05 - zarr_info - INFO - ./shp/is_shp.zarr zarray shape, chunks, dtype: (2500, 1834, 11, 11), (1000, 1000, 1, 1), bool
2024-08-14 16:58:05 - select_shp - INFO - saving shp_num.
2024-08-14 16:58:05 - zarr_info - INFO - ./shp/num.zarr zarray shape, chunks, dtype: (2500, 1834), (1000, 1000), int32
2024-08-14 16:58:05 - select_shp - INFO - computing graph setted. doing all the computing.
2024-08-14 16:58:10 - select_shp - INFO - computing finished. 4.9s
2024-08-14 16:58:10 - select_shp - INFO - dask cluster closed.
CPU times: user 50.7 s, sys: 13.9 s, total: 1min 4s
Wall time: 5.46 s
shp_num_zarr = zarr.open(shp_num,'r')
fig, ax = plt.subplots(1,1,figsize=(10,10))
pcm = ax.imshow(shp_num_zarr[:],cmap=colorcet.cm.fire)
ax.set(title='Number of SHPs',xlabel='Range Index',ylabel='Azimuth Index')
fig.colorbar(pcm)
fig.show()
Then we can select DS candidates based on number of SHPs and slice is_shp only on DS candidates.
ds_can_gix = './shp/ds_can_gix.zarr'
is_ds_can = './shp/is_ds_can.zarr'
mc.pc_logic_ras(shp_num,ds_can_gix,'ras>=50')
2024-08-14 16:58:11 - log_args - INFO - running function: pc_logic_ras
2024-08-14 16:58:11 - log_args - INFO - fetching args:
2024-08-14 16:58:11 - log_args - INFO - ras = './shp/num.zarr'
2024-08-14 16:58:11 - log_args - INFO - gix = './shp/ds_can_gix.zarr'
2024-08-14 16:58:11 - log_args - INFO - operation = 'ras>=50'
2024-08-14 16:58:11 - log_args - INFO - chunks = 100000
2024-08-14 16:58:11 - log_args - INFO - fetching args done.
2024-08-14 16:58:11 - zarr_info - INFO - ./shp/num.zarr zarray shape, chunks, dtype: (2500, 1834), (1000, 1000), int32
2024-08-14 16:58:11 - pc_logic_ras - INFO - loading ras into memory.
2024-08-14 16:58:11 - pc_logic_ras - INFO - select pc based on operation: ras>=50
2024-08-14 16:58:11 - pc_logic_ras - INFO - number of selected pixels: 732727.
2024-08-14 16:58:11 - zarr_info - INFO - ./shp/ds_can_gix.zarr zarray shape, chunks, dtype: (732727, 2), (100000, 1), int32
2024-08-14 16:58:11 - pc_logic_ras - INFO - writing gix.
2024-08-14 16:58:11 - pc_logic_ras - INFO - write done.
mc.gix2bool(ds_can_gix, is_ds_can, shape=shp_num_zarr.shape)
2024-08-14 16:58:11 - log_args - INFO - running function: gix2bool
2024-08-14 16:58:11 - log_args - INFO - fetching args:
2024-08-14 16:58:11 - log_args - INFO - gix = './shp/ds_can_gix.zarr'
2024-08-14 16:58:11 - log_args - INFO - is_pc = './shp/is_ds_can.zarr'
2024-08-14 16:58:11 - log_args - INFO - shape = (2500, 1834)
2024-08-14 16:58:11 - log_args - INFO - chunks = (1000, 1000)
2024-08-14 16:58:11 - log_args - INFO - fetching args done.
2024-08-14 16:58:11 - zarr_info - INFO - gix zarray shape, chunks, dtype: (732727, 2), (100000, 1), int32
2024-08-14 16:58:11 - gix2bool - INFO - loading gix into memory.
2024-08-14 16:58:11 - gix2bool - INFO - calculate the bool array
2024-08-14 16:58:11 - zarr_info - INFO - is_pc zarray shape, chunks, dtype: (2500, 1834), (1000, 1000), bool
2024-08-14 16:58:11 - gix2bool - INFO - write the bool array.
2024-08-14 16:58:11 - gix2bool - INFO - write done.
is_ds_can_zarr = zarr.open(is_ds_can,'r')
fig, ax = plt.subplots(1,1,figsize=(10,10))
pcm = ax.imshow(is_ds_can_zarr[:],cmap=colorcet.cm.fire)
ax.set(title='DS candidate distribution',xlabel='Range Index',ylabel='Azimuth Index')
fig.colorbar(pcm)
fig.show()