pc

Point Cloud data manipulation
import zarr
import numpy as np
from matplotlib import pyplot as plt
import moraine.cli as mc
import moraine as mr
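
Throughout this module a point cloud is simply a subset of raster pixels: the grid index gix is an (N, 2) integer array of (row, column) positions in the raster, so slicing a raster with gix yields the point cloud data, and writing point cloud data back at the gix positions restores the raster. A minimal NumPy sketch of this relation (the array names here are illustrative only):

ras = np.random.rand(100,100).astype(np.float32)                # a raster image
gix = np.stack(np.nonzero(ras>0.9),axis=-1).astype(np.int32)    # (N,2) grid index of selected pixels
pc = ras[gix[:,0],gix[:,1]]                                     # point cloud data sliced from the raster
ras_ = np.full_like(ras,np.nan); ras_[gix[:,0],gix[:,1]] = pc   # back to a raster, NaN where no point exists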

source

gix2bool

 gix2bool (gix:str, is_pc:str, shape:tuple[int], chunks:tuple[int]=(1000,
           1000))

Convert pc grid index to bool 2d array

Type Default Details
gix str point cloud grid index
is_pc str output, bool array
shape tuple shape of one image (nlines,width)
chunks tuple (1000, 1000) output chunk size
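
Usage (a minimal sketch; it assumes gix2bool follows the same zarr-path conventions as the other functions on this page and that the output is True exactly at the point positions, so the reference result is built directly with NumPy):

gix = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
gix.sort()
gix = np.stack(np.unravel_index(gix,shape=(100,100)),axis=-1).astype(np.int32)
# reference bool array: True at the point positions, False elsewhere
is_pc_data = np.zeros((100,100),dtype=bool)
is_pc_data[gix[:,0],gix[:,1]] = True

gix_zarr = zarr.open('pc/gix.zarr','w',shape=gix.shape,dtype=gix.dtype,chunks=(200,1))
gix_zarr[:] = gix
gix2bool('pc/gix.zarr','pc/is_pc.zarr',shape=(100,100),chunks=(20,100))
np.testing.assert_array_equal(zarr.open('pc/is_pc.zarr','r')[:],is_pc_data)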

source

bool2gix

 bool2gix (is_pc:str, gix:str, chunks:int=100000)

Convert bool 2d array to grid index

Type Default Details
is_pc str input bool array
gix str output, point cloud grid index
chunks int 100000 output point chunk size
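
Usage (a minimal sketch; it assumes bool2gix is the inverse of gix2bool and that points come out in row-major grid order, as elsewhere on this page):

is_pc_data = np.zeros((100,100),dtype=bool)
is_pc_data[np.random.rand(100,100)<0.1] = True
is_pc_zarr = zarr.open('pc/is_pc.zarr','w',shape=is_pc_data.shape,dtype=is_pc_data.dtype,chunks=(20,100))
is_pc_zarr[:] = is_pc_data
bool2gix('pc/is_pc.zarr','pc/gix_.zarr',chunks=200)
# reference grid index: (row, col) of every True pixel, in row-major order
gix_ref = np.stack(np.nonzero(is_pc_data),axis=-1)
np.testing.assert_array_equal(zarr.open('pc/gix_.zarr','r')[:],gix_ref)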

source

ras2pc

 ras2pc (idx:str, ras:str|list, pc:str|list, chunks:int=None,
         processes=False, n_workers=1, threads_per_worker=1,
         **dask_cluster_arg)

Convert raster data to point cloud data

Type Default Details
idx str point cloud grid index or Hilbert index
ras str | list path (in string) or list of paths for raster data
pc str | list output, path (in string) or list of paths for point cloud data
chunks int None output point chunk size, same as gix by default
processes bool False use process for dask worker or thread
n_workers int 1 number of dask worker
threads_per_worker int 1 number of threads per dask worker
dask_cluster_arg

Usage:

logger = mc.get_logger()
ras_data1 = np.random.rand(100,100).astype(np.float32)
ras_data2 = np.random.rand(100,100,3).astype(np.float32)+1j*np.random.rand(100,100,3).astype(np.float32)

gix = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
gix.sort()
gix = np.stack(np.unravel_index(gix,shape=(100,100)),axis=-1).astype(np.int32)

pc_data1 = ras_data1[gix[:,0],gix[:,1]]
pc_data2 = ras_data2[gix[:,0],gix[:,1]]

gix_zarr = zarr.open('pc/gix.zarr','w',shape=gix.shape,dtype=gix.dtype,chunks=(200,1))
ras_zarr1 = zarr.open('pc/ras1.zarr','w',shape=ras_data1.shape,dtype=ras_data1.dtype,chunks=(20,100))
ras_zarr2 = zarr.open('pc/ras2.zarr','w',shape=ras_data2.shape,dtype=ras_data2.dtype,chunks=(20,100,1))
gix_zarr[:] = gix
ras_zarr1[:] = ras_data1
ras_zarr2[:] = ras_data2
ras2pc('pc/gix.zarr','pc/ras1.zarr','pc/pc1.zarr')
pc_zarr1 = zarr.open('pc/pc1.zarr','r')
np.testing.assert_array_equal(pc_data1,pc_zarr1[:])

ras2pc('pc/gix.zarr',ras=['pc/ras1.zarr','pc/ras2.zarr'],pc=['pc/pc1.zarr','pc/pc2.zarr'])
pc_zarr1 = zarr.open('pc/pc1.zarr','r')
pc_zarr2 = zarr.open('pc/pc2.zarr','r')
np.testing.assert_array_equal(pc_data1,pc_zarr1[:])
np.testing.assert_array_equal(pc_data2,pc_zarr2[:])
2024-08-14 17:15:41 - log_args - INFO - running function: ras2pc
2024-08-14 17:15:41 - log_args - INFO - fetching args:
2024-08-14 17:15:41 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:15:41 - log_args - INFO - ras = 'pc/ras1.zarr'
2024-08-14 17:15:41 - log_args - INFO - pc = 'pc/pc1.zarr'
2024-08-14 17:15:41 - log_args - INFO - chunks = None
2024-08-14 17:15:41 - log_args - INFO - processes = False
2024-08-14 17:15:41 - log_args - INFO - n_workers = 1
2024-08-14 17:15:41 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:15:41 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:15:41 - log_args - INFO - fetching args done.
2024-08-14 17:15:41 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (1000, 2), (200, 1), int32
2024-08-14 17:15:41 - ras2pc - INFO - loading gix into memory.
2024-08-14 17:15:41 - ras2pc - INFO - starting dask local cluster.
2024-08-14 17:15:41 - ras2pc - INFO - dask local cluster started.
2024-08-14 17:15:41 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:15:41 - ras2pc - INFO - start to slice on pc/ras1.zarr
2024-08-14 17:15:41 - zarr_info - INFO - pc/ras1.zarr zarray shape, chunks, dtype: (100, 100), (20, 100), float32
2024-08-14 17:15:41 - darr_info - INFO - ras dask array shape, chunksize, dtype: (100, 100), (100, 100), float32
2024-08-14 17:15:41 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:15:41 - ras2pc - INFO - saving to pc/pc1.zarr.
2024-08-14 17:15:41 - zarr_info - INFO - pc/pc1.zarr zarray shape, chunks, dtype: (1000,), (200,), float32
2024-08-14 17:15:41 - ras2pc - INFO - computing graph setted. doing all the computing.
2024-08-14 17:15:42 - ras2pc - INFO - computing finished.
2024-08-14 17:15:42 - ras2pc - INFO - dask cluster closed.
2024-08-14 17:15:42 - log_args - INFO - running function: ras2pc
2024-08-14 17:15:42 - log_args - INFO - fetching args:
2024-08-14 17:15:42 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:15:42 - log_args - INFO - ras = ['pc/ras1.zarr', 'pc/ras2.zarr']
2024-08-14 17:15:42 - log_args - INFO - pc = ['pc/pc1.zarr', 'pc/pc2.zarr']
2024-08-14 17:15:42 - log_args - INFO - chunks = None
2024-08-14 17:15:42 - log_args - INFO - processes = False
2024-08-14 17:15:42 - log_args - INFO - n_workers = 1
2024-08-14 17:15:42 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:15:42 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:15:42 - log_args - INFO - fetching args done.
2024-08-14 17:15:42 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (1000, 2), (200, 1), int32
2024-08-14 17:15:42 - ras2pc - INFO - loading gix into memory.
2024-08-14 17:15:42 - ras2pc - INFO - starting dask local cluster.
2024-08-14 17:15:42 - ras2pc - INFO - dask local cluster started.
2024-08-14 17:15:42 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:15:42 - ras2pc - INFO - start to slice on pc/ras1.zarr
2024-08-14 17:15:42 - zarr_info - INFO - pc/ras1.zarr zarray shape, chunks, dtype: (100, 100), (20, 100), float32
2024-08-14 17:15:42 - darr_info - INFO - ras dask array shape, chunksize, dtype: (100, 100), (100, 100), float32
2024-08-14 17:15:42 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:15:42 - ras2pc - INFO - saving to pc/pc1.zarr.
2024-08-14 17:15:42 - zarr_info - INFO - pc/pc1.zarr zarray shape, chunks, dtype: (1000,), (200,), float32
2024-08-14 17:15:42 - ras2pc - INFO - start to slice on pc/ras2.zarr
2024-08-14 17:15:42 - zarr_info - INFO - pc/ras2.zarr zarray shape, chunks, dtype: (100, 100, 3), (20, 100, 1), complex64
2024-08-14 17:15:42 - darr_info - INFO - ras dask array shape, chunksize, dtype: (100, 100, 3), (100, 100, 1), complex64
2024-08-14 17:15:42 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1000, 3), (1000, 1), complex64
2024-08-14 17:15:42 - ras2pc - INFO - saving to pc/pc2.zarr.
2024-08-14 17:15:42 - zarr_info - INFO - pc/pc2.zarr zarray shape, chunks, dtype: (1000, 3), (200, 1), complex64
2024-08-14 17:15:42 - ras2pc - INFO - computing graph setted. doing all the computing.
2024-08-14 17:15:42 - ras2pc - INFO - computing finished.
2024-08-14 17:15:42 - ras2pc - INFO - dask cluster closed.

source

pc_concat

 pc_concat (pcs:list|str, pc:list|str, key:list|str=None, chunks:int=None,
            processes=False, n_workers=1, threads_per_worker=1,
            **dask_cluster_arg)

Concatenate (and optionally sort) point cloud datasets.

Type Default Details
pcs list | str list of paths to pc data, or a directory that holds one pc dataset, or a list of either
pc list | str output, path of the output pc or a list of such paths
key list | str None key(s) used to sort the pc data, no sorting by default
chunks int None pc chunk size in output data, optional, same as first pc in pcs by default
processes bool False use process for dask worker or thread
n_workers int 1 number of dask worker
threads_per_worker int 1 number of threads per dask worker
dask_cluster_arg

Usage:

pc_data = np.random.rand(1000,3).astype(np.float32)+1j*np.random.rand(1000,3).astype(np.float32)
pc1_zarr = zarr.open('pc/pc1.zarr','w',shape=(300,3),dtype=pc_data.dtype,chunks=(300,1))
pc2_zarr = zarr.open('pc/pc2.zarr','w',shape=(700,3),dtype=pc_data.dtype,chunks=(700,1))
pc1_zarr[:] = pc_data[:300]
pc2_zarr[:] = pc_data[300:]
pc_concat(['pc/pc1.zarr','pc/pc2.zarr'],'pc/pc.zarr',chunks=500)
np.testing.assert_array_equal(zarr.open('pc/pc.zarr','r')[:],pc_data)
2024-08-14 17:15:42 - log_args - INFO - running function: pc_concat
2024-08-14 17:15:42 - log_args - INFO - fetching args:
2024-08-14 17:15:42 - log_args - INFO - pcs = ['pc/pc1.zarr', 'pc/pc2.zarr']
2024-08-14 17:15:42 - log_args - INFO - pc = 'pc/pc.zarr'
2024-08-14 17:15:42 - log_args - INFO - key = None
2024-08-14 17:15:42 - log_args - INFO - chunks = 500
2024-08-14 17:15:42 - log_args - INFO - processes = False
2024-08-14 17:15:42 - log_args - INFO - n_workers = 1
2024-08-14 17:15:42 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:15:42 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:15:42 - log_args - INFO - fetching args done.
2024-08-14 17:15:42 - pc_concat - INFO - input pcs: [['pc/pc1.zarr', 'pc/pc2.zarr']]
2024-08-14 17:15:42 - pc_concat - INFO - output pc: ['pc/pc.zarr']
2024-08-14 17:15:42 - pc_concat - INFO - starting dask local cluster.
2024-08-14 17:15:42 - pc_concat - INFO - dask local cluster started.
2024-08-14 17:15:42 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:15:42 - pc_concat - INFO - read pc from ['pc/pc1.zarr', 'pc/pc2.zarr']
2024-08-14 17:15:42 - darr_info - INFO - concatenated pc dask array shape, chunksize, dtype: (1000, 3), (1000, 1), complex64
2024-08-14 17:15:42 - pc_concat - INFO - save pc to pc/pc.zarr
2024-08-14 17:15:42 - zarr_info - INFO - pc/pc.zarr zarray shape, chunks, dtype: (1000, 3), (500, 1), complex64
2024-08-14 17:15:42 - pc_concat - INFO - computing graph setted. doing all the computing.
2024-08-14 17:15:42 - pc_concat - INFO - computing finished.
2024-08-14 17:15:42 - pc_concat - INFO - dask cluster closed.

source

ras2pc_ras_chunk

 ras2pc_ras_chunk (gix:str, ras:str|list, pc:str|list, key:str,
                   chunks:tuple=None, processes=False, n_workers=1,
                   threads_per_worker=1, **dask_cluster_arg)

Convert raster data to point cloud data sorted by raster chunk

Type Default Details
gix str point cloud grid index
ras str | list path (in string) or list of paths for raster data
pc str | list output, path (directory) or list of paths for point cloud data
key str output, path for the key used to sort the generated pc chunks in the directory back to gix order
chunks tuple None ras chunks, same as the first ras by default
processes bool False use process for dask worker or thread
n_workers int 1 number of dask worker
threads_per_worker int 1 number of threads per dask worker
dask_cluster_arg

Usage:

ras2pc_ras_chunk('pc/gix.zarr','pc/ras1.zarr','pc/pc1',key='pc/key.zarr')
pc_concat('pc/pc1','pc/pc1.zarr',key='pc/key.zarr',chunks=200)
pc_zarr1 = zarr.open('pc/pc1.zarr','r')
np.testing.assert_array_equal(pc_data1,pc_zarr1[:])

ras2pc_ras_chunk('pc/gix.zarr',ras=['pc/ras1.zarr','pc/ras2.zarr'],pc=['pc/pc1','pc/pc2'],key='pc/key.zarr')
pc_concat('pc/pc1','pc/pc1.zarr',key='pc/key.zarr',chunks=200)
pc_concat('pc/pc2','pc/pc2.zarr',key='pc/key.zarr',chunks=200)
pc_zarr1 = zarr.open('pc/pc1.zarr','r')
pc_zarr2 = zarr.open('pc/pc2.zarr','r')
np.testing.assert_array_equal(pc_data1,pc_zarr1[:])
np.testing.assert_array_equal(pc_data2,pc_zarr2[:])
2024-08-14 17:15:43 - log_args - INFO - running function: ras2pc_ras_chunk
2024-08-14 17:15:43 - log_args - INFO - fetching args:
2024-08-14 17:15:43 - log_args - INFO - gix = 'pc/gix.zarr'
2024-08-14 17:15:43 - log_args - INFO - ras = 'pc/ras1.zarr'
2024-08-14 17:15:43 - log_args - INFO - pc = 'pc/pc1'
2024-08-14 17:15:43 - log_args - INFO - key = 'pc/key.zarr'
2024-08-14 17:15:43 - log_args - INFO - chunks = None
2024-08-14 17:15:43 - log_args - INFO - processes = False
2024-08-14 17:15:43 - log_args - INFO - n_workers = 1
2024-08-14 17:15:43 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:15:43 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:15:43 - log_args - INFO - fetching args done.
2024-08-14 17:15:43 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (1000, 2), (200, 1), int32
2024-08-14 17:15:43 - ras2pc_ras_chunk - INFO - loading gix into memory.
2024-08-14 17:15:43 - ras2pc_ras_chunk - INFO - convert gix to the order of ras chunk
2024-08-14 17:15:50 - ras2pc_ras_chunk - INFO - save key
2024-08-14 17:15:50 - ras2pc_ras_chunk - INFO - starting dask local cluster.
2024-08-14 17:15:50 - ras2pc_ras_chunk - INFO - dask local cluster started.
2024-08-14 17:15:50 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:15:50 - ras2pc_ras_chunk - INFO - start to slice on pc/ras1.zarr
2024-08-14 17:15:50 - zarr_info - INFO - pc/ras1.zarr zarray shape, chunks, dtype: (100, 100), (20, 20), float32
2024-08-14 17:15:50 - darr_info - INFO - ras dask array shape, chunksize, dtype: (100, 100), (20, 20), float32
2024-08-14 17:15:50 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1000,), (50,), float32
2024-08-14 17:15:50 - ras2pc_ras_chunk - INFO - saving to pc/pc1.
2024-08-14 17:15:50 - ras2pc_ras_chunk - INFO - computing graph setted. doing all the computing.
2024-08-14 17:15:51 - ras2pc_ras_chunk - INFO - computing finished.
2024-08-14 17:15:51 - ras2pc_ras_chunk - INFO - dask cluster closed.
2024-08-14 17:15:51 - log_args - INFO - running function: pc_concat
2024-08-14 17:15:51 - log_args - INFO - fetching args:
2024-08-14 17:15:51 - log_args - INFO - pcs = 'pc/pc1'
2024-08-14 17:15:51 - log_args - INFO - pc = 'pc/pc1.zarr'
2024-08-14 17:15:51 - log_args - INFO - key = 'pc/key.zarr'
2024-08-14 17:15:51 - log_args - INFO - chunks = 200
2024-08-14 17:15:51 - log_args - INFO - processes = False
2024-08-14 17:15:51 - log_args - INFO - n_workers = 1
2024-08-14 17:15:51 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:15:51 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:15:51 - log_args - INFO - fetching args done.
2024-08-14 17:15:51 - pc_concat - INFO - input pcs: [[Path('pc/pc1/0.zarr'), Path('pc/pc1/1.zarr'), Path('pc/pc1/2.zarr'), Path('pc/pc1/3.zarr'), Path('pc/pc1/4.zarr'), Path('pc/pc1/5.zarr'), Path('pc/pc1/6.zarr'), Path('pc/pc1/7.zarr'), Path('pc/pc1/8.zarr'), Path('pc/pc1/9.zarr'), Path('pc/pc1/10.zarr'), Path('pc/pc1/11.zarr'), Path('pc/pc1/12.zarr'), Path('pc/pc1/13.zarr'), Path('pc/pc1/14.zarr'), Path('pc/pc1/15.zarr'), Path('pc/pc1/16.zarr'), Path('pc/pc1/17.zarr'), Path('pc/pc1/18.zarr'), Path('pc/pc1/19.zarr'), Path('pc/pc1/20.zarr'), Path('pc/pc1/21.zarr'), Path('pc/pc1/22.zarr'), Path('pc/pc1/23.zarr'), Path('pc/pc1/24.zarr')]]
2024-08-14 17:15:51 - pc_concat - INFO - output pc: ['pc/pc1.zarr']
2024-08-14 17:15:51 - pc_concat - INFO - load key
2024-08-14 17:15:51 - zarr_info - INFO - pc/key.zarr zarray shape, chunks, dtype: (1000,), (200,), int64
2024-08-14 17:15:51 - pc_concat - INFO - starting dask local cluster.
2024-08-14 17:15:51 - pc_concat - INFO - dask local cluster started.
2024-08-14 17:15:51 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:15:51 - pc_concat - INFO - read pc from [Path('pc/pc1/0.zarr'), Path('pc/pc1/1.zarr'), Path('pc/pc1/2.zarr'), Path('pc/pc1/3.zarr'), Path('pc/pc1/4.zarr'), Path('pc/pc1/5.zarr'), Path('pc/pc1/6.zarr'), Path('pc/pc1/7.zarr'), Path('pc/pc1/8.zarr'), Path('pc/pc1/9.zarr'), Path('pc/pc1/10.zarr'), Path('pc/pc1/11.zarr'), Path('pc/pc1/12.zarr'), Path('pc/pc1/13.zarr'), Path('pc/pc1/14.zarr'), Path('pc/pc1/15.zarr'), Path('pc/pc1/16.zarr'), Path('pc/pc1/17.zarr'), Path('pc/pc1/18.zarr'), Path('pc/pc1/19.zarr'), Path('pc/pc1/20.zarr'), Path('pc/pc1/21.zarr'), Path('pc/pc1/22.zarr'), Path('pc/pc1/23.zarr'), Path('pc/pc1/24.zarr')]
2024-08-14 17:15:51 - darr_info - INFO - concatenated pc dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:15:51 - pc_concat - INFO - sort pc according to key
2024-08-14 17:15:51 - darr_info - INFO - sorted pc dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:15:51 - pc_concat - INFO - save pc to pc/pc1.zarr
2024-08-14 17:15:51 - zarr_info - INFO - pc/pc1.zarr zarray shape, chunks, dtype: (1000,), (200,), float32
2024-08-14 17:15:51 - pc_concat - INFO - computing graph setted. doing all the computing.
2024-08-14 17:15:51 - pc_concat - INFO - computing finished.
2024-08-14 17:15:51 - pc_concat - INFO - dask cluster closed.
2024-08-14 17:15:51 - log_args - INFO - running function: ras2pc_ras_chunk
2024-08-14 17:15:51 - log_args - INFO - fetching args:
2024-08-14 17:15:51 - log_args - INFO - gix = 'pc/gix.zarr'
2024-08-14 17:15:51 - log_args - INFO - ras = ['pc/ras1.zarr', 'pc/ras2.zarr']
2024-08-14 17:15:51 - log_args - INFO - pc = ['pc/pc1', 'pc/pc2']
2024-08-14 17:15:51 - log_args - INFO - key = 'pc/key.zarr'
2024-08-14 17:15:51 - log_args - INFO - chunks = None
2024-08-14 17:15:51 - log_args - INFO - processes = False
2024-08-14 17:15:51 - log_args - INFO - n_workers = 1
2024-08-14 17:15:51 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:15:51 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:15:51 - log_args - INFO - fetching args done.
2024-08-14 17:15:51 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (1000, 2), (200, 1), int32
2024-08-14 17:15:51 - ras2pc_ras_chunk - INFO - loading gix into memory.
2024-08-14 17:15:51 - ras2pc_ras_chunk - INFO - convert gix to the order of ras chunk
2024-08-14 17:15:51 - ras2pc_ras_chunk - INFO - save key
2024-08-14 17:15:51 - ras2pc_ras_chunk - INFO - starting dask local cluster.
2024-08-14 17:15:51 - ras2pc_ras_chunk - INFO - dask local cluster started.
2024-08-14 17:15:51 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:15:51 - ras2pc_ras_chunk - INFO - start to slice on pc/ras1.zarr
2024-08-14 17:15:51 - zarr_info - INFO - pc/ras1.zarr zarray shape, chunks, dtype: (100, 100), (20, 20), float32
2024-08-14 17:15:51 - darr_info - INFO - ras dask array shape, chunksize, dtype: (100, 100), (20, 20), float32
2024-08-14 17:15:51 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1000,), (50,), float32
2024-08-14 17:15:51 - ras2pc_ras_chunk - INFO - saving to pc/pc1.
2024-08-14 17:15:51 - ras2pc_ras_chunk - INFO - start to slice on pc/ras2.zarr
2024-08-14 17:15:51 - zarr_info - INFO - pc/ras2.zarr zarray shape, chunks, dtype: (100, 100, 3), (20, 20, 1), complex64
2024-08-14 17:15:51 - darr_info - INFO - ras dask array shape, chunksize, dtype: (100, 100, 3), (20, 20, 3), complex64
2024-08-14 17:15:51 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1000, 3), (50, 3), complex64
2024-08-14 17:15:51 - ras2pc_ras_chunk - INFO - saving to pc/pc2.
2024-08-14 17:15:52 - ras2pc_ras_chunk - INFO - computing graph setted. doing all the computing.
2024-08-14 17:15:53 - ras2pc_ras_chunk - INFO - computing finished.
2024-08-14 17:15:53 - ras2pc_ras_chunk - INFO - dask cluster closed.
2024-08-14 17:15:53 - log_args - INFO - running function: pc_concat
2024-08-14 17:15:53 - log_args - INFO - fetching args:
2024-08-14 17:15:53 - log_args - INFO - pcs = 'pc/pc1'
2024-08-14 17:15:53 - log_args - INFO - pc = 'pc/pc1.zarr'
2024-08-14 17:15:53 - log_args - INFO - key = 'pc/key.zarr'
2024-08-14 17:15:53 - log_args - INFO - chunks = 200
2024-08-14 17:15:53 - log_args - INFO - processes = False
2024-08-14 17:15:53 - log_args - INFO - n_workers = 1
2024-08-14 17:15:53 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:15:53 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:15:53 - log_args - INFO - fetching args done.
2024-08-14 17:15:53 - pc_concat - INFO - input pcs: [[Path('pc/pc1/0.zarr'), Path('pc/pc1/1.zarr'), Path('pc/pc1/2.zarr'), Path('pc/pc1/3.zarr'), Path('pc/pc1/4.zarr'), Path('pc/pc1/5.zarr'), Path('pc/pc1/6.zarr'), Path('pc/pc1/7.zarr'), Path('pc/pc1/8.zarr'), Path('pc/pc1/9.zarr'), Path('pc/pc1/10.zarr'), Path('pc/pc1/11.zarr'), Path('pc/pc1/12.zarr'), Path('pc/pc1/13.zarr'), Path('pc/pc1/14.zarr'), Path('pc/pc1/15.zarr'), Path('pc/pc1/16.zarr'), Path('pc/pc1/17.zarr'), Path('pc/pc1/18.zarr'), Path('pc/pc1/19.zarr'), Path('pc/pc1/20.zarr'), Path('pc/pc1/21.zarr'), Path('pc/pc1/22.zarr'), Path('pc/pc1/23.zarr'), Path('pc/pc1/24.zarr')]]
2024-08-14 17:15:53 - pc_concat - INFO - output pc: ['pc/pc1.zarr']
2024-08-14 17:15:53 - pc_concat - INFO - load key
2024-08-14 17:15:53 - zarr_info - INFO - pc/key.zarr zarray shape, chunks, dtype: (1000,), (200,), int64
2024-08-14 17:15:53 - pc_concat - INFO - starting dask local cluster.
2024-08-14 17:15:53 - pc_concat - INFO - dask local cluster started.
2024-08-14 17:15:53 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:15:53 - pc_concat - INFO - read pc from [Path('pc/pc1/0.zarr'), Path('pc/pc1/1.zarr'), Path('pc/pc1/2.zarr'), Path('pc/pc1/3.zarr'), Path('pc/pc1/4.zarr'), Path('pc/pc1/5.zarr'), Path('pc/pc1/6.zarr'), Path('pc/pc1/7.zarr'), Path('pc/pc1/8.zarr'), Path('pc/pc1/9.zarr'), Path('pc/pc1/10.zarr'), Path('pc/pc1/11.zarr'), Path('pc/pc1/12.zarr'), Path('pc/pc1/13.zarr'), Path('pc/pc1/14.zarr'), Path('pc/pc1/15.zarr'), Path('pc/pc1/16.zarr'), Path('pc/pc1/17.zarr'), Path('pc/pc1/18.zarr'), Path('pc/pc1/19.zarr'), Path('pc/pc1/20.zarr'), Path('pc/pc1/21.zarr'), Path('pc/pc1/22.zarr'), Path('pc/pc1/23.zarr'), Path('pc/pc1/24.zarr')]
2024-08-14 17:15:53 - darr_info - INFO - concatenated pc dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:15:53 - pc_concat - INFO - sort pc according to key
2024-08-14 17:15:53 - darr_info - INFO - sorted pc dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:15:53 - pc_concat - INFO - save pc to pc/pc1.zarr
2024-08-14 17:15:53 - zarr_info - INFO - pc/pc1.zarr zarray shape, chunks, dtype: (1000,), (200,), float32
2024-08-14 17:15:53 - pc_concat - INFO - computing graph setted. doing all the computing.
2024-08-14 17:15:53 - pc_concat - INFO - computing finished.
2024-08-14 17:15:53 - pc_concat - INFO - dask cluster closed.
2024-08-14 17:15:53 - log_args - INFO - running function: pc_concat
2024-08-14 17:15:53 - log_args - INFO - fetching args:
2024-08-14 17:15:53 - log_args - INFO - pcs = 'pc/pc2'
2024-08-14 17:15:53 - log_args - INFO - pc = 'pc/pc2.zarr'
2024-08-14 17:15:53 - log_args - INFO - key = 'pc/key.zarr'
2024-08-14 17:15:53 - log_args - INFO - chunks = 200
2024-08-14 17:15:53 - log_args - INFO - processes = False
2024-08-14 17:15:53 - log_args - INFO - n_workers = 1
2024-08-14 17:15:53 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:15:53 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:15:53 - log_args - INFO - fetching args done.
2024-08-14 17:15:53 - pc_concat - INFO - input pcs: [[Path('pc/pc2/0.zarr'), Path('pc/pc2/1.zarr'), Path('pc/pc2/2.zarr'), Path('pc/pc2/3.zarr'), Path('pc/pc2/4.zarr'), Path('pc/pc2/5.zarr'), Path('pc/pc2/6.zarr'), Path('pc/pc2/7.zarr'), Path('pc/pc2/8.zarr'), Path('pc/pc2/9.zarr'), Path('pc/pc2/10.zarr'), Path('pc/pc2/11.zarr'), Path('pc/pc2/12.zarr'), Path('pc/pc2/13.zarr'), Path('pc/pc2/14.zarr'), Path('pc/pc2/15.zarr'), Path('pc/pc2/16.zarr'), Path('pc/pc2/17.zarr'), Path('pc/pc2/18.zarr'), Path('pc/pc2/19.zarr'), Path('pc/pc2/20.zarr'), Path('pc/pc2/21.zarr'), Path('pc/pc2/22.zarr'), Path('pc/pc2/23.zarr'), Path('pc/pc2/24.zarr')]]
2024-08-14 17:15:53 - pc_concat - INFO - output pc: ['pc/pc2.zarr']
2024-08-14 17:15:53 - pc_concat - INFO - load key
2024-08-14 17:15:53 - zarr_info - INFO - pc/key.zarr zarray shape, chunks, dtype: (1000,), (200,), int64
2024-08-14 17:15:53 - pc_concat - INFO - starting dask local cluster.
2024-08-14 17:15:53 - pc_concat - INFO - dask local cluster started.
2024-08-14 17:15:53 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:15:53 - pc_concat - INFO - read pc from [Path('pc/pc2/0.zarr'), Path('pc/pc2/1.zarr'), Path('pc/pc2/2.zarr'), Path('pc/pc2/3.zarr'), Path('pc/pc2/4.zarr'), Path('pc/pc2/5.zarr'), Path('pc/pc2/6.zarr'), Path('pc/pc2/7.zarr'), Path('pc/pc2/8.zarr'), Path('pc/pc2/9.zarr'), Path('pc/pc2/10.zarr'), Path('pc/pc2/11.zarr'), Path('pc/pc2/12.zarr'), Path('pc/pc2/13.zarr'), Path('pc/pc2/14.zarr'), Path('pc/pc2/15.zarr'), Path('pc/pc2/16.zarr'), Path('pc/pc2/17.zarr'), Path('pc/pc2/18.zarr'), Path('pc/pc2/19.zarr'), Path('pc/pc2/20.zarr'), Path('pc/pc2/21.zarr'), Path('pc/pc2/22.zarr'), Path('pc/pc2/23.zarr'), Path('pc/pc2/24.zarr')]
2024-08-14 17:15:53 - darr_info - INFO - concatenated pc dask array shape, chunksize, dtype: (1000, 3), (1000, 1), complex64
2024-08-14 17:15:53 - pc_concat - INFO - sort pc according to key
2024-08-14 17:15:53 - darr_info - INFO - sorted pc dask array shape, chunksize, dtype: (1000, 3), (1000, 1), complex64
2024-08-14 17:15:53 - pc_concat - INFO - save pc to pc/pc2.zarr
2024-08-14 17:15:53 - zarr_info - INFO - pc/pc2.zarr zarray shape, chunks, dtype: (1000, 3), (200, 1), complex64
2024-08-14 17:15:53 - pc_concat - INFO - computing graph setted. doing all the computing.
2024-08-14 17:15:53 - pc_concat - INFO - computing finished.
2024-08-14 17:15:53 - pc_concat - INFO - dask cluster closed.

source

pc2ras

 pc2ras (idx:str, pc:str|list, ras:str|list, shape:tuple[int],
         chunks:tuple[int]=(1000, 1000), processes=False, n_workers=1,
         threads_per_worker=1, **dask_cluster_arg)

Convert point cloud data to raster data; pixels without points are filled with NaN

Type Default Details
idx str point cloud grid index or Hilbert index
pc str | list path (in string) or list of paths for point cloud data
ras str | list output, path (in string) or list of paths for raster data
shape tuple shape of one image (nlines,width)
chunks tuple (1000, 1000) output chunk size
processes bool False use process for dask worker or thread
n_workers int 1 number of dask worker
threads_per_worker int 1 number of threads per dask worker
dask_cluster_arg

Usage:

pc_data1 = np.random.rand(1000).astype(np.float32)
pc_data2 = np.random.rand(1000,3).astype(np.float32)+1j*np.random.rand(1000,3).astype(np.float32)

gix = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
gix.sort()
gix = np.stack(np.unravel_index(gix,shape=(100,100)),axis=-1).astype(np.int32)

ras_data1 = np.zeros((100,100),dtype=np.float32)
ras_data2 = np.zeros((100,100,3),dtype=np.complex64)
ras_data1[:] = np.nan
ras_data2[:] = np.nan

ras_data1[gix[:,0],gix[:,1]] = pc_data1
ras_data2[gix[:,0],gix[:,1]] = pc_data2

gix_zarr = zarr.open('pc/gix.zarr','w',shape=gix.shape,dtype=gix.dtype,chunks=(200,1))
pc_zarr1 = zarr.open('pc/pc1.zarr','w',shape=pc_data1.shape,dtype=pc_data1.dtype,chunks=(200,))
pc_zarr2 = zarr.open('pc/pc2.zarr','w',shape=pc_data2.shape,dtype=pc_data2.dtype,chunks=(200,1))
gix_zarr[:] = gix
pc_zarr1[:] = pc_data1
pc_zarr2[:] = pc_data2
pc2ras('pc/gix.zarr','pc/pc1.zarr','pc/ras1.zarr',shape=(100,100),chunks=(20,100))
ras_zarr1 = zarr.open('pc/ras1.zarr','r')
np.testing.assert_array_equal(ras_data1,ras_zarr1[:])

pc2ras('pc/gix.zarr',['pc/pc1.zarr','pc/pc2.zarr'],['pc/ras1.zarr','pc/ras2.zarr'],shape=(100,100),chunks=(20,100))
ras_zarr1 = zarr.open('pc/ras1.zarr','r')
ras_zarr2 = zarr.open('pc/ras2.zarr','r')
np.testing.assert_array_equal(ras_data1,ras_zarr1[:])
np.testing.assert_array_equal(ras_data2,ras_zarr2[:])
2024-08-14 17:15:54 - log_args - INFO - running function: pc2ras
2024-08-14 17:15:54 - log_args - INFO - fetching args:
2024-08-14 17:15:54 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:15:54 - log_args - INFO - pc = 'pc/pc1.zarr'
2024-08-14 17:15:54 - log_args - INFO - ras = 'pc/ras1.zarr'
2024-08-14 17:15:54 - log_args - INFO - shape = (100, 100)
2024-08-14 17:15:54 - log_args - INFO - chunks = (20, 100)
2024-08-14 17:15:54 - log_args - INFO - processes = False
2024-08-14 17:15:54 - log_args - INFO - n_workers = 1
2024-08-14 17:15:54 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:15:54 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:15:54 - log_args - INFO - fetching args done.
2024-08-14 17:15:54 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (1000, 2), (200, 1), int32
2024-08-14 17:15:54 - pc2ras - INFO - loading gix into memory.
2024-08-14 17:15:54 - pc2ras - INFO - starting dask local cluster.
2024-08-14 17:15:55 - pc2ras - INFO - dask local cluster started.
2024-08-14 17:15:55 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:15:55 - pc2ras - INFO - start to work on pc/pc1.zarr
2024-08-14 17:15:55 - zarr_info - INFO - pc/pc1.zarr zarray shape, chunks, dtype: (1000,), (200,), float32
2024-08-14 17:15:55 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:15:55 - pc2ras - INFO - create ras dask array
2024-08-14 17:15:55 - darr_info - INFO - ras dask array shape, chunksize, dtype: (100, 100), (100, 100), float32
2024-08-14 17:15:55 - pc2ras - INFO - save ras to pc/ras1.zarr
2024-08-14 17:15:55 - zarr_info - INFO - pc/ras1.zarr zarray shape, chunks, dtype: (100, 100), (20, 100), float32
2024-08-14 17:15:55 - pc2ras - INFO - computing graph setted. doing all the computing.
2024-08-14 17:15:55 - pc2ras - INFO - computing finished.
2024-08-14 17:15:55 - pc2ras - INFO - dask cluster closed.
2024-08-14 17:15:55 - log_args - INFO - running function: pc2ras
2024-08-14 17:15:55 - log_args - INFO - fetching args:
2024-08-14 17:15:55 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:15:55 - log_args - INFO - pc = ['pc/pc1.zarr', 'pc/pc2.zarr']
2024-08-14 17:15:55 - log_args - INFO - ras = ['pc/ras1.zarr', 'pc/ras2.zarr']
2024-08-14 17:15:55 - log_args - INFO - shape = (100, 100)
2024-08-14 17:15:55 - log_args - INFO - chunks = (20, 100)
2024-08-14 17:15:55 - log_args - INFO - processes = False
2024-08-14 17:15:55 - log_args - INFO - n_workers = 1
2024-08-14 17:15:55 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:15:55 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:15:55 - log_args - INFO - fetching args done.
2024-08-14 17:15:55 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (1000, 2), (200, 1), int32
2024-08-14 17:15:55 - pc2ras - INFO - loading gix into memory.
2024-08-14 17:15:55 - pc2ras - INFO - starting dask local cluster.
2024-08-14 17:15:55 - pc2ras - INFO - dask local cluster started.
2024-08-14 17:15:55 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:15:55 - pc2ras - INFO - start to work on pc/pc1.zarr
2024-08-14 17:15:55 - zarr_info - INFO - pc/pc1.zarr zarray shape, chunks, dtype: (1000,), (200,), float32
2024-08-14 17:15:55 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:15:55 - pc2ras - INFO - create ras dask array
2024-08-14 17:15:55 - darr_info - INFO - ras dask array shape, chunksize, dtype: (100, 100), (100, 100), float32
2024-08-14 17:15:55 - pc2ras - INFO - save ras to pc/ras1.zarr
2024-08-14 17:15:55 - zarr_info - INFO - pc/ras1.zarr zarray shape, chunks, dtype: (100, 100), (20, 100), float32
2024-08-14 17:15:55 - pc2ras - INFO - start to work on pc/pc2.zarr
2024-08-14 17:15:55 - zarr_info - INFO - pc/pc2.zarr zarray shape, chunks, dtype: (1000, 3), (200, 1), complex64
2024-08-14 17:15:55 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1000, 3), (1000, 1), complex64
2024-08-14 17:15:55 - pc2ras - INFO - create ras dask array
2024-08-14 17:15:55 - darr_info - INFO - ras dask array shape, chunksize, dtype: (100, 100, 3), (100, 100, 1), complex64
2024-08-14 17:15:55 - pc2ras - INFO - save ras to pc/ras2.zarr
2024-08-14 17:15:55 - zarr_info - INFO - pc/ras2.zarr zarray shape, chunks, dtype: (100, 100, 3), (20, 100, 1), complex64
2024-08-14 17:15:55 - pc2ras - INFO - computing graph setted. doing all the computing.
2024-08-14 17:15:55 - pc2ras - INFO - computing finished.
2024-08-14 17:15:55 - pc2ras - INFO - dask cluster closed.

source

pc_hix

 pc_hix (gix:str, hix:str, shape:tuple)

Compute the Hilbert index from the grid index for point cloud data.

Type Details
gix str grid index
hix str output, path to the Hilbert index
shape tuple (nlines, width)

Usage:

bbox = [0,0,100,100]
gix = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
gix.sort()
gix = np.stack(np.unravel_index(gix,shape=(100,100)),axis=-1).astype(np.int32)
gix_zarr = zarr.open('pc/gix.zarr','w',shape=gix.shape, chunks=(100,1),dtype=gix.dtype)
gix_zarr[:] = gix
pc_hix('pc/gix.zarr', 'pc/hix.zarr',shape=(100,100))
2024-08-14 17:15:56 - log_args - INFO - running function: pc_hix
2024-08-14 17:15:56 - log_args - INFO - fetching args:
2024-08-14 17:15:56 - log_args - INFO - gix = 'pc/gix.zarr'
2024-08-14 17:15:56 - log_args - INFO - hix = 'pc/hix.zarr'
2024-08-14 17:15:56 - log_args - INFO - shape = (100, 100)
2024-08-14 17:15:56 - log_args - INFO - fetching args done.
2024-08-14 17:15:56 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (1000, 2), (100, 1), int32
2024-08-14 17:15:56 - zarr_info - INFO - pc/hix.zarr zarray shape, chunks, dtype: (1000,), (100,), int64
2024-08-14 17:15:56 - pc_hix - INFO - calculating the hillbert index based on grid index
2024-08-14 17:15:57 - pc_hix - INFO - writing the hillbert index
2024-08-14 17:15:57 - pc_hix - INFO - done.
hix = zarr.open('pc/hix.zarr','r')[:]
plt.scatter(gix[:,1], gix[:,0], c=hix)
plt.colorbar()
plt.show()


source

pc_gix

 pc_gix (hix:str, gix:str, shape:tuple)

Compute the grid index from the Hilbert index for point cloud data.

Type Details
hix str Hilbert index
gix str output, path to the grid index
shape tuple (nlines, width)

Usage:

bbox = [0,0,100,100]
gix = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
gix.sort()
gix = np.stack(np.unravel_index(gix,shape=(100,100)),axis=-1).astype(np.int32)
gix_zarr = zarr.open('pc/gix.zarr','w',shape=gix.shape, chunks=(100,1),dtype=gix.dtype)
gix_zarr[:] = gix
pc_hix('pc/gix.zarr', 'pc/hix.zarr',shape=(100,100))
pc_gix('pc/hix.zarr','pc/gix_.zarr', (100,100))
np.testing.assert_array_equal(zarr.open('pc/gix_.zarr','r')[:], gix)
2024-08-14 17:15:57 - log_args - INFO - running function: pc_hix
2024-08-14 17:15:57 - log_args - INFO - fetching args:
2024-08-14 17:15:57 - log_args - INFO - gix = 'pc/gix.zarr'
2024-08-14 17:15:57 - log_args - INFO - hix = 'pc/hix.zarr'
2024-08-14 17:15:57 - log_args - INFO - shape = (100, 100)
2024-08-14 17:15:57 - log_args - INFO - fetching args done.
2024-08-14 17:15:57 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (1000, 2), (100, 1), int32
2024-08-14 17:15:57 - zarr_info - INFO - pc/hix.zarr zarray shape, chunks, dtype: (1000,), (100,), int64
2024-08-14 17:15:58 - pc_hix - INFO - calculating the hillbert index based on grid index
2024-08-14 17:15:58 - pc_hix - INFO - writing the hillbert index
2024-08-14 17:15:58 - pc_hix - INFO - done.
2024-08-14 17:15:58 - log_args - INFO - running function: pc_gix
2024-08-14 17:15:58 - log_args - INFO - fetching args:
2024-08-14 17:15:58 - log_args - INFO - hix = 'pc/hix.zarr'
2024-08-14 17:15:58 - log_args - INFO - gix = 'pc/gix_.zarr'
2024-08-14 17:15:58 - log_args - INFO - shape = (100, 100)
2024-08-14 17:15:58 - log_args - INFO - fetching args done.
2024-08-14 17:15:58 - zarr_info - INFO - pc/hix.zarr zarray shape, chunks, dtype: (1000,), (100,), int64
2024-08-14 17:15:58 - zarr_info - INFO - pc/gix_.zarr zarray shape, chunks, dtype: (1000, 2), (100, 1), int32
2024-08-14 17:15:58 - pc_gix - INFO - calculating the grid index from hillbert index
2024-08-14 17:15:59 - pc_gix - INFO - writing the grid index
2024-08-14 17:15:59 - pc_gix - INFO - done.

source

pc_sort

 pc_sort (idx_in:str, idx:str, pc_in:str|list=None, pc:str|list=None,
          shape:tuple=None, chunks:int=None, key:str=None,
          processes=False, n_workers=1, threads_per_worker=1,
          **dask_cluster_arg)

Sort point cloud data according to the indices that sort idx_in.

Type Default Details
idx_in str the unsorted grid index or Hilbert index of the input data
idx str output, the sorted grid index or Hilbert index
pc_in str | list None path (in string) or list of paths for the input point cloud data
pc str | list None output, path (in string) or list of paths for the output point cloud data
shape tuple None (nlines, width), faster if provided for grid index input
chunks int None chunk size in output data, same as idx_in by default
key str None output, path (in string) for the sorting key
processes bool False use process for dask worker or thread
n_workers int 1 number of dask worker
threads_per_worker int 1 number of threads per dask worker
dask_cluster_arg

Usage:

pc_in = np.random.rand(1000).astype(np.float32)
gix_in = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
gix_in = np.stack(np.unravel_index(gix_in,shape=(100,100)),axis=-1).astype(np.int32)
ind = np.lexsort((gix_in[:,1],gix_in[:,0]))
pc = pc_in[ind]; gix = gix_in[ind]

pc_in_zarr = zarr.open('pc/pc_in.zarr','w',shape=pc_in.shape,dtype=pc_in.dtype,chunks=(100,))
gix_in_zarr = zarr.open('pc/gix_in.zarr','w',shape=gix_in.shape,dtype=gix_in.dtype,chunks=(100,1))
pc_in_zarr[:] = pc_in; gix_in_zarr[:] = gix_in
pc_sort('pc/gix_in.zarr','pc/gix.zarr','pc/pc_in.zarr','pc/pc.zarr',shape=(100,100))
pc_zarr = zarr.open('pc/pc.zarr','r'); gix_zarr = zarr.open('pc/gix.zarr','r')
np.testing.assert_array_equal(pc_zarr[:],pc)
np.testing.assert_array_equal(gix_zarr[:],gix)
2024-08-14 17:15:59 - log_args - INFO - running function: pc_sort
2024-08-14 17:15:59 - log_args - INFO - fetching args:
2024-08-14 17:15:59 - log_args - INFO - idx_in = 'pc/gix_in.zarr'
2024-08-14 17:15:59 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:15:59 - log_args - INFO - pc_in = 'pc/pc_in.zarr'
2024-08-14 17:15:59 - log_args - INFO - pc = 'pc/pc.zarr'
2024-08-14 17:15:59 - log_args - INFO - shape = (100, 100)
2024-08-14 17:15:59 - log_args - INFO - chunks = None
2024-08-14 17:15:59 - log_args - INFO - key = None
2024-08-14 17:15:59 - log_args - INFO - processes = False
2024-08-14 17:15:59 - log_args - INFO - n_workers = 1
2024-08-14 17:15:59 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:15:59 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:15:59 - log_args - INFO - fetching args done.
2024-08-14 17:15:59 - zarr_info - INFO - pc/gix_in.zarr zarray shape, chunks, dtype: (1000, 2), (100, 1), int32
2024-08-14 17:15:59 - pc_sort - INFO - loading idx_in and calculate the sorting indices.
2024-08-14 17:15:59 - pc_sort - INFO - output pc chunk size is 100
2024-08-14 17:15:59 - pc_sort - INFO - write idx
2024-08-14 17:15:59 - zarr_info - INFO - idx zarray shape, chunks, dtype: (1000, 2), (100, 1), int32
2024-08-14 17:15:59 - pc_sort - INFO - starting dask local cluster.
2024-08-14 17:15:59 - pc_sort - INFO - dask local cluster started.
2024-08-14 17:15:59 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:15:59 - zarr_info - INFO - pc/pc_in.zarr zarray shape, chunks, dtype: (1000,), (100,), float32
2024-08-14 17:15:59 - darr_info - INFO - pc_in dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:15:59 - pc_sort - INFO - set up sorted pc data dask array.
2024-08-14 17:15:59 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:15:59 - pc_sort - INFO - write pc to pc/pc.zarr
2024-08-14 17:15:59 - zarr_info - INFO - pc/pc.zarr zarray shape, chunks, dtype: (1000,), (100,), float32
2024-08-14 17:15:59 - pc_sort - INFO - computing graph setted. doing all the computing.
2024-08-14 17:15:59 - pc_sort - INFO - computing finished.
2024-08-14 17:15:59 - pc_sort - INFO - dask cluster closed.
pc_in = np.random.rand(1000).astype(np.float32)
hix_in = np.random.choice(np.arange(100*100,dtype=np.int64),size=1000,replace=False)
ind = np.argsort(hix_in,kind='stable')
pc = pc_in[ind]; hix = hix_in[ind]

pc_in_zarr = zarr.open('pc/pc_in.zarr','w',shape=pc_in.shape,dtype=pc_in.dtype,chunks=(100,))
hix_in_zarr = zarr.open('pc/hix_in.zarr','w',shape=hix_in.shape,dtype=hix_in.dtype,chunks=(100,))
pc_in_zarr[:] = pc_in; hix_in_zarr[:] = hix_in

pc_sort('pc/hix_in.zarr','pc/hix.zarr','pc/pc_in.zarr','pc/pc.zarr')
pc_zarr = zarr.open('pc/pc.zarr','r'); hix_zarr = zarr.open('pc/hix.zarr','r')
np.testing.assert_array_equal(pc_zarr[:],pc)
np.testing.assert_array_equal(hix_zarr[:],hix)
2024-08-14 17:16:00 - log_args - INFO - running function: pc_sort
2024-08-14 17:16:00 - log_args - INFO - fetching args:
2024-08-14 17:16:00 - log_args - INFO - idx_in = 'pc/hix_in.zarr'
2024-08-14 17:16:00 - log_args - INFO - idx = 'pc/hix.zarr'
2024-08-14 17:16:00 - log_args - INFO - pc_in = 'pc/pc_in.zarr'
2024-08-14 17:16:00 - log_args - INFO - pc = 'pc/pc.zarr'
2024-08-14 17:16:00 - log_args - INFO - shape = None
2024-08-14 17:16:00 - log_args - INFO - chunks = None
2024-08-14 17:16:00 - log_args - INFO - key = None
2024-08-14 17:16:00 - log_args - INFO - processes = False
2024-08-14 17:16:00 - log_args - INFO - n_workers = 1
2024-08-14 17:16:00 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:00 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:00 - log_args - INFO - fetching args done.
2024-08-14 17:16:00 - zarr_info - INFO - pc/hix_in.zarr zarray shape, chunks, dtype: (1000,), (100,), int64
2024-08-14 17:16:00 - pc_sort - INFO - loading idx_in and calculate the sorting indices.
2024-08-14 17:16:00 - pc_sort - INFO - output pc chunk size is 100
2024-08-14 17:16:00 - pc_sort - INFO - write idx
2024-08-14 17:16:00 - zarr_info - INFO - idx zarray shape, chunks, dtype: (1000,), (100,), int64
2024-08-14 17:16:00 - pc_sort - INFO - starting dask local cluster.
2024-08-14 17:16:00 - pc_sort - INFO - dask local cluster started.
2024-08-14 17:16:00 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:16:00 - zarr_info - INFO - pc/pc_in.zarr zarray shape, chunks, dtype: (1000,), (100,), float32
2024-08-14 17:16:00 - darr_info - INFO - pc_in dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:16:00 - pc_sort - INFO - set up sorted pc data dask array.
2024-08-14 17:16:00 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:16:00 - pc_sort - INFO - write pc to pc/pc.zarr
2024-08-14 17:16:00 - zarr_info - INFO - pc/pc.zarr zarray shape, chunks, dtype: (1000,), (100,), float32
2024-08-14 17:16:00 - pc_sort - INFO - computing graph setted. doing all the computing.
2024-08-14 17:16:00 - pc_sort - INFO - computing finished.
2024-08-14 17:16:00 - pc_sort - INFO - dask cluster closed.

source

pc_union

 pc_union (idx1:str, idx2:str, idx:str, pc1:str|list=None,
           pc2:str|list=None, pc:str|list=None, shape:tuple=None,
           chunks:int=None, processes=False, n_workers=1,
           threads_per_worker=1, **dask_cluster_arg)

Get the union of two point cloud datasets. For points in their intersection, pc_data1 rather than pc_data2 is copied to the resulting pc_data. The output chunk size is set by chunks; if it is not provided, the chunk size of idx1 is used.

Type Default Details
idx1 str grid index or Hilbert index of the first point cloud
idx2 str grid index or Hilbert index of the second point cloud
idx str output, grid index or Hilbert index of the union point cloud
pc1 str | list None path (in string) or list of paths for the first point cloud data
pc2 str | list None path (in string) or list of paths for the second point cloud data
pc str | list None output, path (in string) or list of paths for the union point cloud data
shape tuple None image shape, faster if provided for grid index input
chunks int None chunk size in output data, same as idx1 by default
processes bool False use process for dask worker or thread
n_workers int 1 number of dask worker
threads_per_worker int 1 number of threads per dask worker
dask_cluster_arg

Usage:

pc_data1 = np.random.rand(1000,3).astype(np.float32)+1j*np.random.rand(1000,3).astype(np.float32)
pc_data2 = np.random.rand(800,3).astype(np.float32)+1j*np.random.rand(800,3).astype(np.float32)

gix1 = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
gix1.sort()
gix1 = np.stack(np.unravel_index(gix1,shape=(100,100)),axis=-1).astype(np.int32)

gix2 = np.random.choice(np.arange(100*100,dtype=np.int32),size=800,replace=False)
gix2.sort()
gix2 = np.stack(np.unravel_index(gix2,shape=(100,100)),axis=-1).astype(np.int32)

gix, inv_iidx1, inv_iidx2, iidx2 = mr.pc_union(gix1,gix2)

pc_data = np.empty((gix.shape[0],*pc_data1.shape[1:]),dtype=pc_data1.dtype)
pc_data[inv_iidx1] = pc_data1
pc_data[inv_iidx2] = pc_data2[iidx2]

gix1_zarr = zarr.open('pc/gix1.zarr','w',shape=gix1.shape,dtype=gix1.dtype,chunks=(200,1))
gix2_zarr = zarr.open('pc/gix2.zarr','w',shape=gix2.shape,dtype=gix2.dtype,chunks=(200,1))
pc1_zarr = zarr.open('pc/pc1.zarr','w',shape=pc_data1.shape,dtype=pc_data1.dtype,chunks=(200,1))
pc2_zarr = zarr.open('pc/pc2.zarr','w',shape=pc_data2.shape,dtype=pc_data2.dtype,chunks=(200,1))
gix1_zarr[:] = gix1
gix2_zarr[:] = gix2
pc1_zarr[:] = pc_data1
pc2_zarr[:] = pc_data2
pc_union('pc/gix1.zarr','pc/gix2.zarr','pc/gix.zarr', shape=(100,100))
pc_union('pc/gix1.zarr','pc/gix2.zarr','pc/gix.zarr','pc/pc1.zarr','pc/pc2.zarr','pc/pc.zarr')
gix_zarr = zarr.open('pc/gix.zarr','r')
pc_zarr = zarr.open('pc/pc.zarr','r')
np.testing.assert_array_equal(gix_zarr[:],gix)
np.testing.assert_array_equal(pc_zarr[:],pc_data)
2024-08-14 17:16:01 - log_args - INFO - running function: pc_union
2024-08-14 17:16:01 - log_args - INFO - fetching args:
2024-08-14 17:16:01 - log_args - INFO - idx1 = 'pc/gix1.zarr'
2024-08-14 17:16:01 - log_args - INFO - idx2 = 'pc/gix2.zarr'
2024-08-14 17:16:01 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:16:01 - log_args - INFO - pc1 = None
2024-08-14 17:16:01 - log_args - INFO - pc2 = None
2024-08-14 17:16:01 - log_args - INFO - pc = None
2024-08-14 17:16:01 - log_args - INFO - shape = (100, 100)
2024-08-14 17:16:01 - log_args - INFO - chunks = None
2024-08-14 17:16:01 - log_args - INFO - processes = False
2024-08-14 17:16:01 - log_args - INFO - n_workers = 1
2024-08-14 17:16:01 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:01 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:01 - log_args - INFO - fetching args done.
2024-08-14 17:16:01 - zarr_info - INFO - pc/gix1.zarr zarray shape, chunks, dtype: (1000, 2), (200, 1), int32
2024-08-14 17:16:01 - zarr_info - INFO - pc/gix2.zarr zarray shape, chunks, dtype: (800, 2), (200, 1), int32
2024-08-14 17:16:01 - pc_union - INFO - loading idx1 and idx2 into memory.
2024-08-14 17:16:01 - pc_union - INFO - calculate the union
2024-08-14 17:16:01 - pc_union - INFO - number of points in the union: 1709
2024-08-14 17:16:01 - pc_union - INFO - write union idx
2024-08-14 17:16:01 - pc_union - INFO - write done
2024-08-14 17:16:01 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (1709, 2), (200, 1), int32
2024-08-14 17:16:01 - pc_union - INFO - no point cloud data provided, exit.
2024-08-14 17:16:01 - log_args - INFO - running function: pc_union
2024-08-14 17:16:01 - log_args - INFO - fetching args:
2024-08-14 17:16:01 - log_args - INFO - idx1 = 'pc/gix1.zarr'
2024-08-14 17:16:01 - log_args - INFO - idx2 = 'pc/gix2.zarr'
2024-08-14 17:16:01 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:16:01 - log_args - INFO - pc1 = 'pc/pc1.zarr'
2024-08-14 17:16:01 - log_args - INFO - pc2 = 'pc/pc2.zarr'
2024-08-14 17:16:01 - log_args - INFO - pc = 'pc/pc.zarr'
2024-08-14 17:16:01 - log_args - INFO - shape = None
2024-08-14 17:16:01 - log_args - INFO - chunks = None
2024-08-14 17:16:01 - log_args - INFO - processes = False
2024-08-14 17:16:01 - log_args - INFO - n_workers = 1
2024-08-14 17:16:01 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:01 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:01 - log_args - INFO - fetching args done.
2024-08-14 17:16:01 - zarr_info - INFO - pc/gix1.zarr zarray shape, chunks, dtype: (1000, 2), (200, 1), int32
2024-08-14 17:16:01 - zarr_info - INFO - pc/gix2.zarr zarray shape, chunks, dtype: (800, 2), (200, 1), int32
2024-08-14 17:16:01 - pc_union - INFO - loading idx1 and idx2 into memory.
2024-08-14 17:16:01 - pc_union - INFO - calculate the union
2024-08-14 17:16:01 - pc_union - INFO - number of points in the union: 1709
2024-08-14 17:16:01 - pc_union - INFO - write union idx
2024-08-14 17:16:01 - pc_union - INFO - write done
2024-08-14 17:16:01 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (1709, 2), (200, 1), int32
2024-08-14 17:16:01 - pc_union - INFO - starting dask local cluster.
2024-08-14 17:16:01 - pc_union - INFO - dask local cluster started.
2024-08-14 17:16:01 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:16:01 - zarr_info - INFO - pc/pc1.zarr zarray shape, chunks, dtype: (1000, 3), (200, 1), complex64
2024-08-14 17:16:01 - zarr_info - INFO - pc/pc2.zarr zarray shape, chunks, dtype: (800, 3), (200, 1), complex64
2024-08-14 17:16:01 - darr_info - INFO - pc1 dask array shape, chunksize, dtype: (1000, 3), (1000, 1), complex64
2024-08-14 17:16:01 - darr_info - INFO - pc2 dask array shape, chunksize, dtype: (800, 3), (800, 1), complex64
2024-08-14 17:16:01 - pc_union - INFO - set up union pc data dask array.
2024-08-14 17:16:01 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1709, 3), (1709, 1), complex64
2024-08-14 17:16:01 - pc_union - INFO - write pc to pc/pc.zarr
2024-08-14 17:16:01 - zarr_info - INFO - pc/pc.zarr zarray shape, chunks, dtype: (1709, 3), (200, 1), complex64
2024-08-14 17:16:01 - pc_union - INFO - computing graph setted. doing all the computing.
2024-08-14 17:16:01 - pc_union - INFO - computing finished.
2024-08-14 17:16:01 - pc_union - INFO - dask cluster closed.
pc_data1 = np.random.rand(1000,3).astype(np.float32)+1j*np.random.rand(1000,3).astype(np.float32)
pc_data2 = np.random.rand(800,3).astype(np.float32)+1j*np.random.rand(800,3).astype(np.float32)

hix1 = np.random.choice(np.arange(100*100,dtype=np.int64),size=1000,replace=False)
hix1.sort()

hix2 = np.random.choice(np.arange(100*100,dtype=np.int64),size=800,replace=False)
hix2.sort()

hix, inv_iidx1, inv_iidx2, iidx2 = mr.pc_union(hix1,hix2)

pc_data = np.empty((hix.shape[0],*pc_data1.shape[1:]),dtype=pc_data1.dtype)
pc_data[inv_iidx1] = pc_data1
pc_data[inv_iidx2] = pc_data2[iidx2]

hix1_zarr = zarr.open('pc/hix1.zarr','w',shape=hix1.shape,dtype=hix1.dtype,chunks=(200,))
hix2_zarr = zarr.open('pc/hix2.zarr','w',shape=hix2.shape,dtype=hix2.dtype,chunks=(200,))
pc1_zarr = zarr.open('pc/pc1.zarr','w',shape=pc_data1.shape,dtype=pc_data1.dtype,chunks=(200,1))
pc2_zarr = zarr.open('pc/pc2.zarr','w',shape=pc_data2.shape,dtype=pc_data2.dtype,chunks=(200,1))
hix1_zarr[:] = hix1
hix2_zarr[:] = hix2
pc1_zarr[:] = pc_data1
pc2_zarr[:] = pc_data2
pc_union('pc/hix1.zarr','pc/hix2.zarr','pc/hix.zarr')
pc_union('pc/hix1.zarr','pc/hix2.zarr','pc/hix.zarr','pc/pc1.zarr','pc/pc2.zarr','pc/pc.zarr')
hix_zarr = zarr.open('pc/hix.zarr','r')
pc_zarr = zarr.open('pc/pc.zarr','r')
np.testing.assert_array_equal(hix_zarr[:],hix)
np.testing.assert_array_equal(pc_zarr[:],pc_data)
2024-08-14 17:16:02 - log_args - INFO - running function: pc_union
2024-08-14 17:16:02 - log_args - INFO - fetching args:
2024-08-14 17:16:02 - log_args - INFO - idx1 = 'pc/hix1.zarr'
2024-08-14 17:16:02 - log_args - INFO - idx2 = 'pc/hix2.zarr'
2024-08-14 17:16:02 - log_args - INFO - idx = 'pc/hix.zarr'
2024-08-14 17:16:02 - log_args - INFO - pc1 = None
2024-08-14 17:16:02 - log_args - INFO - pc2 = None
2024-08-14 17:16:02 - log_args - INFO - pc = None
2024-08-14 17:16:02 - log_args - INFO - shape = None
2024-08-14 17:16:02 - log_args - INFO - chunks = None
2024-08-14 17:16:02 - log_args - INFO - processes = False
2024-08-14 17:16:02 - log_args - INFO - n_workers = 1
2024-08-14 17:16:02 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:02 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:02 - log_args - INFO - fetching args done.
2024-08-14 17:16:02 - zarr_info - INFO - pc/hix1.zarr zarray shape, chunks, dtype: (1000,), (200,), int64
2024-08-14 17:16:02 - zarr_info - INFO - pc/hix2.zarr zarray shape, chunks, dtype: (800,), (200,), int64
2024-08-14 17:16:02 - pc_union - INFO - loading idx1 and idx2 into memory.
2024-08-14 17:16:02 - pc_union - INFO - calculate the union
2024-08-14 17:16:02 - pc_union - INFO - number of points in the union: 1719
2024-08-14 17:16:02 - pc_union - INFO - write union idx
2024-08-14 17:16:02 - pc_union - INFO - write done
2024-08-14 17:16:02 - zarr_info - INFO - pc/hix.zarr zarray shape, chunks, dtype: (1719,), (200,), int64
2024-08-14 17:16:02 - pc_union - INFO - no point cloud data provided, exit.
2024-08-14 17:16:02 - log_args - INFO - running function: pc_union
2024-08-14 17:16:02 - log_args - INFO - fetching args:
2024-08-14 17:16:02 - log_args - INFO - idx1 = 'pc/hix1.zarr'
2024-08-14 17:16:02 - log_args - INFO - idx2 = 'pc/hix2.zarr'
2024-08-14 17:16:02 - log_args - INFO - idx = 'pc/hix.zarr'
2024-08-14 17:16:02 - log_args - INFO - pc1 = 'pc/pc1.zarr'
2024-08-14 17:16:02 - log_args - INFO - pc2 = 'pc/pc2.zarr'
2024-08-14 17:16:02 - log_args - INFO - pc = 'pc/pc.zarr'
2024-08-14 17:16:02 - log_args - INFO - shape = None
2024-08-14 17:16:02 - log_args - INFO - chunks = None
2024-08-14 17:16:02 - log_args - INFO - processes = False
2024-08-14 17:16:02 - log_args - INFO - n_workers = 1
2024-08-14 17:16:02 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:02 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:02 - log_args - INFO - fetching args done.
2024-08-14 17:16:02 - zarr_info - INFO - pc/hix1.zarr zarray shape, chunks, dtype: (1000,), (200,), int64
2024-08-14 17:16:02 - zarr_info - INFO - pc/hix2.zarr zarray shape, chunks, dtype: (800,), (200,), int64
2024-08-14 17:16:02 - pc_union - INFO - loading idx1 and idx2 into memory.
2024-08-14 17:16:02 - pc_union - INFO - calculate the union
2024-08-14 17:16:02 - pc_union - INFO - number of points in the union: 1719
2024-08-14 17:16:02 - pc_union - INFO - write union idx
2024-08-14 17:16:02 - pc_union - INFO - write done
2024-08-14 17:16:02 - zarr_info - INFO - pc/hix.zarr zarray shape, chunks, dtype: (1719,), (200,), int64
2024-08-14 17:16:02 - pc_union - INFO - starting dask local cluster.
2024-08-14 17:16:02 - pc_union - INFO - dask local cluster started.
2024-08-14 17:16:02 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:16:02 - zarr_info - INFO - pc/pc1.zarr zarray shape, chunks, dtype: (1000, 3), (200, 1), complex64
2024-08-14 17:16:02 - zarr_info - INFO - pc/pc2.zarr zarray shape, chunks, dtype: (800, 3), (200, 1), complex64
2024-08-14 17:16:02 - darr_info - INFO - pc1 dask array shape, chunksize, dtype: (1000, 3), (1000, 1), complex64
2024-08-14 17:16:02 - darr_info - INFO - pc2 dask array shape, chunksize, dtype: (800, 3), (800, 1), complex64
2024-08-14 17:16:02 - pc_union - INFO - set up union pc data dask array.
2024-08-14 17:16:02 - darr_info - INFO - pc dask array shape, chunksize, dtype: (1719, 3), (1719, 1), complex64
2024-08-14 17:16:02 - pc_union - INFO - write pc to pc/pc.zarr
2024-08-14 17:16:02 - zarr_info - INFO - pc/pc.zarr zarray shape, chunks, dtype: (1719, 3), (200, 1), complex64
2024-08-14 17:16:02 - pc_union - INFO - computing graph setted. doing all the computing.
2024-08-14 17:16:02 - pc_union - INFO - computing finished.
2024-08-14 17:16:02 - pc_union - INFO - dask cluster closed.

source

pc_intersect

 pc_intersect (idx1:str, idx2:str, idx:str, pc1:str|list=None,
               pc2:str|list=None, pc:str|list=None, shape:tuple=None,
               chunks:int=None, prefer_1=True, processes=False,
               n_workers=1, threads_per_worker=1, **dask_cluster_arg)

Get the intersection of two point cloud datasets. The output chunk size is set by chunks; if it is not provided, the chunk size of idx1 is used.

Type Default Details
idx1 str grid index or Hilbert index of the first point cloud
idx2 str grid index or Hilbert index of the second point cloud
idx str output, grid index or Hilbert index of the intersection point cloud
pc1 str | list None path (in string) or list of path for the first point cloud data
pc2 str | list None path (in string) or list of path for the second point cloud data
pc str | list None output, path (in string) or list of path for the intersection point cloud data
shape tuple None image shape, faster if provided for grid index input
chunks int None chunk size in output data, same as idx1 by default
prefer_1 bool True if True (default), save data from pc1 at the intersected points to the output pc dataset; otherwise, save data from pc2
processes bool False use process for dask worker or thread
n_workers int 1 number of dask worker
threads_per_worker int 1 number of threads per dask worker
dask_cluster_arg
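
For a 1D Hilbert index, the intersection computed here (and by the in-memory mr.pc_intersect used in the examples below) is conceptually a set intersection of two sorted index arrays. A minimal numpy sketch, assuming sorted and unique indices as in the examples (the actual implementation may differ):

import numpy as np

hix1 = np.sort(np.random.choice(10000, size=1000, replace=False)).astype(np.int64)
hix2 = np.sort(np.random.choice(10000, size=800, replace=False)).astype(np.int64)

# intersection plus the positions of the common points within hix1 and hix2;
# iidx1/iidx2 are what select the matching rows from pc1/pc2
hix, iidx1, iidx2 = np.intersect1d(hix1, hix2, return_indices=True)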

Usage:

pc_data1 = np.random.rand(1000,3).astype(np.float32)+1j*np.random.rand(1000,3).astype(np.float32)
pc_data2 = np.random.rand(800,3).astype(np.float32)+1j*np.random.rand(800,3).astype(np.float32)

gix1 = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
gix1.sort()
gix1 = np.stack(np.unravel_index(gix1,shape=(100,100)),axis=-1).astype(np.int32)

gix2 = np.random.choice(np.arange(100*100,dtype=np.int32),size=800,replace=False)
gix2.sort()
gix2 = np.stack(np.unravel_index(gix2,shape=(100,100)),axis=-1).astype(np.int32)

gix, iidx1, iidx2 = mr.pc_intersect(gix1,gix2)

pc_data = np.empty((gix.shape[0],*pc_data1.shape[1:]),dtype=pc_data1.dtype)
pc_data[:] = pc_data2[iidx2]

gix1_zarr = zarr.open('pc/gix1.zarr','w',shape=gix1.shape,dtype=gix1.dtype,chunks=(200,1))
gix2_zarr = zarr.open('pc/gix2.zarr','w',shape=gix2.shape,dtype=gix2.dtype,chunks=(200,1))
pc1_zarr = zarr.open('pc/pc1.zarr','w',shape=pc_data1.shape,dtype=pc_data1.dtype,chunks=(200,1))
pc2_zarr = zarr.open('pc/pc2.zarr','w',shape=pc_data2.shape,dtype=pc_data2.dtype,chunks=(200,1))
gix1_zarr[:] = gix1
gix2_zarr[:] = gix2
pc1_zarr[:] = pc_data1
pc2_zarr[:] = pc_data2
pc_intersect('pc/gix1.zarr','pc/gix2.zarr','pc/gix.zarr', shape=(100,100))
pc_intersect('pc/gix1.zarr','pc/gix2.zarr','pc/gix.zarr',pc2='pc/pc2.zarr', pc='pc/pc.zarr',prefer_1=False)
gix_zarr = zarr.open('pc/gix.zarr','r')
pc_zarr = zarr.open('pc/pc.zarr','r')
np.testing.assert_array_equal(gix_zarr[:],gix)
np.testing.assert_array_equal(pc_zarr[:],pc_data)
2024-08-14 17:16:02 - log_args - INFO - running function: pc_intersect
2024-08-14 17:16:02 - log_args - INFO - fetching args:
2024-08-14 17:16:02 - log_args - INFO - idx1 = 'pc/gix1.zarr'
2024-08-14 17:16:02 - log_args - INFO - idx2 = 'pc/gix2.zarr'
2024-08-14 17:16:02 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:16:02 - log_args - INFO - pc1 = None
2024-08-14 17:16:02 - log_args - INFO - pc2 = None
2024-08-14 17:16:02 - log_args - INFO - pc = None
2024-08-14 17:16:02 - log_args - INFO - shape = (100, 100)
2024-08-14 17:16:02 - log_args - INFO - chunks = None
2024-08-14 17:16:02 - log_args - INFO - prefer_1 = True
2024-08-14 17:16:02 - log_args - INFO - processes = False
2024-08-14 17:16:02 - log_args - INFO - n_workers = 1
2024-08-14 17:16:02 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:02 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:02 - log_args - INFO - fetching args done.
2024-08-14 17:16:02 - zarr_info - INFO - pc/gix1.zarr zarray shape, chunks, dtype: (1000, 2), (200, 1), int32
2024-08-14 17:16:02 - zarr_info - INFO - pc/gix2.zarr zarray shape, chunks, dtype: (800, 2), (200, 1), int32
2024-08-14 17:16:02 - pc_intersect - INFO - loading idx1 and idx2 into memory.
2024-08-14 17:16:02 - pc_intersect - INFO - calculate the intersection
2024-08-14 17:16:02 - pc_intersect - INFO - number of points in the intersection: 84
2024-08-14 17:16:02 - pc_intersect - INFO - write intersect idx
2024-08-14 17:16:02 - pc_intersect - INFO - write done
2024-08-14 17:16:02 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (84, 2), (200, 1), int32
2024-08-14 17:16:02 - pc_intersect - INFO - no point cloud data provided, exit.
2024-08-14 17:16:02 - log_args - INFO - running function: pc_intersect
2024-08-14 17:16:02 - log_args - INFO - fetching args:
2024-08-14 17:16:02 - log_args - INFO - idx1 = 'pc/gix1.zarr'
2024-08-14 17:16:02 - log_args - INFO - idx2 = 'pc/gix2.zarr'
2024-08-14 17:16:02 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:16:02 - log_args - INFO - pc1 = None
2024-08-14 17:16:02 - log_args - INFO - pc2 = 'pc/pc2.zarr'
2024-08-14 17:16:02 - log_args - INFO - pc = 'pc/pc.zarr'
2024-08-14 17:16:02 - log_args - INFO - shape = None
2024-08-14 17:16:02 - log_args - INFO - chunks = None
2024-08-14 17:16:02 - log_args - INFO - prefer_1 = False
2024-08-14 17:16:02 - log_args - INFO - processes = False
2024-08-14 17:16:02 - log_args - INFO - n_workers = 1
2024-08-14 17:16:02 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:02 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:02 - log_args - INFO - fetching args done.
2024-08-14 17:16:02 - zarr_info - INFO - pc/gix1.zarr zarray shape, chunks, dtype: (1000, 2), (200, 1), int32
2024-08-14 17:16:02 - zarr_info - INFO - pc/gix2.zarr zarray shape, chunks, dtype: (800, 2), (200, 1), int32
2024-08-14 17:16:02 - pc_intersect - INFO - loading idx1 and idx2 into memory.
2024-08-14 17:16:02 - pc_intersect - INFO - calculate the intersection
2024-08-14 17:16:02 - pc_intersect - INFO - number of points in the intersection: 84
2024-08-14 17:16:02 - pc_intersect - INFO - write intersect idx
2024-08-14 17:16:02 - pc_intersect - INFO - write done
2024-08-14 17:16:02 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (84, 2), (200, 1), int32
2024-08-14 17:16:02 - pc_intersect - INFO - select pc2 as pc_input.
2024-08-14 17:16:02 - pc_intersect - INFO - starting dask local cluster.
2024-08-14 17:16:02 - pc_intersect - INFO - dask local cluster started.
2024-08-14 17:16:02 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:16:02 - zarr_info - INFO - pc/pc2.zarr zarray shape, chunks, dtype: (800, 3), (200, 1), complex64
2024-08-14 17:16:02 - darr_info - INFO - pc_input dask array shape, chunksize, dtype: (800, 3), (800, 1), complex64
2024-08-14 17:16:02 - pc_intersect - INFO - set up intersect pc data dask array.
2024-08-14 17:16:02 - darr_info - INFO - pc dask array shape, chunksize, dtype: (84, 3), (84, 1), complex64
2024-08-14 17:16:02 - pc_intersect - INFO - write pc to pc/pc.zarr
2024-08-14 17:16:02 - zarr_info - INFO - pc/pc.zarr zarray shape, chunks, dtype: (84, 3), (200, 1), complex64
2024-08-14 17:16:02 - pc_intersect - INFO - computing graph setted. doing all the computing.
2024-08-14 17:16:03 - pc_intersect - INFO - computing finished. |  0.1s
2024-08-14 17:16:03 - pc_intersect - INFO - dask cluster closed.
pc_data1 = np.random.rand(1000,3).astype(np.float32)+1j*np.random.rand(1000,3).astype(np.float32)
pc_data2 = np.random.rand(800,3).astype(np.float32)+1j*np.random.rand(800,3).astype(np.float32)

hix1 = np.random.choice(np.arange(100*100,dtype=np.int64),size=1000,replace=False)
hix1.sort()

hix2 = np.random.choice(np.arange(100*100,dtype=np.int64),size=800,replace=False)
hix2.sort()

hix, iidx1, iidx2 = mr.pc_intersect(hix1,hix2)

pc_data = np.empty((hix.shape[-1],*pc_data1.shape[1:]),dtype=pc_data1.dtype)
pc_data[:] = pc_data2[iidx2]

hix1_zarr = zarr.open('pc/hix1.zarr','w',shape=hix1.shape,dtype=hix1.dtype,chunks=(200,))
hix2_zarr = zarr.open('pc/hix2.zarr','w',shape=hix2.shape,dtype=hix2.dtype,chunks=(200,))
pc1_zarr = zarr.open('pc/pc1.zarr','w',shape=pc_data1.shape,dtype=pc_data1.dtype,chunks=(200,1))
pc2_zarr = zarr.open('pc/pc2.zarr','w',shape=pc_data2.shape,dtype=pc_data2.dtype,chunks=(200,1))
hix1_zarr[:] = hix1
hix2_zarr[:] = hix2
pc1_zarr[:] = pc_data1
pc2_zarr[:] = pc_data2
pc_intersect('pc/hix1.zarr','pc/hix2.zarr','pc/hix.zarr')
pc_intersect('pc/hix1.zarr','pc/hix2.zarr','pc/hix.zarr',pc2='pc/pc2.zarr', pc='pc/pc.zarr',prefer_1=False)
hix_zarr = zarr.open('pc/hix.zarr','r')
pc_zarr = zarr.open('pc/pc.zarr','r')
np.testing.assert_array_equal(hix_zarr[:],hix)
np.testing.assert_array_equal(pc_zarr[:],pc_data)
2024-08-14 17:16:03 - log_args - INFO - running function: pc_intersect
2024-08-14 17:16:03 - log_args - INFO - fetching args:
2024-08-14 17:16:03 - log_args - INFO - idx1 = 'pc/hix1.zarr'
2024-08-14 17:16:03 - log_args - INFO - idx2 = 'pc/hix2.zarr'
2024-08-14 17:16:03 - log_args - INFO - idx = 'pc/hix.zarr'
2024-08-14 17:16:03 - log_args - INFO - pc1 = None
2024-08-14 17:16:03 - log_args - INFO - pc2 = None
2024-08-14 17:16:03 - log_args - INFO - pc = None
2024-08-14 17:16:03 - log_args - INFO - shape = None
2024-08-14 17:16:03 - log_args - INFO - chunks = None
2024-08-14 17:16:03 - log_args - INFO - prefer_1 = True
2024-08-14 17:16:03 - log_args - INFO - processes = False
2024-08-14 17:16:03 - log_args - INFO - n_workers = 1
2024-08-14 17:16:03 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:03 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:03 - log_args - INFO - fetching args done.
2024-08-14 17:16:03 - zarr_info - INFO - pc/hix1.zarr zarray shape, chunks, dtype: (1000,), (200,), int64
2024-08-14 17:16:03 - zarr_info - INFO - pc/hix2.zarr zarray shape, chunks, dtype: (800,), (200,), int64
2024-08-14 17:16:03 - pc_intersect - INFO - loading idx1 and idx2 into memory.
2024-08-14 17:16:03 - pc_intersect - INFO - calculate the intersection
2024-08-14 17:16:03 - pc_intersect - INFO - number of points in the intersection: 80
2024-08-14 17:16:03 - pc_intersect - INFO - write intersect idx
2024-08-14 17:16:03 - pc_intersect - INFO - write done
2024-08-14 17:16:03 - zarr_info - INFO - pc/hix.zarr zarray shape, chunks, dtype: (80,), (200,), int64
2024-08-14 17:16:03 - pc_intersect - INFO - no point cloud data provided, exit.
2024-08-14 17:16:03 - log_args - INFO - running function: pc_intersect
2024-08-14 17:16:03 - log_args - INFO - fetching args:
2024-08-14 17:16:03 - log_args - INFO - idx1 = 'pc/hix1.zarr'
2024-08-14 17:16:03 - log_args - INFO - idx2 = 'pc/hix2.zarr'
2024-08-14 17:16:03 - log_args - INFO - idx = 'pc/hix.zarr'
2024-08-14 17:16:03 - log_args - INFO - pc1 = None
2024-08-14 17:16:03 - log_args - INFO - pc2 = 'pc/pc2.zarr'
2024-08-14 17:16:03 - log_args - INFO - pc = 'pc/pc.zarr'
2024-08-14 17:16:03 - log_args - INFO - shape = None
2024-08-14 17:16:03 - log_args - INFO - chunks = None
2024-08-14 17:16:03 - log_args - INFO - prefer_1 = False
2024-08-14 17:16:03 - log_args - INFO - processes = False
2024-08-14 17:16:03 - log_args - INFO - n_workers = 1
2024-08-14 17:16:03 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:03 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:03 - log_args - INFO - fetching args done.
2024-08-14 17:16:03 - zarr_info - INFO - pc/hix1.zarr zarray shape, chunks, dtype: (1000,), (200,), int64
2024-08-14 17:16:03 - zarr_info - INFO - pc/hix2.zarr zarray shape, chunks, dtype: (800,), (200,), int64
2024-08-14 17:16:03 - pc_intersect - INFO - loading idx1 and idx2 into memory.
2024-08-14 17:16:03 - pc_intersect - INFO - calculate the intersection
2024-08-14 17:16:03 - pc_intersect - INFO - number of points in the intersection: 80
2024-08-14 17:16:03 - pc_intersect - INFO - write intersect idx
2024-08-14 17:16:03 - pc_intersect - INFO - write done
2024-08-14 17:16:03 - zarr_info - INFO - pc/hix.zarr zarray shape, chunks, dtype: (80,), (200,), int64
2024-08-14 17:16:03 - pc_intersect - INFO - select pc2 as pc_input.
2024-08-14 17:16:03 - pc_intersect - INFO - starting dask local cluster.
2024-08-14 17:16:03 - pc_intersect - INFO - dask local cluster started.
2024-08-14 17:16:03 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:16:03 - zarr_info - INFO - pc/pc2.zarr zarray shape, chunks, dtype: (800, 3), (200, 1), complex64
2024-08-14 17:16:03 - darr_info - INFO - pc_input dask array shape, chunksize, dtype: (800, 3), (800, 1), complex64
2024-08-14 17:16:03 - pc_intersect - INFO - set up intersect pc data dask array.
2024-08-14 17:16:03 - darr_info - INFO - pc dask array shape, chunksize, dtype: (80, 3), (80, 1), complex64
2024-08-14 17:16:03 - pc_intersect - INFO - write pc to pc/pc.zarr
2024-08-14 17:16:03 - zarr_info - INFO - pc/pc.zarr zarray shape, chunks, dtype: (80, 3), (200, 1), complex64
2024-08-14 17:16:03 - pc_intersect - INFO - computing graph setted. doing all the computing.
2024-08-14 17:16:03 - pc_intersect - INFO - computing finished. |  0.1s
2024-08-14 17:16:03 - pc_intersect - INFO - dask cluster closed.

source

pc_diff

 pc_diff (idx1:str, idx2:str, idx:str, pc1:str|list=None,
          pc:str|list=None, shape:tuple=None, chunks:int=None,
          processes=False, n_workers=1, threads_per_worker=1,
          **dask_cluster_arg)

Get the points in idx1 that are not in idx2. The output chunk size is controlled by chunks; if it is not provided, the chunk size of idx1 is used.

Type Default Details
idx1 str grid index or Hilbert index of the first point cloud
idx2 str grid index or Hilbert index of the second point cloud
idx str output, grid index or Hilbert index of the difference point cloud
pc1 str | list None path (in string) or list of path for the first point cloud data
pc str | list None output, path (in string) or list of path for the difference point cloud data
shape tuple None image shape, faster if provided for grid index input
chunks int None chunk size in output data, optional
processes bool False use process for dask worker or thread
n_workers int 1 number of dask worker
threads_per_worker int 1 number of threads per dask worker
dask_cluster_arg
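
Likewise, for a 1D Hilbert index the difference is conceptually a set difference of two sorted index arrays. A minimal numpy sketch under the same assumptions (sorted, unique indices; the actual implementation may differ):

import numpy as np

hix1 = np.sort(np.random.choice(10000, size=1000, replace=False)).astype(np.int64)
hix2 = np.sort(np.random.choice(10000, size=800, replace=False)).astype(np.int64)

# keep the points of hix1 that are not in hix2, together with
# their positions in hix1 (used to select the matching rows from pc1)
keep = ~np.isin(hix1, hix2)
hix = hix1[keep]
iidx1 = np.nonzero(keep)[0]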

Usage:

pc_data1 = np.random.rand(1000,3).astype(np.float32)+1j*np.random.rand(1000,3).astype(np.float32)

gix1 = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
gix1.sort()
gix1 = np.stack(np.unravel_index(gix1,shape=(100,100)),axis=-1).astype(np.int32)

gix2 = np.random.choice(np.arange(100*100,dtype=np.int32),size=800,replace=False)
gix2.sort()
gix2 = np.stack(np.unravel_index(gix2,shape=(100,100)),axis=-1).astype(np.int32)

gix, iidx1 = mr.pc_diff(gix1,gix2)

pc_data = np.empty((gix.shape[0],*pc_data1.shape[1:]),dtype=pc_data1.dtype)
pc_data[:] = pc_data1[iidx1]

gix1_zarr = zarr.open('pc/gix1.zarr','w',shape=gix1.shape,dtype=gix1.dtype,chunks=(200,1))
gix2_zarr = zarr.open('pc/gix2.zarr','w',shape=gix2.shape,dtype=gix2.dtype,chunks=(200,1))
pc1_zarr = zarr.open('pc/pc1.zarr','w',shape=pc_data1.shape,dtype=pc_data1.dtype,chunks=(200,1))
gix1_zarr[:] = gix1
gix2_zarr[:] = gix2
pc1_zarr[:] = pc_data1
pc_diff('pc/gix1.zarr','pc/gix2.zarr','pc/gix.zarr')
pc_diff('pc/gix1.zarr','pc/gix2.zarr','pc/gix.zarr',pc1='pc/pc1.zarr', pc='pc/pc.zarr')
gix_zarr = zarr.open('pc/gix.zarr','r')
pc_zarr = zarr.open('pc/pc.zarr','r')
np.testing.assert_array_equal(gix_zarr[:],gix)
np.testing.assert_array_equal(pc_zarr[:],pc_data)
2024-08-14 17:16:03 - log_args - INFO - running function: pc_diff
2024-08-14 17:16:03 - log_args - INFO - fetching args:
2024-08-14 17:16:03 - log_args - INFO - idx1 = 'pc/gix1.zarr'
2024-08-14 17:16:03 - log_args - INFO - idx2 = 'pc/gix2.zarr'
2024-08-14 17:16:03 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:16:03 - log_args - INFO - pc1 = None
2024-08-14 17:16:03 - log_args - INFO - pc = None
2024-08-14 17:16:03 - log_args - INFO - shape = None
2024-08-14 17:16:03 - log_args - INFO - chunks = None
2024-08-14 17:16:03 - log_args - INFO - processes = False
2024-08-14 17:16:03 - log_args - INFO - n_workers = 1
2024-08-14 17:16:03 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:03 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:03 - log_args - INFO - fetching args done.
2024-08-14 17:16:03 - zarr_info - INFO - pc/gix1.zarr zarray shape, chunks, dtype: (1000, 2), (200, 1), int32
2024-08-14 17:16:03 - zarr_info - INFO - pc/gix2.zarr zarray shape, chunks, dtype: (800, 2), (200, 1), int32
2024-08-14 17:16:03 - pc_diff - INFO - loading idx1 and idx2 into memory.
2024-08-14 17:16:03 - pc_diff - INFO - calculate the diff.
2024-08-14 17:16:03 - pc_diff - INFO - number of points in the diff: 933
2024-08-14 17:16:04 - pc_diff - INFO - write intersect idx
2024-08-14 17:16:04 - pc_diff - INFO - write done
2024-08-14 17:16:04 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (933, 2), (200, 1), int32
2024-08-14 17:16:04 - pc_diff - INFO - no point cloud data provided, exit.
2024-08-14 17:16:04 - log_args - INFO - running function: pc_diff
2024-08-14 17:16:04 - log_args - INFO - fetching args:
2024-08-14 17:16:04 - log_args - INFO - idx1 = 'pc/gix1.zarr'
2024-08-14 17:16:04 - log_args - INFO - idx2 = 'pc/gix2.zarr'
2024-08-14 17:16:04 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:16:04 - log_args - INFO - pc1 = 'pc/pc1.zarr'
2024-08-14 17:16:04 - log_args - INFO - pc = 'pc/pc.zarr'
2024-08-14 17:16:04 - log_args - INFO - shape = None
2024-08-14 17:16:04 - log_args - INFO - chunks = None
2024-08-14 17:16:04 - log_args - INFO - processes = False
2024-08-14 17:16:04 - log_args - INFO - n_workers = 1
2024-08-14 17:16:04 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:04 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:04 - log_args - INFO - fetching args done.
2024-08-14 17:16:04 - zarr_info - INFO - pc/gix1.zarr zarray shape, chunks, dtype: (1000, 2), (200, 1), int32
2024-08-14 17:16:04 - zarr_info - INFO - pc/gix2.zarr zarray shape, chunks, dtype: (800, 2), (200, 1), int32
2024-08-14 17:16:04 - pc_diff - INFO - loading idx1 and idx2 into memory.
2024-08-14 17:16:04 - pc_diff - INFO - calculate the diff.
2024-08-14 17:16:04 - pc_diff - INFO - number of points in the diff: 933
2024-08-14 17:16:04 - pc_diff - INFO - write intersect idx
2024-08-14 17:16:04 - pc_diff - INFO - write done
2024-08-14 17:16:04 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (933, 2), (200, 1), int32
2024-08-14 17:16:04 - pc_diff - INFO - starting dask local cluster.
2024-08-14 17:16:04 - pc_diff - INFO - dask local cluster started.
2024-08-14 17:16:04 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:16:04 - zarr_info - INFO - pc/pc1.zarr zarray shape, chunks, dtype: (1000, 3), (200, 1), complex64
2024-08-14 17:16:04 - darr_info - INFO - pc1 dask array shape, chunksize, dtype: (1000, 3), (1000, 1), complex64
2024-08-14 17:16:04 - pc_diff - INFO - set up diff pc data dask array.
2024-08-14 17:16:04 - darr_info - INFO - pc dask array shape, chunksize, dtype: (933, 3), (933, 1), complex64
2024-08-14 17:16:04 - pc_diff - INFO - write pc to pc/pc.zarr
2024-08-14 17:16:04 - zarr_info - INFO - pc/pc.zarr zarray shape, chunks, dtype: (933, 3), (200, 1), complex64
2024-08-14 17:16:04 - pc_diff - INFO - computing graph setted. doing all the computing.
2024-08-14 17:16:04 - pc_diff - INFO - computing finished. |  0.1s
2024-08-14 17:16:04 - pc_diff - INFO - dask cluster closed.
pc_data1 = np.random.rand(1000,3).astype(np.float32)+1j*np.random.rand(1000,3).astype(np.float32)

hix1 = np.random.choice(np.arange(100*100,dtype=np.int64),size=1000,replace=False)
hix1.sort()

hix2 = np.random.choice(np.arange(100*100,dtype=np.int64),size=800,replace=False)
hix2.sort()

hix, iidx1 = mr.pc_diff(hix1,hix2)

pc_data = np.empty((hix.shape[-1],*pc_data1.shape[1:]),dtype=pc_data1.dtype)
pc_data[:] = pc_data1[iidx1]

hix1_zarr = zarr.open('pc/hix1.zarr','w',shape=hix1.shape,dtype=hix1.dtype,chunks=(200,))
hix2_zarr = zarr.open('pc/hix2.zarr','w',shape=hix2.shape,dtype=hix2.dtype,chunks=(200,))
pc1_zarr = zarr.open('pc/pc1.zarr','w',shape=pc_data1.shape,dtype=pc_data1.dtype,chunks=(200,1))
hix1_zarr[:] = hix1
hix2_zarr[:] = hix2
pc1_zarr[:] = pc_data1
pc_diff('pc/hix1.zarr','pc/hix2.zarr','pc/hix.zarr')
pc_diff('pc/hix1.zarr','pc/hix2.zarr','pc/hix.zarr',pc1='pc/pc1.zarr', pc='pc/pc.zarr')
hix_zarr = zarr.open('pc/hix.zarr','r')
pc_zarr = zarr.open('pc/pc.zarr','r')
np.testing.assert_array_equal(hix_zarr[:],hix)
np.testing.assert_array_equal(pc_zarr[:],pc_data)
2024-08-14 17:16:04 - log_args - INFO - running function: pc_diff
2024-08-14 17:16:04 - log_args - INFO - fetching args:
2024-08-14 17:16:04 - log_args - INFO - idx1 = 'pc/hix1.zarr'
2024-08-14 17:16:04 - log_args - INFO - idx2 = 'pc/hix2.zarr'
2024-08-14 17:16:04 - log_args - INFO - idx = 'pc/hix.zarr'
2024-08-14 17:16:04 - log_args - INFO - pc1 = None
2024-08-14 17:16:04 - log_args - INFO - pc = None
2024-08-14 17:16:04 - log_args - INFO - shape = None
2024-08-14 17:16:04 - log_args - INFO - chunks = None
2024-08-14 17:16:04 - log_args - INFO - processes = False
2024-08-14 17:16:04 - log_args - INFO - n_workers = 1
2024-08-14 17:16:04 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:04 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:04 - log_args - INFO - fetching args done.
2024-08-14 17:16:04 - zarr_info - INFO - pc/hix1.zarr zarray shape, chunks, dtype: (1000,), (200,), int64
2024-08-14 17:16:04 - zarr_info - INFO - pc/hix2.zarr zarray shape, chunks, dtype: (800,), (200,), int64
2024-08-14 17:16:04 - pc_diff - INFO - loading idx1 and idx2 into memory.
2024-08-14 17:16:04 - pc_diff - INFO - calculate the diff.
2024-08-14 17:16:04 - pc_diff - INFO - number of points in the diff: 920
2024-08-14 17:16:04 - pc_diff - INFO - write intersect idx
2024-08-14 17:16:04 - pc_diff - INFO - write done
2024-08-14 17:16:04 - zarr_info - INFO - pc/hix.zarr zarray shape, chunks, dtype: (920,), (200,), int64
2024-08-14 17:16:04 - pc_diff - INFO - no point cloud data provided, exit.
2024-08-14 17:16:04 - log_args - INFO - running function: pc_diff
2024-08-14 17:16:04 - log_args - INFO - fetching args:
2024-08-14 17:16:04 - log_args - INFO - idx1 = 'pc/hix1.zarr'
2024-08-14 17:16:04 - log_args - INFO - idx2 = 'pc/hix2.zarr'
2024-08-14 17:16:04 - log_args - INFO - idx = 'pc/hix.zarr'
2024-08-14 17:16:04 - log_args - INFO - pc1 = 'pc/pc1.zarr'
2024-08-14 17:16:04 - log_args - INFO - pc = 'pc/pc.zarr'
2024-08-14 17:16:04 - log_args - INFO - shape = None
2024-08-14 17:16:04 - log_args - INFO - chunks = None
2024-08-14 17:16:04 - log_args - INFO - processes = False
2024-08-14 17:16:04 - log_args - INFO - n_workers = 1
2024-08-14 17:16:04 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:04 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:04 - log_args - INFO - fetching args done.
2024-08-14 17:16:04 - zarr_info - INFO - pc/hix1.zarr zarray shape, chunks, dtype: (1000,), (200,), int64
2024-08-14 17:16:04 - zarr_info - INFO - pc/hix2.zarr zarray shape, chunks, dtype: (800,), (200,), int64
2024-08-14 17:16:04 - pc_diff - INFO - loading idx1 and idx2 into memory.
2024-08-14 17:16:04 - pc_diff - INFO - calculate the diff.
2024-08-14 17:16:04 - pc_diff - INFO - number of points in the diff: 920
2024-08-14 17:16:04 - pc_diff - INFO - write intersect idx
2024-08-14 17:16:04 - pc_diff - INFO - write done
2024-08-14 17:16:04 - zarr_info - INFO - pc/hix.zarr zarray shape, chunks, dtype: (920,), (200,), int64
2024-08-14 17:16:04 - pc_diff - INFO - starting dask local cluster.
2024-08-14 17:16:04 - pc_diff - INFO - dask local cluster started.
2024-08-14 17:16:04 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:16:04 - zarr_info - INFO - pc/pc1.zarr zarray shape, chunks, dtype: (1000, 3), (200, 1), complex64
2024-08-14 17:16:04 - darr_info - INFO - pc1 dask array shape, chunksize, dtype: (1000, 3), (1000, 1), complex64
2024-08-14 17:16:04 - pc_diff - INFO - set up diff pc data dask array.
2024-08-14 17:16:04 - darr_info - INFO - pc dask array shape, chunksize, dtype: (920, 3), (920, 1), complex64
2024-08-14 17:16:04 - pc_diff - INFO - write pc to pc/pc.zarr
2024-08-14 17:16:04 - zarr_info - INFO - pc/pc.zarr zarray shape, chunks, dtype: (920, 3), (200, 1), complex64
2024-08-14 17:16:04 - pc_diff - INFO - computing graph setted. doing all the computing.
2024-08-14 17:16:05 - pc_diff - INFO - computing finished. |  0.2s
2024-08-14 17:16:05 - pc_diff - INFO - dask cluster closed.

source

pc_logic_ras

 pc_logic_ras (ras, gix, operation:str, chunks:int=100000)

Generate point cloud index based on a logical operation on one raster image.

Type Default Details
ras the raster image used for thresholding
gix output, grid index of selected pixels
operation str logical operation on the input raster, evaluated with the raster bound to the name ras, e.g. '(ras>=0.1)&(ras<=0.5)'
chunks int 100000 chunk size in output data, optional

Usage:
ras = np.random.rand(100,100).astype(np.float32)
min_thres = 0.1; max_thres=0.5
is_pc = (ras>=min_thres) & (ras<=max_thres)
gix = np.stack(np.where(is_pc),axis=-1).astype(np.int32)
ras_zarr = zarr.open('pc/ras.zarr','w',shape=ras.shape,dtype=ras.dtype,chunks=(10,100))
ras_zarr[:] = ras
pc_logic_ras('pc/ras.zarr','pc/gix.zarr',f'(ras>={min_thres})&(ras<={max_thres})')
gix_zarr = zarr.open('pc/gix.zarr','r')
np.testing.assert_array_equal(gix_zarr[:],gix)
2024-08-14 17:16:05 - log_args - INFO - running function: pc_logic_ras
2024-08-14 17:16:05 - log_args - INFO - fetching args:
2024-08-14 17:16:05 - log_args - INFO - ras = 'pc/ras.zarr'
2024-08-14 17:16:05 - log_args - INFO - gix = 'pc/gix.zarr'
2024-08-14 17:16:05 - log_args - INFO - operation = '(ras>=0.1)&(ras<=0.5)'
2024-08-14 17:16:05 - log_args - INFO - chunks = 100000
2024-08-14 17:16:05 - log_args - INFO - fetching args done.
2024-08-14 17:16:05 - zarr_info - INFO - pc/ras.zarr zarray shape, chunks, dtype: (100, 100), (10, 100), float32
2024-08-14 17:16:05 - pc_logic_ras - INFO - loading ras into memory.
2024-08-14 17:16:05 - pc_logic_ras - INFO - select pc based on operation: (ras>=0.1)&(ras<=0.5)
2024-08-14 17:16:05 - pc_logic_ras - INFO - number of selected pixels: 3987.
2024-08-14 17:16:05 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (3987, 2), (100000, 1), int32
2024-08-14 17:16:05 - pc_logic_ras - INFO - writing gix.
2024-08-14 17:16:05 - pc_logic_ras - INFO - write done.

source

pc_logic_pc

 pc_logic_pc (idx_in:str, pc_in:str, idx:str, operation:str,
              chunks:int=None)

Generate a point cloud index based on a logical operation on one point cloud dataset.

Type Default Details
idx_in str the grid index or Hilbert index of the input pc data
pc_in str the point cloud data used for thresholding
idx str output, grid index or Hilbert index of selected pixels
operation str logical operation on pc_in, e.g. '(pc_in>=0.1)&(pc_in<=0.5)'
chunks int None chunk size in output data, optional

Usage:

pc_in = np.random.rand(1000).astype(np.float32)
gix_in = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
gix_in.sort()
gix_in = np.stack(np.unravel_index(gix_in,shape=(100,100)),axis=-1).astype(np.int32)

min_thres = 0.1; max_thres=0.5
is_pc = (pc_in>=min_thres) & (pc_in<=max_thres)
gix = gix_in[is_pc]
pc_in_zarr = zarr.open('pc/pc_in.zarr','w',shape=pc_in.shape,dtype=pc_in.dtype,chunks=(100,))
gix_in_zarr = zarr.open('pc/gix_in.zarr','w',shape=gix_in.shape,dtype=gix_in.dtype,chunks=(100,1))
pc_in_zarr[:] = pc_in; gix_in_zarr[:] = gix_in
pc_logic_pc('pc/gix_in.zarr','pc/pc_in.zarr','pc/gix.zarr',f'(pc_in>={min_thres})&(pc_in<={max_thres})')
gix_zarr = zarr.open('pc/gix.zarr','r')
np.testing.assert_array_equal(gix_zarr[:],gix)
2024-08-14 17:16:05 - log_args - INFO - running function: pc_logic_pc
2024-08-14 17:16:05 - log_args - INFO - fetching args:
2024-08-14 17:16:05 - log_args - INFO - idx_in = 'pc/gix_in.zarr'
2024-08-14 17:16:05 - log_args - INFO - pc_in = 'pc/pc_in.zarr'
2024-08-14 17:16:05 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:16:05 - log_args - INFO - operation = '(pc_in>=0.1)&(pc_in<=0.5)'
2024-08-14 17:16:05 - log_args - INFO - chunks = None
2024-08-14 17:16:05 - log_args - INFO - fetching args done.
2024-08-14 17:16:05 - zarr_info - INFO - pc/gix_in.zarr zarray shape, chunks, dtype: (1000, 2), (100, 1), int32
2024-08-14 17:16:05 - zarr_info - INFO - pc/pc_in.zarr zarray shape, chunks, dtype: (1000,), (100,), float32
2024-08-14 17:16:05 - pc_logic_pc - INFO - loading idx_in into memory.
2024-08-14 17:16:05 - pc_logic_pc - INFO - loading pc_in into memory.
2024-08-14 17:16:05 - pc_logic_pc - INFO - select pc based on operation: (pc_in>=0.1)&(pc_in<=0.5)
2024-08-14 17:16:05 - pc_logic_pc - INFO - number of selected pixels: 435.
2024-08-14 17:16:05 - zarr_info - INFO - idx zarray shape, chunks, dtype: (435, 2), (100, 1), int32
2024-08-14 17:16:05 - pc_logic_pc - INFO - writing idx.
2024-08-14 17:16:05 - pc_logic_pc - INFO - write done.
pc_in = np.random.rand(1000).astype(np.float32)
hix_in = np.random.choice(np.arange(100*100,dtype=np.int64),size=1000,replace=False)
hix_in.sort()

min_thres = 0.1; max_thres=0.5
is_pc = (pc_in>=min_thres) & (pc_in<=max_thres)
hix = hix_in[is_pc]
pc_in_zarr = zarr.open('pc/pc_in.zarr','w',shape=pc_in.shape,dtype=pc_in.dtype,chunks=(100,))
hix_in_zarr = zarr.open('pc/hix_in.zarr','w',shape=hix_in.shape,dtype=hix_in.dtype,chunks=(100,))
pc_in_zarr[:] = pc_in; hix_in_zarr[:] = hix_in

pc_logic_pc('pc/hix_in.zarr','pc/pc_in.zarr','pc/hix.zarr',f'(pc_in>={min_thres})&(pc_in<={max_thres})')
hix_zarr = zarr.open('pc/hix.zarr','r')
np.testing.assert_array_equal(hix_zarr[:],hix)
2024-08-14 17:16:05 - log_args - INFO - running function: pc_logic_pc
2024-08-14 17:16:05 - log_args - INFO - fetching args:
2024-08-14 17:16:05 - log_args - INFO - idx_in = 'pc/hix_in.zarr'
2024-08-14 17:16:05 - log_args - INFO - pc_in = 'pc/pc_in.zarr'
2024-08-14 17:16:05 - log_args - INFO - idx = 'pc/hix.zarr'
2024-08-14 17:16:05 - log_args - INFO - operation = '(pc_in>=0.1)&(pc_in<=0.5)'
2024-08-14 17:16:05 - log_args - INFO - chunks = None
2024-08-14 17:16:05 - log_args - INFO - fetching args done.
2024-08-14 17:16:05 - zarr_info - INFO - pc/hix_in.zarr zarray shape, chunks, dtype: (1000,), (100,), int64
2024-08-14 17:16:05 - zarr_info - INFO - pc/pc_in.zarr zarray shape, chunks, dtype: (1000,), (100,), float32
2024-08-14 17:16:05 - pc_logic_pc - INFO - loading idx_in into memory.
2024-08-14 17:16:05 - pc_logic_pc - INFO - loading pc_in into memory.
2024-08-14 17:16:05 - pc_logic_pc - INFO - select pc based on operation: (pc_in>=0.1)&(pc_in<=0.5)
2024-08-14 17:16:05 - pc_logic_pc - INFO - number of selected pixels: 381.
2024-08-14 17:16:05 - zarr_info - INFO - idx zarray shape, chunks, dtype: (381,), (100,), int64
2024-08-14 17:16:05 - pc_logic_pc - INFO - writing idx.
2024-08-14 17:16:05 - pc_logic_pc - INFO - write done.
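
pc_logic_pc writes only the selected index; to also extract the matching point cloud data, one option is to chain it with pc_select_data (documented next). A sketch reusing the grid-index zarr stores written in the examples above (the output path pc/pc_sel.zarr is only illustrative):

pc_logic_pc('pc/gix_in.zarr','pc/pc_in.zarr','pc/gix.zarr',f'(pc_in>={min_thres})&(pc_in<={max_thres})')
pc_select_data('pc/gix_in.zarr','pc/gix.zarr','pc/pc_in.zarr','pc/pc_sel.zarr')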

source

pc_select_data

 pc_select_data (idx_in:str, idx:str, pc_in:str|list, pc:str|list,
                 shape:tuple=None, chunks:int=None, processes=False,
                 n_workers=1, threads_per_worker=1, **dask_cluster_arg)

Generate point cloud data based on its index and an existing point cloud dataset. The index of the generated point cloud data must be contained in the index of the input one.

Type Default Details
idx_in str the grid index or Hilbert index of the input data
idx str the grid index or Hilbert index of the output data
pc_in str | list path (in string) or list of path for the input point cloud data
pc str | list output, path (in string) or list of path for the output point cloud data
shape tuple None shape of the raster data the point cloud is derived from; must be provided if idx is a Hilbert index
chunks int None chunk size in output data, same as chunks of idx by default
processes bool False use process for dask worker or thread
n_workers int 1 number of dask worker
threads_per_worker int 1 number of threads per dask worker
dask_cluster_arg
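
For a 1D Hilbert index, the selection is conceptually a lookup of where each output index sits inside the input index (the output index must be contained in the input one). A minimal numpy sketch, assuming sorted indices (the actual implementation may differ):

import numpy as np

hix_in = np.sort(np.random.choice(10000, size=1000, replace=False)).astype(np.int64)
hix = np.sort(np.random.choice(hix_in, size=500, replace=False))  # a subset of hix_in
pc_in = np.random.rand(1000).astype(np.float32)

# position of every output index within the sorted input index
pos = np.searchsorted(hix_in, hix)
pc = pc_in[pos]

Usage: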
pc_in = np.random.rand(1000,4).astype(np.float32)
gix_in = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
gix_in.sort()
gix_in = np.stack(np.unravel_index(gix_in,shape=(100,100)),axis=-1).astype(np.int32)

iidx_in = np.random.choice(np.arange(1000,dtype=np.int64),size=500,replace=False); iidx_in.sort()
gix = gix_in[iidx_in]
pc = pc_in[iidx_in]

pc_in_zarr = zarr.open('pc/pc_in.zarr','w',shape=pc_in.shape,dtype=pc_in.dtype,chunks=(100,1))
gix_in_zarr = zarr.open('pc/gix_in.zarr','w',shape=gix_in.shape,dtype=gix_in.dtype,chunks=(100,1))
gix_zarr = zarr.open('pc/gix.zarr','w',shape=gix.shape,dtype=gix.dtype,chunks=(100,1))
pc_in_zarr[:] = pc_in; gix_in_zarr[:] = gix_in; gix_zarr[:] = gix
pc_select_data('pc/gix_in.zarr','pc/gix.zarr','pc/pc_in.zarr','pc/pc.zarr')
pc_zarr = zarr.open('pc/pc.zarr','r')
np.testing.assert_array_equal(pc_zarr[:],pc)
2024-08-14 17:16:06 - log_args - INFO - running function: pc_select_data
2024-08-14 17:16:06 - log_args - INFO - fetching args:
2024-08-14 17:16:06 - log_args - INFO - idx_in = 'pc/gix_in.zarr'
2024-08-14 17:16:06 - log_args - INFO - idx = 'pc/gix.zarr'
2024-08-14 17:16:06 - log_args - INFO - pc_in = 'pc/pc_in.zarr'
2024-08-14 17:16:06 - log_args - INFO - pc = 'pc/pc.zarr'
2024-08-14 17:16:06 - log_args - INFO - shape = None
2024-08-14 17:16:06 - log_args - INFO - chunks = None
2024-08-14 17:16:06 - log_args - INFO - processes = False
2024-08-14 17:16:06 - log_args - INFO - n_workers = 1
2024-08-14 17:16:06 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:06 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:06 - log_args - INFO - fetching args done.
2024-08-14 17:16:06 - zarr_info - INFO - pc/gix_in.zarr zarray shape, chunks, dtype: (1000, 2), (100, 1), int32
2024-08-14 17:16:06 - zarr_info - INFO - pc/gix.zarr zarray shape, chunks, dtype: (500, 2), (100, 1), int32
2024-08-14 17:16:06 - pc_select_data - INFO - loading idx_in and idx into memory.
2024-08-14 17:16:07 - pc_select_data - INFO - starting dask local cluster.
2024-08-14 17:16:07 - pc_select_data - INFO - dask local cluster started.
2024-08-14 17:16:07 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:16:07 - zarr_info - INFO - pc/pc_in.zarr zarray shape, chunks, dtype: (1000, 4), (100, 1), float32
2024-08-14 17:16:07 - darr_info - INFO - pc_in dask array shape, chunksize, dtype: (1000, 4), (1000, 1), float32
2024-08-14 17:16:07 - pc_select_data - INFO - set up selected pc data dask array.
2024-08-14 17:16:07 - darr_info - INFO - pc dask array shape, chunksize, dtype: (500, 4), (500, 1), float32
2024-08-14 17:16:07 - pc_select_data - INFO - write pc to pc/pc.zarr
2024-08-14 17:16:07 - zarr_info - INFO - pc/pc.zarr zarray shape, chunks, dtype: (500, 4), (100, 1), float32
2024-08-14 17:16:07 - pc_select_data - INFO - computing graph setted. doing all the computing.
2024-08-14 17:16:07 - pc_select_data - INFO - computing finished.1s
2024-08-14 17:16:07 - pc_select_data - INFO - dask cluster closed.
pc_in = np.random.rand(1000).astype(np.float32)
hix_in = np.random.choice(np.arange(100*100,dtype=np.int64),size=1000,replace=False)
hix_in.sort()

iidx_in = np.random.choice(np.arange(1000,dtype=np.int64),size=500,replace=False)
iidx_in.sort()

hix = hix_in[iidx_in]
pc = pc_in[iidx_in]

pc_in_zarr = zarr.open('pc/pc_in.zarr','w',shape=pc_in.shape,dtype=pc_in.dtype,chunks=(100,))
hix_in_zarr = zarr.open('pc/hix_in.zarr','w',shape=hix_in.shape,dtype=hix_in.dtype,chunks=(100,))
hix_zarr = zarr.open('pc/hix.zarr','w',shape=hix.shape,dtype=hix.dtype,chunks=(100,))
pc_in_zarr[:] = pc_in; hix_in_zarr[:] = hix_in; hix_zarr[:] = hix

pc_select_data('pc/hix_in.zarr','pc/hix.zarr','pc/pc_in.zarr','pc/pc.zarr')
pc_zarr = zarr.open('pc/pc.zarr','r')
np.testing.assert_array_equal(pc_zarr[:],pc)
2024-08-14 17:16:08 - log_args - INFO - running function: pc_select_data
2024-08-14 17:16:08 - log_args - INFO - fetching args:
2024-08-14 17:16:08 - log_args - INFO - idx_in = 'pc/hix_in.zarr'
2024-08-14 17:16:08 - log_args - INFO - idx = 'pc/hix.zarr'
2024-08-14 17:16:08 - log_args - INFO - pc_in = 'pc/pc_in.zarr'
2024-08-14 17:16:08 - log_args - INFO - pc = 'pc/pc.zarr'
2024-08-14 17:16:08 - log_args - INFO - shape = None
2024-08-14 17:16:08 - log_args - INFO - chunks = None
2024-08-14 17:16:08 - log_args - INFO - processes = False
2024-08-14 17:16:08 - log_args - INFO - n_workers = 1
2024-08-14 17:16:08 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:08 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:08 - log_args - INFO - fetching args done.
2024-08-14 17:16:08 - zarr_info - INFO - pc/hix_in.zarr zarray shape, chunks, dtype: (1000,), (100,), int64
2024-08-14 17:16:08 - zarr_info - INFO - pc/hix.zarr zarray shape, chunks, dtype: (500,), (100,), int64
2024-08-14 17:16:08 - pc_select_data - INFO - loading idx_in and idx into memory.
2024-08-14 17:16:08 - pc_select_data - INFO - starting dask local cluster.
2024-08-14 17:16:08 - pc_select_data - INFO - dask local cluster started.
2024-08-14 17:16:08 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:16:08 - zarr_info - INFO - pc/pc_in.zarr zarray shape, chunks, dtype: (1000,), (100,), float32
2024-08-14 17:16:08 - darr_info - INFO - pc_in dask array shape, chunksize, dtype: (1000,), (1000,), float32
2024-08-14 17:16:08 - pc_select_data - INFO - set up selected pc data dask array.
2024-08-14 17:16:08 - darr_info - INFO - pc dask array shape, chunksize, dtype: (500,), (500,), float32
2024-08-14 17:16:08 - pc_select_data - INFO - write pc to pc/pc.zarr
2024-08-14 17:16:08 - zarr_info - INFO - pc/pc.zarr zarray shape, chunks, dtype: (500,), (100,), float32
2024-08-14 17:16:08 - pc_select_data - INFO - computing graph setted. doing all the computing.
2024-08-14 17:16:08 - pc_select_data - INFO - computing finished.1s
2024-08-14 17:16:08 - pc_select_data - INFO - dask cluster closed.

source

data_reduce

 data_reduce (data_in:str, out:str, map_func:Callable=None,
              reduce_func:Callable=<function mean>,
              axis=0, post_map_func:Callable=None, processes=False,
              n_workers=1, threads_per_worker=1, **dask_cluster_arg)

Reduction operation for a dataset.

Type Default Details
data_in str path (in string) for the input data
out str output, path (in string) for the output data
map_func Callable None elementwise mapping function for input, no mapping by default
reduce_func Callable mean reduction function after mapping, np.mean by default
axis int 0 axis to be reduced, 0 for point cloud data, (0,1) for raster data
post_map_func Callable None post mapping after reduction, no mapping by default
processes bool False use process for dask worker or thread
n_workers int 1 number of dask worker
threads_per_worker int 1 number of threads per dask worker
dask_cluster_arg
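
As the log output below shows, data_reduce applies map_func elementwise, applies reduce_func within every chunk, applies reduce_func again across the per-chunk results, and finally applies post_map_func. This is presumably why the example uses reduce_func=np.sum together with a post_map_func dividing by the number of points to obtain a mean: a sum is associative across chunks, whereas a mean of per-chunk means is only exact for equal-sized chunks. A minimal numpy sketch of the same two-stage pattern (the 200-point chunking only mirrors the example):

import numpy as np

pc_in1 = np.random.rand(1000).astype(np.float32)

# stage 1: elementwise map, then reduce within every chunk of 200 points
chunk_sums = [np.sum(np.abs(c)) for c in np.split(pc_in1, 5)]
# stage 2: reduce across the per-chunk results, then apply the post-mapping
result = np.sum(chunk_sums) / 1000

np.testing.assert_allclose(result, np.mean(np.abs(pc_in1)), rtol=1e-5)

Usage: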
pc_in1 = np.random.rand(1000).astype(np.float32)+1j*np.random.rand(1000).astype(np.float32)
pc_in2 = np.random.rand(800,3).astype(np.float32)+1j*np.random.rand(800,3).astype(np.float32)

pc1_zarr = zarr.open('pc/pc_in1.zarr','w',shape=pc_in1.shape,dtype=pc_in1.dtype,chunks=(200,))
pc2_zarr = zarr.open('pc/pc_in2.zarr','w',shape=pc_in2.shape,dtype=pc_in2.dtype,chunks=(200,1))
pc1_zarr[:] = pc_in1
pc2_zarr[:] = pc_in2
data_reduce('pc/pc_in1.zarr','pc/pc_out1.zarr',map_func=np.abs,reduce_func=np.sum,post_map_func=lambda x: x/1000)
data_reduce('pc/pc_in2.zarr','pc/pc_out2.zarr',map_func=np.abs,reduce_func=np.sum,post_map_func=lambda x: x/800)
np.testing.assert_array_almost_equal(zarr.open('pc/pc_out1.zarr','r')[:][0], np.mean(np.abs(pc_in1),axis=0))
np.testing.assert_array_almost_equal(zarr.open('pc/pc_out2.zarr','r')[:], np.mean(np.abs(pc_in2),axis=0))
2024-08-14 17:16:08 - log_args - INFO - running function: data_reduce
2024-08-14 17:16:08 - log_args - INFO - fetching args:
2024-08-14 17:16:08 - log_args - INFO - data_in = 'pc/pc_in1.zarr'
2024-08-14 17:16:08 - log_args - INFO - out = 'pc/pc_out1.zarr'
2024-08-14 17:16:08 - log_args - INFO - map_func = <ufunc 'absolute'>
2024-08-14 17:16:08 - log_args - INFO - reduce_func = <function sum>
2024-08-14 17:16:08 - log_args - INFO - axis = 0
2024-08-14 17:16:08 - log_args - INFO - post_map_func = <function <lambda>>
2024-08-14 17:16:08 - log_args - INFO - processes = False
2024-08-14 17:16:08 - log_args - INFO - n_workers = 1
2024-08-14 17:16:08 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:08 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:08 - log_args - INFO - fetching args done.
2024-08-14 17:16:08 - zarr_info - INFO - pc/pc_in1.zarr zarray shape, chunks, dtype: (1000,), (200,), complex64
2024-08-14 17:16:08 - data_reduce - INFO - starting dask local cluster.
2024-08-14 17:16:08 - data_reduce - INFO - dask local cluster started.
2024-08-14 17:16:08 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:16:08 - darr_info - INFO - data_in dask array shape, chunksize, dtype: (1000,), (200,), complex64
2024-08-14 17:16:08 - darr_info - INFO - maped_data_in dask array shape, chunksize, dtype: (1000,), (200,), float32
2024-08-14 17:16:08 - darr_info - INFO - reduced data in every chunk dask array shape, chunksize, dtype: (5,), (1,), float32
2024-08-14 17:16:08 - data_reduce - INFO - computing graph setted. doing all the computing.
2024-08-14 17:16:09 - data_reduce - INFO - computing finished. 0.1s
2024-08-14 17:16:09 - data_reduce - INFO - dask cluster closed.
2024-08-14 17:16:09 - data_reduce - INFO - continue the reduction on reduced data over every chunk
2024-08-14 17:16:09 - data_reduce - INFO - post mapping
2024-08-14 17:16:09 - data_reduce - INFO - writing output.
2024-08-14 17:16:09 - data_reduce - INFO - done.
2024-08-14 17:16:09 - log_args - INFO - running function: data_reduce
2024-08-14 17:16:09 - log_args - INFO - fetching args:
2024-08-14 17:16:09 - log_args - INFO - data_in = 'pc/pc_in2.zarr'
2024-08-14 17:16:09 - log_args - INFO - out = 'pc/pc_out2.zarr'
2024-08-14 17:16:09 - log_args - INFO - map_func = <ufunc 'absolute'>
2024-08-14 17:16:09 - log_args - INFO - reduce_func = <function sum>
2024-08-14 17:16:09 - log_args - INFO - axis = 0
2024-08-14 17:16:09 - log_args - INFO - post_map_func = <function <lambda>>
2024-08-14 17:16:09 - log_args - INFO - processes = False
2024-08-14 17:16:09 - log_args - INFO - n_workers = 1
2024-08-14 17:16:09 - log_args - INFO - threads_per_worker = 1
2024-08-14 17:16:09 - log_args - INFO - dask_cluster_arg = {}
2024-08-14 17:16:09 - log_args - INFO - fetching args done.
2024-08-14 17:16:09 - zarr_info - INFO - pc/pc_in2.zarr zarray shape, chunks, dtype: (800, 3), (200, 1), complex64
2024-08-14 17:16:09 - data_reduce - INFO - starting dask local cluster.
2024-08-14 17:16:09 - data_reduce - INFO - dask local cluster started.
2024-08-14 17:16:09 - dask_cluster_info - INFO - dask cluster: LocalCluster(dashboard_link='http://10.211.48.16:8787/status', workers=1, threads=1, memory=1.46 TiB)
2024-08-14 17:16:09 - darr_info - INFO - data_in dask array shape, chunksize, dtype: (800, 3), (200, 1), complex64
2024-08-14 17:16:09 - darr_info - INFO - maped_data_in dask array shape, chunksize, dtype: (800, 3), (200, 1), float32
2024-08-14 17:16:09 - darr_info - INFO - reduced data in every chunk dask array shape, chunksize, dtype: (4, 3), (1, 1), float32
2024-08-14 17:16:09 - data_reduce - INFO - computing graph setted. doing all the computing.
2024-08-14 17:16:09 - data_reduce - INFO - computing finished. 0.1s
2024-08-14 17:16:09 - data_reduce - INFO - dask cluster closed.
2024-08-14 17:16:09 - data_reduce - INFO - continue the reduction on reduced data over every chunk
2024-08-14 17:16:09 - data_reduce - INFO - post mapping
2024-08-14 17:16:09 - data_reduce - INFO - writing output.
2024-08-14 17:16:09 - data_reduce - INFO - done.