Mercury: Enabling Remote Procedure Call for High-Performance Computing
- J. Soumagne, D. Kimpe, J. Zounmevo, M. Chaarawi, Q. Koziol,
- A. Afsahi, and R. Ross
The HDF Group, Argonne National Laboratory, Queen’s University
Mercury: Enabling Remote Procedure Call for High-Performance - - PowerPoint PPT Presentation
Mercury: Enabling Remote Procedure Call for High-Performance Computing J. Soumagne, D. Kimpe, J. Zounmevo, M. Chaarawi, Q. Koziol, A. Afsahi, and R. Ross The HDF Group, Argonne National Laboratory, Queen’s University November 26, 2013 RPC
The HDF Group, Argonne National Laboratory, Queen’s University
2
2
2
2
3
◮ Need support for native transport ◮ Need to be easy to port to new machines
3
RPC proc RPC proc
4
RPC proc RPC proc Metadata (unexpected + expected messaging)
4
RPC proc RPC proc Metadata (unexpected + expected messaging) Bulk Data (RMA transfer)
4
RPC proc Network Abstraction Layer RPC proc Metadata (unexpected + expected messaging) Bulk Data (RMA transfer)
4
5
and get request id
and get request id
5
with request id and serialized parameters + Pre-post receive for server response
unexpected request
5
5
send / receive requests
serialized response
5
in_struct;
/* Initialize the interface */ [...] NA_Addr_lookup (network_class , server_name , & server_addr ); /* Register RPC call */ rpc_id = HG_REGISTER ("open", open_in_t , open_out_t ); /* Fill input parameters */ [...] in_struct.in_param0 = in_param0; /* Send RPC request */ HG_Forward (server_addr , rpc_id , &in_struct , &out_struct , & rpc_request ); /* Wait for completion */ HG_Wait(rpc_request , HG_MAX_IDLE_TIME , HG_STATUS_IGNORE ); /* Get
output parameters */ [...]
6
int main(int argc , char *argv []) { /* Initialize the interface */ [...] /* Register RPC call */ HG_HANDLER_REGISTER ("open", open_rpc , open_in_t , open_out_t );
/* Process RPC calls */ while (! finalized) { HG_Handler_process (timeout , HG_STATUS_IGNORE ); } /* Finalize the interface */ [...] }
7
int
handle) {
in_struct;
/* Get input parameters and bulk handle */ HG_Handler_get_input (handle , &in_struct); [...] in_param0 = in_struct.in_param0; /* Execute call */
/* Fill
output structure */
/* Send response back */ HG_Handler_start_output (handle , &out_struct ); return HG_SUCCESS; }
8
9
segment and get handle
segment and get handle
9
segment and get handle
segment and get handle
memory handle
9
segment and get handle
segment and get handle
memory handle
tion using local/deserialized remote handles
9
segment and get handle
segment and get handle
memory handle
tion using local/deserialized remote handles
9
/* Initialize the interface */ [...] /* Register RPC call */ rpc_id = HG_REGISTER (" write ", write_in_t , write_out_t ); /* Create bulk handle */ HG_Bulk_handle_create (buf , buf_size , HG_BULK_READ_ONLY , & bulk_handle ); /* Attach bulk handle to input parameters */ [...] in_struct. bulk_handle = bulk_handle ; /* Send RPC request */ HG_Forward (server_addr , rpc_id , &in_struct , &out_struct , & rpc_request ); /* Wait for completion */ HG_Wait(rpc_request , HG_MAX_IDLE_TIME , HG_STATUS_IGNORE );
10
/* Get input parameters and bulk handle */ HG_Handler_get_input (handle , &in_struct); [...] bulk_handle = in_struct. bulk_handle ; /* Get size
of data and allocate buffer */ nbytes = HG_Bulk_handle_get_size ( bulk_handle ); buf = malloc(nbytes); /* Create block handle to read data */ HG_Bulk_block_handle_create (buf , nbytes , HG_BULK_READWRITE , & bulk_block_handle ); /* Start reading bulk data */ HG_Bulk_read_all (client_addr , bulk_handle , bulk_block_handle , & bulk_request ); /* Wait for completion */ HG_Bulk_wait (bulk_request , HG_MAX_IDLE_TIME , HG_STATUS_IGNORE );
11
int HG_Bulk_handle_create_segments ( hg_bulk_segment_t *bulk_segments , size_t segment_count , unsigned long flags , hg_bulk_t *handle);
int NA_Mem_register_segments ( na_class_t *network_class , na_segment_t *segments , na_size_t segment_count , unsigned long flags , na_mem_handle_t *mem_handle );
12
int HG_Bulk_read (na_addr_t addr , hg_bulk_t bulk_handle , size_t bulk_offset , hg_bulk_block_t block_handle , size_t block_offset , size_t block_size , hg_bulk_request_t * bulk_request );
int HG_Bulk_write (na_addr_t addr , hg_bulk_t bulk_handle , size_t bulk_offset , hg_bulk_block_t block_handle , size_t block_offset , size_t block_size , hg_bulk_request_t * bulk_request );
13
/* Initialize the interface */ [...] /* Register RPC call */ rpc_id = HG_REGISTER (" write ", write_in_t , write_out_t ); /* Provide data layout information */ for (i = 0; i < BULK_NX ; i++) { segments[i]. address = buf[i]; segments[i]. size = BULK_NY * sizeof(int); } /* Create bulk handle with segment info */ HG_Bulk_handle_create_segments (segments , BULK_NX , HG_BULK_READ_ONLY , & bulk_handle ); /* Attach bulk handle to input parameters */ [...] in_struct. bulk_handle = bulk_handle ; /* Send RPC request */ HG_Forward (server_addr , rpc_id , &in_struct , &out_struct , & rpc_request );
14
/* Get input parameters and bulk handle */ HG_Handler_get_input (handle , &in_struct); [...] bulk_handle = in_struct. bulk_handle ; /* Get size
of data and allocate buffer */ nbytes = HG_Bulk_handle_get_size ( bulk_handle ); buf = malloc(nbytes); /* Create block handle to read data */ HG_Bulk_block_handle_create (buf , nbytes , HG_BULK_READWRITE , & bulk_block_handle ); /* Start reading bulk data */ HG_Bulk_read_all (client_addr , bulk_handle , bulk_block_handle , & bulk_request ); /* Wait for completion */ HG_Bulk_wait (bulk_request , HG_MAX_IDLE_TIME , HG_STATUS_IGNORE );
15
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call 16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call 16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call 16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call 16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call 16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call 16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call 16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes)
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes)
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes)
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 2 3
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 2 3 W
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 2 3 E
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 2 3 W
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 2 3 E
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 2 3 W
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 2 3 E
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 2 3 E
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 2 3 E
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 2 3 E
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 2 3 E
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 2 E
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) 1 E
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes) E
16
◮ Makes us pay the latency of an entire RMA read
◮ Transfers can complete while writing / executing the RPC call
Data buffer (nbytes)
16
1000 2000 3000 4000 5000 6000 2 4 8 16 32 64 128 256 Aggregate bandwidth (MB/s) Number of client processes
17
1000 2000 3000 4000 5000 6000 7000 8000 9000 2 4 8 16 32 64 128 256 Aggregate bandwidth (MB/s) Number of client processes
18
19
20
MERCURY_GEN_PROC ( open_in_t ,
(( hg_string_t )(path) ) (( int32_t)(flags)) (( uint32_t)(mode)) )
Macro
MERCURY_GEN_PROC ( struct_type_name , fields )
/* Define open_in_t
*/ typedef struct { hg_string_t path; int32_t flags; uint32_t mode; } open_in_t; /* Define hg_proc_open_in_t */ static inline int hg_proc_open_in_t (hg_proc_t proc , void *data) { int ret = HG_SUCCESS ;
open_in_t *struct_data = (open_in_t *) data; ret = hg_proc_hg_string_t (proc , &struct_data -> path); if (ret != HG_SUCCESS ) { HG_ERROR_DEFAULT ("Proc error "); ret = HG_FAIL; return ret; } ret = hg_proc_int32_t (proc , &struct_data ->flags) ; if (ret != HG_SUCCESS ) { HG_ERROR_DEFAULT ("Proc error "); ret = HG_FAIL; return ret; } ret = hg_proc_uint32_t (proc , &struct_data ->mode) ; if (ret != HG_SUCCESS ) { HG_ERROR_DEFAULT ("Proc error "); ret = HG_FAIL; return ret; } return ret; }
Generated Code
Generates proc and struct
21
22
23