From: Luis Kornblueh (luis.kornblueh_at_[hidden])
Date: 2007-05-16 10:08:53


Hi,

well, we are currently trying to change over to OFED. At the same time we are
trying to change to openmpi-1.2.1 from openmpi-1.2b3; unfortunately we get a
SEGV in mpiexec ;-)

Cheerio,
Luis

> Greetings and thanks for the patch!
>
> We actually made this change on the development trunk but never moved
> it to the release branch. See
> https://svn.open-mpi.org/trac/ompi/changeset/13709
>
> The problem is that we accidentally used a gcc-ism in the mvapi BTL
> (i.e., the older InfiniBand driver). We didn't think too many people
> were using the mvapi BTL, and therefore didn't move it over to the
> v1.2 branch. Sorry about that!
>
> Do you have a strong requirement for using mvapi? Specifically, is
> there any chance that you can upgrade to OFED? I ask because all
> current and future OMPI work for InfiniBand is being done in the
> openib BTL (not the mvapi BTL -- other than this fix, we haven't made
> any changes to the mvapi BTL for a long, long time).
>
> See http://www.open-mpi.org/faq/?category=openfabrics#vapi-support.
>
>
>
> On May 16, 2007, at 12:08 AM, Luis Kornblueh wrote:
>
> > Hi everybody,
> >
> > we tried to install openmpi with Sun cc on Linux. It required some
> > patches. I add the necessary patches here. I hope you can include
> > those.
> >
> > Cheerio,
> > Luis
> >
> > diff -Naur openmpi-1.2.1/ompi/mca/btl/mvapi/btl_mvapi.c
> > openmpi-1.2.1_patched/ompi/mca/btl/mvapi/btl_mvapi.c
> > --- openmpi-1.2.1/ompi/mca/btl/mvapi/btl_mvapi.c 2007-04-19
> > 18:30:54.000000000 +0200
> > +++ openmpi-1.2.1_patched/ompi/mca/btl/mvapi/btl_mvapi.c 2007-05-09
> > 14:33:24.000000000 +0200
> > @@ -463,7 +463,7 @@
> > mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*)descriptor;
> > frag->endpoint = endpoint;
> > frag->hdr->tag = tag;
> > - frag->sr_desc.opcode = VAPI_SEND;
> > + frag->desc.sr_desc.opcode = VAPI_SEND;
> > return mca_btl_mvapi_endpoint_send(endpoint, frag);
> > }
> >
> > @@ -481,7 +481,7 @@
> >
> > /* setup for queued requests */
> > frag->endpoint = endpoint;
> > - frag->sr_desc.opcode = VAPI_RDMA_WRITE;
> > + frag->desc.sr_desc.opcode = VAPI_RDMA_WRITE;
> >
> > /* check for a send wqe */
> > if (OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,-1) < 0) {
> > @@ -494,12 +494,12 @@
> > /* post descriptor */
> > } else {
> >
> > - frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_lp;
> > - frag->sr_desc.remote_addr = (VAPI_virt_addr_t) frag-
> > >base.des_dst->seg_addr.lval;
> > - frag->sr_desc.r_key = frag->base.des_dst->seg_key.key32[0];
> > + frag->desc.sr_desc.remote_qp = endpoint-
> > >rem_info.rem_qp_num_lp;
> > + frag->desc.sr_desc.remote_addr = (VAPI_virt_addr_t) frag-
> > >base.des_dst->seg_addr.lval;
> > + frag->desc.sr_desc.r_key = frag->base.des_dst-
> > >seg_key.key32[0];
> > frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t)
> > frag->base.des_src->seg_addr.pval;
> > frag->sg_entry.len = frag->base.des_src->seg_len;
> > - if(VAPI_OK != VAPI_post_sr(mvapi_btl->nic, endpoint-
> > >lcl_qp_hndl_lp, &frag->sr_desc)) {
> > + if(VAPI_OK != VAPI_post_sr(mvapi_btl->nic, endpoint-
> > >lcl_qp_hndl_lp, &frag->desc.sr_desc)) {
> > rc = OMPI_ERROR;
> > } else {
> > rc = OMPI_SUCCESS;
> > @@ -531,7 +531,7 @@
> > mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*) descriptor;
> >
> > frag->endpoint = endpoint;
> > - frag->sr_desc.opcode = VAPI_RDMA_READ;
> > + frag->desc.sr_desc.opcode = VAPI_RDMA_READ;
> >
> > /* check for a send wqe */
> > if (OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,-1) < 0) {
> > @@ -555,13 +555,13 @@
> >
> > } else {
> >
> > - frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_lp;
> > - frag->sr_desc.remote_addr = (VAPI_virt_addr_t) frag-
> > >base.des_src->seg_addr.lval;
> > - frag->sr_desc.r_key = frag->base.des_src->seg_key.key32[0];
> > + frag->desc.sr_desc.remote_qp = endpoint-
> > >rem_info.rem_qp_num_lp;
> > + frag->desc.sr_desc.remote_addr = (VAPI_virt_addr_t) frag-
> > >base.des_src->seg_addr.lval;
> > + frag->desc.sr_desc.r_key = frag->base.des_src-
> > >seg_key.key32[0];
> > frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t)
> > frag->base.des_dst->seg_addr.pval;
> > frag->sg_entry.len = frag->base.des_dst->seg_len;
> >
> > - if(VAPI_OK != VAPI_post_sr(mvapi_btl->nic, endpoint-
> > >lcl_qp_hndl_lp, &frag->sr_desc)) {
> > + if(VAPI_OK != VAPI_post_sr(mvapi_btl->nic, endpoint-
> > >lcl_qp_hndl_lp, &frag->desc.sr_desc)) {
> > rc = OMPI_ERROR;
> > } else {
> > rc = OMPI_SUCCESS;
> > diff -Naur openmpi-1.2.1/ompi/mca/btl/mvapi/btl_mvapi_endpoint.c
> > openmpi-1.2.1_patched/ompi/mca/btl/mvapi/btl_mvapi_endpoint.c
> > --- openmpi-1.2.1/ompi/mca/btl/mvapi/btl_mvapi_endpoint.c
> > 2007-04-19 18:30:54.000000000 +0200
> > +++ openmpi-1.2.1_patched/ompi/mca/btl/mvapi/btl_mvapi_endpoint.c
> > 2007-05-09 14:45:31.000000000 +0200
> > @@ -143,7 +143,7 @@
> > }
> > }
> >
> > - frag->sr_desc.remote_qkey = 0;
> > + frag->desc.sr_desc.remote_qkey = 0;
> > frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag-
> > >hdr;
> > frag->sg_entry.len =
> > frag->segment.seg_len + sizeof(mca_btl_mvapi_header_t) +
> > @@ -153,14 +153,14 @@
> > mca_btl_mvapi_footer_t* ftr =
> > (mca_btl_mvapi_footer_t*)(((char*)frag-
> > >segment.seg_addr.pval) +
> > frag->segment.seg_len);
> > - frag->sr_desc.opcode = VAPI_RDMA_WRITE;
> > + frag->desc.sr_desc.opcode = VAPI_RDMA_WRITE;
> > MCA_BTL_MVAPI_RDMA_FRAG_SET_SIZE(ftr, frag->sg_entry.len);
> > MCA_BTL_MVAPI_RDMA_MAKE_LOCAL(ftr);
> > #ifdef OMPI_ENABLE_DEBUG
> > ftr->seq = endpoint->eager_rdma_remote.seq++;
> > #endif
> > - frag->sr_desc.r_key = (VAPI_rkey_t)endpoint-
> > >eager_rdma_remote.rkey;
> > - frag->sr_desc.remote_addr = (VAPI_virt_addr_t)
> > + frag->desc.sr_desc.r_key = (VAPI_rkey_t)endpoint-
> > >eager_rdma_remote.rkey;
> > + frag->desc.sr_desc.remote_addr = (VAPI_virt_addr_t)
> > endpoint->eager_rdma_remote.base.lval +
> > endpoint->eager_rdma_remote.head *
> > mvapi_btl->eager_rdma_frag_size +
> > @@ -168,17 +168,17 @@
> > sizeof(mca_btl_mvapi_header_t) +
> > frag->size +
> > sizeof(mca_btl_mvapi_footer_t);
> > - frag->sr_desc.remote_addr -= frag->sg_entry.len;
> > + frag->desc.sr_desc.remote_addr -= frag->sg_entry.len;
> > MCA_BTL_MVAPI_RDMA_NEXT_INDEX (endpoint-
> > >eager_rdma_remote.head);
> > } else {
> > - frag->sr_desc.opcode = VAPI_SEND;
> > + frag->desc.sr_desc.opcode = VAPI_SEND;
> > }
> >
> >
> > if(frag->sg_entry.len <= mvapi_btl->ib_inline_max) {
> > - ret = EVAPI_post_inline_sr(mvapi_btl->nic, qp_hndl, &frag-
> > >sr_desc);
> > + ret = EVAPI_post_inline_sr(mvapi_btl->nic, qp_hndl, &frag-
> > >desc.sr_desc);
> > } else {
> > - ret = VAPI_post_sr(mvapi_btl->nic, qp_hndl, &frag->sr_desc);
> > + ret = VAPI_post_sr(mvapi_btl->nic, qp_hndl, &frag-
> > >desc.sr_desc);
> > }
> >
> > if(VAPI_OK != ret) {
> > @@ -1072,15 +1072,15 @@
> > OPAL_THREAD_ADD32(&endpoint->rd_credits_lp, -frag->hdr->credits);
> > ((mca_btl_mvapi_control_header_t *)frag-
> > >segment.seg_addr.pval)->type = MCA_BTL_MVAPI_CONTROL_NOOP;
> >
> > - frag->sr_desc.opcode = VAPI_SEND;
> > + frag->desc.sr_desc.opcode = VAPI_SEND;
> > frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag-
> > >hdr;
> > frag->sg_entry.len = sizeof(mca_btl_mvapi_header_t) +
> > sizeof(mca_btl_mvapi_control_header_t);
> >
> > if(sizeof(mca_btl_mvapi_header_t) <= mvapi_btl->ib_inline_max) {
> > - ret = EVAPI_post_inline_sr(mvapi_btl->nic, endpoint-
> > >lcl_qp_hndl_lp, &frag->sr_desc);
> > + ret = EVAPI_post_inline_sr(mvapi_btl->nic, endpoint-
> > >lcl_qp_hndl_lp, &frag->desc.sr_desc);
> > } else {
> > - ret = VAPI_post_sr(mvapi_btl->nic, endpoint-
> > >lcl_qp_hndl_lp, &frag->sr_desc);
> > + ret = VAPI_post_sr(mvapi_btl->nic, endpoint-
> > >lcl_qp_hndl_lp, &frag->desc.sr_desc);
> > }
> > if(ret != VAPI_SUCCESS) {
> > OPAL_THREAD_ADD32(&endpoint->sd_credits_lp, -1);
> > @@ -1151,15 +1151,15 @@
> > ((mca_btl_mvapi_control_header_t *)frag-
> > >segment.seg_addr.pval)->type = MCA_BTL_MVAPI_CONTROL_NOOP;
> >
> >
> > - frag->sr_desc.opcode = VAPI_SEND;
> > + frag->desc.sr_desc.opcode = VAPI_SEND;
> > frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag-
> > >hdr;
> > frag->sg_entry.len = sizeof(mca_btl_mvapi_header_t) +
> > sizeof(mca_btl_mvapi_control_header_t);
> >
> > if(sizeof(mca_btl_mvapi_header_t) <= mvapi_btl->ib_inline_max) {
> > - ret = EVAPI_post_inline_sr(mvapi_btl->nic, endpoint-
> > >lcl_qp_hndl_hp, &frag->sr_desc);
> > + ret = EVAPI_post_inline_sr(mvapi_btl->nic, endpoint-
> > >lcl_qp_hndl_hp, &frag->desc.sr_desc);
> > } else {
> > - ret = VAPI_post_sr(mvapi_btl->nic, endpoint-
> > >lcl_qp_hndl_hp, &frag->sr_desc);
> > + ret = VAPI_post_sr(mvapi_btl->nic, endpoint-
> > >lcl_qp_hndl_hp, &frag->desc.sr_desc);
> > }
> > if(ret != VAPI_SUCCESS) {
> > OPAL_THREAD_ADD32(&endpoint->sd_credits_lp, -1);
> > diff -Naur openmpi-1.2.1/ompi/mca/btl/mvapi/btl_mvapi_endpoint.h
> > openmpi-1.2.1_patched/ompi/mca/btl/mvapi/btl_mvapi_endpoint.h
> > --- openmpi-1.2.1/ompi/mca/btl/mvapi/btl_mvapi_endpoint.h
> > 2007-04-19 18:30:54.000000000 +0200
> > +++ openmpi-1.2.1_patched/ompi/mca/btl/mvapi/btl_mvapi_endpoint.h
> > 2007-05-09 14:38:43.000000000 +0200
> > @@ -234,7 +234,7 @@
> > frag->sg_entry.len = frag->size + \
> > ((unsigned char*) frag->segment.seg_addr.pval- \
> > (unsigned char*) frag->hdr); \
> > - desc_post[i] = frag->rr_desc; \
> > + desc_post[i] = frag->desc.rr_desc; \
> > }\
> > rc = EVAPI_post_rr_list( nic, \
> > qp, \
> > diff -Naur openmpi-1.2.1/ompi/mca/btl/mvapi/btl_mvapi_frag.c
> > openmpi-1.2.1_patched/ompi/mca/btl/mvapi/btl_mvapi_frag.c
> > --- openmpi-1.2.1/ompi/mca/btl/mvapi/btl_mvapi_frag.c 2007-04-19
> > 18:30:54.000000000 +0200
> > +++ openmpi-1.2.1_patched/ompi/mca/btl/mvapi/btl_mvapi_frag.c
> > 2007-05-09 14:47:38.000000000 +0200
> > @@ -43,12 +43,12 @@
> > frag->base.des_dst = NULL;
> > frag->base.des_dst_cnt = 0;
> >
> > - frag->sr_desc.comp_type = VAPI_SIGNALED;
> > - frag->sr_desc.opcode = VAPI_SEND;
> > - frag->sr_desc.remote_qkey = 0;
> > - frag->sr_desc.sg_lst_len = 1;
> > - frag->sr_desc.sg_lst_p = &frag->sg_entry;
> > - frag->sr_desc.id = (VAPI_virt_addr_t) (MT_virt_addr_t) frag;
> > + frag->desc.sr_desc.comp_type = VAPI_SIGNALED;
> > + frag->desc.sr_desc.opcode = VAPI_SEND;
> > + frag->desc.sr_desc.remote_qkey = 0;
> > + frag->desc.sr_desc.sg_lst_len = 1;
> > + frag->desc.sr_desc.sg_lst_p = &frag->sg_entry;
> > + frag->desc.sr_desc.id = (VAPI_virt_addr_t) (MT_virt_addr_t) frag;
> >
> > }
> >
> > @@ -61,11 +61,11 @@
> > frag->base.des_src = NULL;
> > frag->base.des_src_cnt = 0;
> >
> > - frag->rr_desc.comp_type = VAPI_SIGNALED;
> > - frag->rr_desc.opcode = VAPI_RECEIVE;
> > - frag->rr_desc.sg_lst_len = 1;
> > - frag->rr_desc.sg_lst_p = &frag->sg_entry;
> > - frag->rr_desc.id = (VAPI_virt_addr_t) (MT_virt_addr_t) frag;
> > + frag->desc.rr_desc.comp_type = VAPI_SIGNALED;
> > + frag->desc.rr_desc.opcode = VAPI_RECEIVE;
> > + frag->desc.rr_desc.sg_lst_len = 1;
> > + frag->desc.rr_desc.sg_lst_p = &frag->sg_entry;
> > + frag->desc.rr_desc.id = (VAPI_virt_addr_t) (MT_virt_addr_t) frag;
> >
> >
> > }
> > diff -Naur openmpi-1.2.1/ompi/mca/btl/mvapi/btl_mvapi_frag.h
> > openmpi-1.2.1_patched/ompi/mca/btl/mvapi/btl_mvapi_frag.h
> > --- openmpi-1.2.1/ompi/mca/btl/mvapi/btl_mvapi_frag.h 2007-04-19
> > 18:30:54.000000000 +0200
> > +++ openmpi-1.2.1_patched/ompi/mca/btl/mvapi/btl_mvapi_frag.h
> > 2007-05-09 14:31:08.000000000 +0200
> > @@ -92,7 +92,7 @@
> > union{
> > VAPI_rr_desc_t rr_desc;
> > VAPI_sr_desc_t sr_desc;
> > - };
> > + } desc;
> > VAPI_sg_lst_entry_t sg_entry;
> > mca_btl_mvapi_header_t *hdr;
> > mca_btl_mvapi_footer_t *ftr;
> > @@ -165,7 +165,7 @@
> >
> > #define MCA_BTL_IB_FRAG_PROGRESS(frag) \
> > do { \
> > - switch(frag->sr_desc.opcode) { \
> > + switch(frag->desc.sr_desc.opcode) { \
> > case VAPI_SEND: \
> > if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag-
> > >endpoint, frag)) { \
> > BTL_ERROR(("error in posting pending send\n")); \
> > @@ -186,7 +186,7 @@
> > } \
> > break; \
> > default: \
> > - BTL_ERROR(("error in posting pending operation, invalide
> > opcode %d\n", frag->sr_desc.opcode)); \
> > + BTL_ERROR(("error in posting pending operation, invalide
> > opcode %d\n", frag->desc.sr_desc.opcode)); \
> > break; \
> > } \
> > } while (0)
> > diff -Naur openmpi-1.2.1/ompi/mca/btl/mvapi/btl_mvapi.h
> > openmpi-1.2.1_patched/ompi/mca/btl/mvapi/btl_mvapi.h
> > --- openmpi-1.2.1/ompi/mca/btl/mvapi/btl_mvapi.h 2007-04-19
> > 18:30:54.000000000 +0200
> > +++ openmpi-1.2.1_patched/ompi/mca/btl/mvapi/btl_mvapi.h 2007-05-09
> > 14:37:23.000000000 +0200
> > @@ -260,7 +260,7 @@
> > frag->sg_entry.len = frag->size + \
> > ((unsigned char*) frag->segment.seg_addr.pval- \
> > (unsigned char*) frag->hdr); \
> > - desc_post[i] = frag->rr_desc; \
> > + desc_post[i] = frag->desc.rr_desc; \
> > }\
> > ret = VAPI_post_srq( nic, \
> > srq_hndl, \
> >
> >
> > --
> > \\\\\\
> > (-0^0-)
> > --------------------------oOO--(_)--OOo-----------------------------
> >
> > Luis Kornblueh Tel. : +49-40-41173289
> > Max-Planck-Institute for Meteorology Fax. : +49-40-41173298
> > Bundesstr. 53
> > D-20146 Hamburg Email: luis.kornblueh_at_[hidden]
> > Federal Republic of Germany
> > _______________________________________________
> > users mailing list
> > users_at_[hidden]
> > http://www.open-mpi.org/mailman/listinfo.cgi/users
>
>
> --
> Jeff Squyres
> Cisco Systems
>
> _______________________________________________
> users mailing list
> users_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/users

-- 
                             \\\\\\
                             (-0^0-)
--------------------------oOO--(_)--OOo-----------------------------
 Luis Kornblueh                           Tel. : +49-40-41173289
 Max-Planck-Institute for Meteorology     Fax. : +49-40-41173298
 Bundesstr. 53              
 D-20146 Hamburg                   Email: luis.kornblueh_at_[hidden]
 Federal Republic of Germany