|
Author: mturk
Date: Mon Mar 19 14:40:15 2012 New Revision: 1302479 URL: http://svn.apache.org/viewvc?rev=1302479&view=rev Log: Make sure we pull only if the sequence is above us Modified: tomcat/jk/trunk/native/common/jk_lb_worker.c Modified: tomcat/jk/trunk/native/common/jk_lb_worker.c URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_lb_worker.c?rev=1302479&r1=1302478&r2=1302479&view=diff ============================================================================== --- tomcat/jk/trunk/native/common/jk_lb_worker.c (original) +++ tomcat/jk/trunk/native/common/jk_lb_worker.c Mon Mar 19 14:40:15 2012 @@ -295,7 +295,7 @@ void jk_lb_pull(lb_worker_t *p, int lock p->name, p->sequence, p->s->h.sequence); if (locked == JK_FALSE) jk_shm_lock(); - if (p->sequence > p->s->h.sequence) { + if (p->sequence == p->s->h.sequence) { if (locked == JK_FALSE) jk_shm_unlock(); return; @@ -315,7 +315,7 @@ void jk_lb_pull(lb_worker_t *p, int lock for (i = 0; i < p->num_of_workers; i++) { lb_sub_worker_t *w = &p->lb_workers[i]; - if (w->sequence != w->s->h.sequence) { + if (w->sequence < w->s->h.sequence) { jk_worker_t *jw = w->worker; ajp_worker_t *aw = (ajp_worker_t *)jw->worker_private; @@ -365,13 +365,12 @@ void jk_lb_push(lb_worker_t *p, int lock p->s->lbmethod = p->lbmethod; p->s->lblock = p->lblock; p->s->max_packet_size = p->max_packet_size; - p->s->h.sequence = p->sequence; strncpy(p->s->session_cookie, p->session_cookie, JK_SHM_STR_SIZ); strncpy(p->s->session_path, p->session_path, JK_SHM_STR_SIZ); for (i = 0; i < p->num_of_workers; i++) { lb_sub_worker_t *w = &p->lb_workers[i]; - if (w->sequence != w->s->h.sequence) { + if (w->sequence < w->s->h.sequence) { jk_worker_t *jw = w->worker; ajp_worker_t *aw = (ajp_worker_t *)jw->worker_private; @@ -391,6 +390,7 @@ void jk_lb_push(lb_worker_t *p, int lock w->s->h.sequence = w->sequence; } } + p->s->h.sequence = p->sequence; if (locked == JK_FALSE) jk_shm_unlock(); @@ -562,7 +562,7 @@ static int recover_workers(lb_worker_t * ajp_worker_t *aw = NULL; JK_TRACE_ENTER(l); - if (p->sequence != p->s->h.sequence) + if (p->sequence < p->s->h.sequence) jk_lb_pull(p, JK_TRUE, l); for (i = 0; i < p->num_of_workers; i++) { w = &p->lb_workers[i]; @@ -1146,7 +1146,7 @@ static int JK_METHOD service(jk_endpoint /* Set returned error to OK */ *is_error = JK_HTTP_OK; - if (p->worker->sequence != p->worker->s->h.sequence) + if (p->worker->sequence < p->worker->s->h.sequence) jk_lb_pull(p->worker, JK_FALSE, l); for (i = 0; i < num_of_workers; i++) { lb_sub_worker_t *rec = &(p->worker->lb_workers[i]); @@ -1206,7 +1206,7 @@ static int JK_METHOD service(jk_endpoint retry, p->worker->retry_interval); jk_sleep(p->worker->retry_interval); /* Pull shared memory if something changed during sleep */ - if (p->worker->sequence != p->worker->s->h.sequence) + if (p->worker->sequence < p->worker->s->h.sequence) jk_lb_pull(p->worker, JK_FALSE, l); for (i = 0; i < num_of_workers; i++) { /* Copy the shared state info */ @@ -1631,7 +1631,6 @@ static int JK_METHOD validate(jk_worker_ strncpy(p->lb_workers[i].s->h.name, worker_names[i], JK_SHM_STR_SIZ); p->lb_workers[i].sequence = 0; - p->lb_workers[i].s->h.sequence = 0; p->lb_workers[i].lb_factor = jk_get_lb_factor(props, worker_names[i]); if (p->lb_workers[i].lb_factor < 1) { @@ -1786,7 +1785,7 @@ static int JK_METHOD init(jk_worker_t *p } p->sequence++; - jk_lb_push(p, JK_FALSE, log); + jk_lb_push(p, JK_TRUE, log); JK_TRACE_EXIT(log); return JK_TRUE; @@ -1878,7 +1877,6 @@ int JK_METHOD lb_worker_factory(jk_worke private_data->error_escalation_time = private_data->recover_wait_time / 2; private_data->max_reply_timeouts = 0; private_data->sequence = 0; - private_data->s->h.sequence = 0; private_data->next_offset = 0; *w = &private_data->worker; JK_TRACE_EXIT(l); --------------------------------------------------------------------- To unsubscribe, e-mail: [hidden email] For additional commands, e-mail: [hidden email] |
|
Hi Mladen,
On 19.03.2012 15:40, [hidden email] wrote: > Author: mturk > Date: Mon Mar 19 14:40:15 2012 > New Revision: 1302479 > > URL: http://svn.apache.org/viewvc?rev=1302479&view=rev > Log: > Make sure we pull only if the sequence is above us > > Modified: > tomcat/jk/trunk/native/common/jk_lb_worker.c > > Modified: tomcat/jk/trunk/native/common/jk_lb_worker.c > URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_lb_worker.c?rev=1302479&r1=1302478&r2=1302479&view=diff > ============================================================================== > --- tomcat/jk/trunk/native/common/jk_lb_worker.c (original) > +++ tomcat/jk/trunk/native/common/jk_lb_worker.c Mon Mar 19 14:40:15 2012 ... > @@ -365,13 +365,12 @@ void jk_lb_push(lb_worker_t *p, int lock > p->s->lbmethod = p->lbmethod; > p->s->lblock = p->lblock; > p->s->max_packet_size = p->max_packet_size; > - p->s->h.sequence = p->sequence; > strncpy(p->s->session_cookie, p->session_cookie, JK_SHM_STR_SIZ); > strncpy(p->s->session_path, p->session_path, JK_SHM_STR_SIZ); > > for (i = 0; i< p->num_of_workers; i++) { > lb_sub_worker_t *w =&p->lb_workers[i]; > - if (w->sequence != w->s->h.sequence) { > + if (w->sequence< w->s->h.sequence) { I think this one is wrong. It is inside push not pull, so it should be if (local > shared) and not "<". > jk_worker_t *jw = w->worker; > ajp_worker_t *aw = (ajp_worker_t *)jw->worker_private; The other changes in this commit look right. Regards, Rainer --------------------------------------------------------------------- To unsubscribe, e-mail: [hidden email] For additional commands, e-mail: [hidden email] |
|
On 03/26/2012 11:46 PM, Rainer Jung wrote:
> Hi Mladen, > >> >> for (i = 0; i< p->num_of_workers; i++) { >> lb_sub_worker_t *w =&p->lb_workers[i]; >> - if (w->sequence != w->s->h.sequence) { >> + if (w->sequence< w->s->h.sequence) { > > I think this one is wrong. It is inside push not pull, so it should be if (local > shared) and not "<". > Might be. The real question is why we need a difference in sequences for a push? IMO if the push is needed it should be unconditional, and heap sequence number should be updated *only* on pull (never directly) > > The other changes in this commit look right. > I'm not sure that's the case :) Think that the entire sequence push/pull should be reviewed. IMO the only reason for it is to lower the number of lock/unlock calls. The reason for this I see only in case of explicit batch update trough status worker. All other operations should just depend on volatile shared mem parameters which should ensure enough atomicity. Also we should apply the check in this way if (w->sequence < w->s->h.sequence) { jk_shm_lock(); /* Now check the value again after we obtained a lock */ if (w->sequence < w->s->h.sequence) { } } The reason for IIS crash is the fact that we can have N processes updating the shared memory (no parent process, so no one that would set initial values). Lock is done per worker basis, and this means that one worker can update the sequence, other will see it as next generation although the first one was not done updating all workers from config and then you have ka-boom. This makes the entire sequence based push/pull conceptually wrong, and we need to think of something else. Regards -- ^TM --------------------------------------------------------------------- To unsubscribe, e-mail: [hidden email] For additional commands, e-mail: [hidden email] |
|
In reply to this post by Rainer Jung-3
On 03/26/2012 11:46 PM, Rainer Jung wrote:
> Hi Mladen, > >> >> for (i = 0; i< p->num_of_workers; i++) { >> lb_sub_worker_t *w =&p->lb_workers[i]; >> - if (w->sequence != w->s->h.sequence) { >> + if (w->sequence< w->s->h.sequence) { > > I think this one is wrong. It is inside push not pull, so it should be if (local > shared) and not "<". > ... continue from first mail Inside lb_worker::init we have 1787: p->sequence++; 1788: jk_lb_push(p, JK_TRUE, log); Now, this is completely wrong. It will always set sequence to 1, so in case worker process gets recycled we have things out of sync. Anyhow why push at init stage? ... next inside lb_push we have 393: p->s->h.sequence = p->sequence; Now this should actually be p->s->h.sequence++; The shm sequence can be generations ahead of heap sequence number. Anyhow, like said, *->s->h.sequence should be updated only when someone explicitly clicks on the jkstatus page. Other then that I see no reason for its update. All other runtime params are intrinsic and volatile should guarantee its consistency. Regards -- ^TM --------------------------------------------------------------------- To unsubscribe, e-mail: [hidden email] For additional commands, e-mail: [hidden email] |
| Powered by Nabble | Edit this page |
