Merge in fetch updates/break out memory/create error bits for host failures.
[m6w6/libmemcached] / libmemcached / connect.cc
1 /* vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
2 *
3 * Libmemcached library
4 *
5 * Copyright (C) 2011 Data Differential, http://datadifferential.com/
6 * Copyright (C) 2006-2010 Brian Aker All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met:
11 *
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * * Redistributions in binary form must reproduce the above
16 * copyright notice, this list of conditions and the following disclaimer
17 * in the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * * The names of its contributors may not be used to endorse or
21 * promote products derived from this software without specific prior
22 * written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 *
36 */
37
38
39 #include <libmemcached/common.h>
40 #include <assert.h>
41 #include <sys/time.h>
42 #include <time.h>
43
44 static memcached_return_t connect_poll(memcached_server_st *ptr)
45 {
46 struct pollfd fds[1];
47 fds[0].fd = ptr->fd;
48 fds[0].events = POLLOUT;
49
50 int error;
51 size_t loop_max= 5;
52
53 while (--loop_max) // Should only loop on cases of ERESTART or EINTR
54 {
55 error= poll(fds, 1, ptr->root->connect_timeout);
56
57 switch (error)
58 {
59 case 1:
60 {
61 int err;
62 socklen_t len= sizeof (err);
63 (void)getsockopt(ptr->fd, SOL_SOCKET, SO_ERROR, &err, &len);
64
65 // We check the value to see what happened wth the socket.
66 if (err == 0)
67 {
68 return MEMCACHED_SUCCESS;
69 }
70 else
71 {
72 ptr->cached_errno= errno;
73
74 return MEMCACHED_ERRNO;
75 }
76 }
77 case 0:
78 return MEMCACHED_TIMEOUT;
79 default: // A real error occurred and we need to completely bail
80 WATCHPOINT_ERRNO(get_socket_errno());
81 switch (get_socket_errno())
82 {
83 #ifdef TARGET_OS_LINUX
84 case ERESTART:
85 #endif
86 case EINTR:
87 continue;
88 default:
89 if (fds[0].revents & POLLERR)
90 {
91 int err;
92 socklen_t len= sizeof (err);
93 (void)getsockopt(ptr->fd, SOL_SOCKET, SO_ERROR, &err, &len);
94 ptr->cached_errno= (err == 0) ? get_socket_errno() : err;
95 }
96 else
97 {
98 ptr->cached_errno= get_socket_errno();
99 }
100
101 (void)closesocket(ptr->fd);
102 ptr->fd= INVALID_SOCKET;
103
104 return MEMCACHED_ERRNO;
105 }
106 }
107 }
108
109 // This should only be possible from ERESTART or EINTR;
110 ptr->cached_errno= get_socket_errno();
111
112 return MEMCACHED_ERRNO;
113 }
114
115 static memcached_return_t set_hostinfo(memcached_server_st *server)
116 {
117 char str_port[NI_MAXSERV];
118
119 assert(! server->address_info); // We cover the case where a programming mistake has been made.
120 if (server->address_info)
121 {
122 freeaddrinfo(server->address_info);
123 server->address_info= NULL;
124 server->address_info_next= NULL;
125 }
126
127 int length= snprintf(str_port, NI_MAXSERV, "%u", (uint32_t)server->port);
128 if (length >= NI_MAXSERV || length < 0)
129 return MEMCACHED_FAILURE;
130
131 struct addrinfo hints;
132 memset(&hints, 0, sizeof(struct addrinfo));
133
134 #if 0
135 hints.ai_family= AF_INET;
136 #endif
137 if (server->type == MEMCACHED_CONNECTION_UDP)
138 {
139 hints.ai_protocol= IPPROTO_UDP;
140 hints.ai_socktype= SOCK_DGRAM;
141 }
142 else
143 {
144 hints.ai_socktype= SOCK_STREAM;
145 hints.ai_protocol= IPPROTO_TCP;
146 }
147
148 uint32_t counter= 5;
149 while (--counter)
150 {
151 int errcode;
152 switch(errcode= getaddrinfo(server->hostname, str_port, &hints, &server->address_info))
153 {
154 case 0:
155 break;
156 case EAI_AGAIN:
157 if (counter > 1)
158 {
159 #ifndef WIN32
160 struct timespec dream, rem;
161
162 dream.tv_nsec= 1000;
163 dream.tv_sec= 0;
164
165 nanosleep(&dream, &rem);
166 #endif
167 continue;
168 }
169 else
170 {
171 return memcached_set_error_string(*server, MEMCACHED_HOST_LOOKUP_FAILURE, gai_strerror(errcode), strlen(gai_strerror(errcode)));
172 }
173
174 case EAI_SYSTEM:
175 {
176 static memcached_string_t mesg= { memcached_string_make("getaddrinfo") };
177 return memcached_set_errno(*server, errno, &mesg);
178 }
179 case EAI_BADFLAGS:
180 return memcached_set_error_string(*server, MEMCACHED_HOST_LOOKUP_FAILURE, memcached_literal_param("getaddrinfo(EAI_BADFLAGS)"));
181
182 case EAI_MEMORY:
183 return memcached_set_error_string(*server, MEMCACHED_ERRNO, memcached_literal_param("getaddrinfo(EAI_MEMORY)"));
184
185 default:
186 {
187 WATCHPOINT_STRING(server->hostname);
188 WATCHPOINT_STRING(gai_strerror(e));
189 return memcached_set_error_string(*server, MEMCACHED_HOST_LOOKUP_FAILURE, gai_strerror(errcode), strlen(gai_strerror(errcode)));
190 }
191 }
192 }
193
194 server->address_info_next= server->address_info;
195
196 return MEMCACHED_SUCCESS;
197 }
198
199 static inline memcached_return_t set_socket_nonblocking(memcached_server_st *ptr)
200 {
201 #ifdef WIN32
202 u_long arg = 1;
203 if (ioctlsocket(ptr->fd, FIONBIO, &arg) == SOCKET_ERROR)
204 {
205 return memcached_set_errno(*ptr, get_socket_errno(), NULL);
206 }
207 #else
208 int flags;
209
210 do
211 {
212 flags= fcntl(ptr->fd, F_GETFL, 0);
213 }
214 while (flags == -1 && (errno == EINTR || errno == EAGAIN));
215
216 unlikely (flags == -1)
217 {
218 return memcached_set_errno(*ptr, errno, NULL);
219 }
220 else if ((flags & O_NONBLOCK) == 0)
221 {
222 int rval;
223
224 do
225 {
226 rval= fcntl(ptr->fd, F_SETFL, flags | O_NONBLOCK);
227 }
228 while (rval == -1 && (errno == EINTR || errno == EAGAIN));
229
230 unlikely (rval == -1)
231 {
232 return memcached_set_errno(*ptr, errno, NULL);
233 }
234 }
235 #endif
236 return MEMCACHED_SUCCESS;
237 }
238
239 static memcached_return_t set_socket_options(memcached_server_st *ptr)
240 {
241 WATCHPOINT_ASSERT(ptr->fd != -1);
242
243 if (ptr->type == MEMCACHED_CONNECTION_UDP)
244 return MEMCACHED_SUCCESS;
245
246 #ifdef HAVE_SNDTIMEO
247 if (ptr->root->snd_timeout)
248 {
249 int error;
250 struct timeval waittime;
251
252 waittime.tv_sec= 0;
253 waittime.tv_usec= ptr->root->snd_timeout;
254
255 error= setsockopt(ptr->fd, SOL_SOCKET, SO_SNDTIMEO,
256 &waittime, (socklen_t)sizeof(struct timeval));
257 WATCHPOINT_ASSERT(error == 0);
258 if (error)
259 return MEMCACHED_FAILURE;
260 }
261 #endif
262
263 #ifdef HAVE_RCVTIMEO
264 if (ptr->root->rcv_timeout)
265 {
266 int error;
267 struct timeval waittime;
268
269 waittime.tv_sec= 0;
270 waittime.tv_usec= ptr->root->rcv_timeout;
271
272 error= setsockopt(ptr->fd, SOL_SOCKET, SO_RCVTIMEO,
273 &waittime, (socklen_t)sizeof(struct timeval));
274 WATCHPOINT_ASSERT(error == 0);
275 if (error)
276 return MEMCACHED_FAILURE;
277 }
278 #endif
279
280
281 #if defined(__MACH__) && defined(__APPLE__) || defined(__FreeBSD__)
282 {
283 int set = 1;
284 int error= setsockopt(ptr->fd, SOL_SOCKET, SO_NOSIGPIPE, (void *)&set, sizeof(int));
285
286 // This is not considered a fatal error
287 if (error == -1)
288 {
289 WATCHPOINT_ERRNO(get_socket_errno());
290 perror("setsockopt(SO_NOSIGPIPE)");
291 }
292 }
293 #endif
294
295 if (ptr->root->flags.no_block)
296 {
297 int error;
298 struct linger linger;
299
300 linger.l_onoff= 1;
301 linger.l_linger= 0; /* By default on close() just drop the socket */
302 error= setsockopt(ptr->fd, SOL_SOCKET, SO_LINGER,
303 &linger, (socklen_t)sizeof(struct linger));
304 WATCHPOINT_ASSERT(error == 0);
305 if (error)
306 return MEMCACHED_FAILURE;
307 }
308
309 if (ptr->root->flags.tcp_nodelay)
310 {
311 int flag= 1;
312 int error;
313
314 error= setsockopt(ptr->fd, IPPROTO_TCP, TCP_NODELAY,
315 &flag, (socklen_t)sizeof(int));
316 WATCHPOINT_ASSERT(error == 0);
317 if (error)
318 return MEMCACHED_FAILURE;
319 }
320
321 if (ptr->root->flags.tcp_keepalive)
322 {
323 int flag= 1;
324 int error;
325
326 error= setsockopt(ptr->fd, SOL_SOCKET, SO_KEEPALIVE,
327 &flag, (socklen_t)sizeof(int));
328 WATCHPOINT_ASSERT(error == 0);
329 if (error)
330 return MEMCACHED_FAILURE;
331 }
332
333 #ifdef TCP_KEEPIDLE
334 if (ptr->root->tcp_keepidle > 0)
335 {
336 int error;
337
338 error= setsockopt(ptr->fd, IPPROTO_TCP, TCP_KEEPIDLE,
339 &ptr->root->tcp_keepidle, (socklen_t)sizeof(int));
340 WATCHPOINT_ASSERT(error == 0);
341 if (error)
342 return MEMCACHED_FAILURE;
343 }
344 #endif
345
346 if (ptr->root->send_size > 0)
347 {
348 int error;
349
350 error= setsockopt(ptr->fd, SOL_SOCKET, SO_SNDBUF,
351 &ptr->root->send_size, (socklen_t)sizeof(int));
352 WATCHPOINT_ASSERT(error == 0);
353 if (error)
354 return MEMCACHED_FAILURE;
355 }
356
357 if (ptr->root->recv_size > 0)
358 {
359 int error;
360
361 error= setsockopt(ptr->fd, SOL_SOCKET, SO_RCVBUF,
362 &ptr->root->recv_size, (socklen_t)sizeof(int));
363 WATCHPOINT_ASSERT(error == 0);
364 if (error)
365 return MEMCACHED_FAILURE;
366 }
367
368
369 /* libmemcached will always use nonblocking IO to avoid write deadlocks */
370 return set_socket_nonblocking(ptr);
371 }
372
373 static memcached_return_t unix_socket_connect(memcached_server_st *ptr)
374 {
375 #ifndef WIN32
376 WATCHPOINT_ASSERT(ptr->fd == -1);
377
378 if ((ptr->fd= socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
379 {
380 ptr->cached_errno= errno;
381 return MEMCACHED_CONNECTION_SOCKET_CREATE_FAILURE;
382 }
383
384 struct sockaddr_un servAddr;
385
386 memset(&servAddr, 0, sizeof (struct sockaddr_un));
387 servAddr.sun_family= AF_UNIX;
388 strncpy(servAddr.sun_path, ptr->hostname, sizeof(servAddr.sun_path)); /* Copy filename */
389
390 test_connect:
391 if (connect(ptr->fd,
392 (struct sockaddr *)&servAddr,
393 sizeof(servAddr)) < 0)
394 {
395 switch (errno)
396 {
397 case EINPROGRESS:
398 case EALREADY:
399 case EINTR:
400 goto test_connect;
401 case EISCONN: /* We were spinning waiting on connect */
402 break;
403 default:
404 WATCHPOINT_ERRNO(errno);
405 ptr->cached_errno= errno;
406 return MEMCACHED_ERRNO;
407 }
408 }
409
410 WATCHPOINT_ASSERT(ptr->fd != -1);
411
412 return MEMCACHED_SUCCESS;
413 #else
414 (void)ptr;
415 return MEMCACHED_NOT_SUPPORTED;
416 #endif
417 }
418
419 static memcached_return_t network_connect(memcached_server_st *ptr)
420 {
421 bool timeout_error_occured= false;
422
423 WATCHPOINT_ASSERT(ptr->fd == INVALID_SOCKET);
424 WATCHPOINT_ASSERT(ptr->cursor_active == 0);
425
426 if (! ptr->address_info)
427 {
428 memcached_return_t rc= set_hostinfo(ptr);
429 if (rc != MEMCACHED_SUCCESS)
430 return rc;
431 }
432
433 /* Create the socket */
434 while (ptr->address_info_next && ptr->fd == INVALID_SOCKET)
435 {
436 /* Memcache server does not support IPV6 in udp mode, so skip if not ipv4 */
437 if (ptr->type == MEMCACHED_CONNECTION_UDP && ptr->address_info_next->ai_family != AF_INET)
438 {
439 ptr->address_info_next= ptr->address_info_next->ai_next;
440 continue;
441 }
442
443 if ((ptr->fd= socket(ptr->address_info_next->ai_family,
444 ptr->address_info_next->ai_socktype,
445 ptr->address_info_next->ai_protocol)) < 0)
446 {
447 ptr->cached_errno= get_socket_errno();
448 WATCHPOINT_ERRNO(get_socket_errno());
449 return MEMCACHED_CONNECTION_SOCKET_CREATE_FAILURE;
450 }
451
452 (void)set_socket_options(ptr);
453
454 /* connect to server */
455 if ((connect(ptr->fd, ptr->address_info_next->ai_addr, ptr->address_info_next->ai_addrlen) != SOCKET_ERROR))
456 {
457 break; // Success
458 }
459
460 /* An error occurred */
461 ptr->cached_errno= get_socket_errno();
462 switch (ptr->cached_errno)
463 {
464 case EWOULDBLOCK:
465 case EINPROGRESS: // nonblocking mode - first return
466 case EALREADY: // nonblocking mode - subsequent returns
467 {
468 memcached_return_t rc;
469 rc= connect_poll(ptr);
470
471 if (rc == MEMCACHED_TIMEOUT)
472 timeout_error_occured= true;
473
474 if (rc == MEMCACHED_SUCCESS)
475 break;
476 }
477
478 case EISCONN: // we are connected :-)
479 break;
480
481 case EINTR: // Special case, we retry ai_addr
482 (void)closesocket(ptr->fd);
483 ptr->fd= INVALID_SOCKET;
484 continue;
485
486 default:
487 (void)closesocket(ptr->fd);
488 ptr->fd= INVALID_SOCKET;
489 ptr->address_info_next= ptr->address_info_next->ai_next;
490 break;
491 }
492 }
493
494 if (ptr->fd == INVALID_SOCKET)
495 {
496 WATCHPOINT_STRING("Never got a good file descriptor");
497
498 /* Failed to connect. schedule next retry */
499 if (ptr->root->retry_timeout)
500 {
501 struct timeval next_time;
502
503 if (gettimeofday(&next_time, NULL) == 0)
504 ptr->next_retry= next_time.tv_sec + ptr->root->retry_timeout;
505 }
506
507 if (timeout_error_occured)
508 return MEMCACHED_TIMEOUT;
509
510 return MEMCACHED_ERRNO; /* The last error should be from connect() */
511 }
512
513 return MEMCACHED_SUCCESS; /* The last error should be from connect() */
514 }
515
516 void set_last_disconnected_host(memcached_server_write_instance_st ptr)
517 {
518 // const_cast
519 memcached_st *root= (memcached_st *)ptr->root;
520
521 #if 0
522 WATCHPOINT_STRING(ptr->hostname);
523 WATCHPOINT_NUMBER(ptr->port);
524 WATCHPOINT_ERRNO(ptr->cached_errno);
525 #endif
526 if (root->last_disconnected_server)
527 memcached_server_free(root->last_disconnected_server);
528 root->last_disconnected_server= memcached_server_clone(NULL, ptr);
529 }
530
531 memcached_return_t memcached_connect(memcached_server_write_instance_st ptr)
532 {
533 memcached_return_t rc= MEMCACHED_NO_SERVERS;
534
535 if (ptr->fd != INVALID_SOCKET)
536 return MEMCACHED_SUCCESS;
537
538 LIBMEMCACHED_MEMCACHED_CONNECT_START();
539
540 /* both retry_timeout and server_failure_limit must be set in order to delay retrying a server on error. */
541 WATCHPOINT_ASSERT(ptr->root);
542 if (ptr->root->retry_timeout && ptr->next_retry)
543 {
544 struct timeval curr_time;
545
546 gettimeofday(&curr_time, NULL);
547
548 // We should optimize this to remove the allocation if the server was
549 // the last server to die
550 if (ptr->next_retry > curr_time.tv_sec)
551 {
552 set_last_disconnected_host(ptr);
553
554 return MEMCACHED_SERVER_MARKED_DEAD;
555 }
556 }
557
558 // If we are over the counter failure, we just fail. Reject host only
559 // works if you have a set number of failures.
560 if (ptr->root->server_failure_limit && ptr->server_failure_counter >= ptr->root->server_failure_limit)
561 {
562 set_last_disconnected_host(ptr);
563
564 // @todo fix this by fixing behavior to no longer make use of
565 // memcached_st
566 if (_is_auto_eject_host(ptr->root))
567 {
568 run_distribution((memcached_st *)ptr->root);
569 }
570
571 return MEMCACHED_SERVER_MARKED_DEAD;
572 }
573
574 /* We need to clean up the multi startup piece */
575 switch (ptr->type)
576 {
577 case MEMCACHED_CONNECTION_UNKNOWN:
578 WATCHPOINT_ASSERT(0);
579 rc= MEMCACHED_NOT_SUPPORTED;
580 break;
581 case MEMCACHED_CONNECTION_UDP:
582 case MEMCACHED_CONNECTION_TCP:
583 rc= network_connect(ptr);
584 #ifdef LIBMEMCACHED_WITH_SASL_SUPPORT
585 if (ptr->fd != INVALID_SOCKET && ptr->root->sasl.callbacks)
586 {
587 rc= memcached_sasl_authenticate_connection(ptr);
588 if (rc != MEMCACHED_SUCCESS)
589 {
590 (void)closesocket(ptr->fd);
591 ptr->fd= INVALID_SOCKET;
592 }
593 }
594 #endif
595 break;
596 case MEMCACHED_CONNECTION_UNIX_SOCKET:
597 rc= unix_socket_connect(ptr);
598 break;
599 case MEMCACHED_CONNECTION_MAX:
600 default:
601 WATCHPOINT_ASSERT(0);
602 }
603
604 if (rc == MEMCACHED_SUCCESS)
605 {
606 ptr->server_failure_counter= 0;
607 ptr->next_retry= 0;
608 }
609 else
610 {
611 ptr->server_failure_counter++;
612
613 set_last_disconnected_host(ptr);
614 }
615
616 LIBMEMCACHED_MEMCACHED_CONNECT_END();
617
618 return rc;
619 }