Merge trunk for LP
[m6w6/libmemcached] / libmemcached / connect.cc
1 /* vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
2 *
3 * Libmemcached library
4 *
5 * Copyright (C) 2011 Data Differential, http://datadifferential.com/
6 * Copyright (C) 2006-2010 Brian Aker All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met:
11 *
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * * Redistributions in binary form must reproduce the above
16 * copyright notice, this list of conditions and the following disclaimer
17 * in the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * * The names of its contributors may not be used to endorse or
21 * promote products derived from this software without specific prior
22 * written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 *
36 */
37
38
39 #include <libmemcached/common.h>
40 #include <cassert>
41 #include <ctime>
42 #include <sys/time.h>
43
44 static memcached_return_t connect_poll(memcached_server_st *ptr)
45 {
46 struct pollfd fds[1];
47 fds[0].fd = ptr->fd;
48 fds[0].events = POLLOUT;
49
50 size_t loop_max= 5;
51
52 if (ptr->root->poll_timeout == 0)
53 {
54 return memcached_set_error(*ptr, MEMCACHED_TIMEOUT, MEMCACHED_AT);
55 }
56
57 while (--loop_max) // Should only loop on cases of ERESTART or EINTR
58 {
59 int error= poll(fds, 1, ptr->root->connect_timeout);
60 switch (error)
61 {
62 case 1:
63 {
64 int err;
65 socklen_t len= sizeof (err);
66 (void)getsockopt(ptr->fd, SOL_SOCKET, SO_ERROR, &err, &len);
67
68 // We check the value to see what happened wth the socket.
69 if (err == 0)
70 {
71 return MEMCACHED_SUCCESS;
72 }
73
74 return memcached_set_errno(*ptr, err, MEMCACHED_AT);
75 }
76 case 0:
77 {
78 return memcached_set_error(*ptr, MEMCACHED_TIMEOUT, MEMCACHED_AT);
79 }
80
81 default: // A real error occurred and we need to completely bail
82 WATCHPOINT_ERRNO(get_socket_errno());
83 switch (get_socket_errno())
84 {
85 #ifdef TARGET_OS_LINUX
86 case ERESTART:
87 #endif
88 case EINTR:
89 continue;
90
91 case EFAULT:
92 case ENOMEM:
93 return memcached_set_error(*ptr, MEMCACHED_MEMORY_ALLOCATION_FAILURE, MEMCACHED_AT);
94
95 case EINVAL:
96 return memcached_set_error(*ptr, MEMCACHED_MEMORY_ALLOCATION_FAILURE, MEMCACHED_AT, memcached_literal_param("RLIMIT_NOFILE exceeded, or if OSX the timeout value was invalid"));
97
98 default: // This should not happen
99 if (fds[0].revents & POLLERR)
100 {
101 int err;
102 socklen_t len= sizeof (err);
103 (void)getsockopt(ptr->fd, SOL_SOCKET, SO_ERROR, &err, &len);
104 memcached_set_errno(*ptr, (err == 0) ? get_socket_errno() : err, MEMCACHED_AT);
105 }
106 else
107 {
108 memcached_set_errno(*ptr, get_socket_errno(), MEMCACHED_AT);
109 }
110
111 WATCHPOINT_ASSERT(ptr->fd != INVALID_SOCKET);
112 (void)closesocket(ptr->fd);
113 ptr->fd= INVALID_SOCKET;
114 ptr->state= MEMCACHED_SERVER_STATE_NEW;
115
116 return memcached_set_errno(*ptr, get_socket_errno(), MEMCACHED_AT);
117 }
118 }
119 }
120
121 // This should only be possible from ERESTART or EINTR;
122 return memcached_set_errno(*ptr, get_socket_errno(), MEMCACHED_AT);
123 }
124
125 static memcached_return_t set_hostinfo(memcached_server_st *server)
126 {
127 WATCHPOINT_ASSERT(not server->address_info); // We cover the case where a programming mistake has been made.
128 if (server->address_info)
129 {
130 freeaddrinfo(server->address_info);
131 server->address_info= NULL;
132 server->address_info_next= NULL;
133 }
134
135 char str_port[NI_MAXSERV];
136 int length= snprintf(str_port, NI_MAXSERV, "%u", (uint32_t)server->port);
137 if (length >= NI_MAXSERV || length < 0)
138 return MEMCACHED_FAILURE;
139
140 struct addrinfo hints;
141 memset(&hints, 0, sizeof(struct addrinfo));
142
143 #if 0
144 hints.ai_family= AF_INET;
145 #endif
146 if (server->type == MEMCACHED_CONNECTION_UDP)
147 {
148 hints.ai_protocol= IPPROTO_UDP;
149 hints.ai_socktype= SOCK_DGRAM;
150 }
151 else
152 {
153 hints.ai_socktype= SOCK_STREAM;
154 hints.ai_protocol= IPPROTO_TCP;
155 }
156
157 int errcode;
158 switch(errcode= getaddrinfo(server->hostname, str_port, &hints, &server->address_info))
159 {
160 case 0:
161 break;
162
163 case EAI_AGAIN:
164 return memcached_set_error(*server, MEMCACHED_TIMEOUT, MEMCACHED_AT, memcached_string_make_from_cstr(gai_strerror(errcode)));
165
166 case EAI_SYSTEM:
167 return memcached_set_errno(*server, errno, MEMCACHED_AT, memcached_literal_param("getaddrinfo(EAI_SYSTEM)"));
168
169 case EAI_BADFLAGS:
170 return memcached_set_error(*server, MEMCACHED_INVALID_ARGUMENTS, MEMCACHED_AT, memcached_literal_param("getaddrinfo(EAI_BADFLAGS)"));
171
172 case EAI_MEMORY:
173 return memcached_set_error(*server, MEMCACHED_MEMORY_ALLOCATION_FAILURE, MEMCACHED_AT, memcached_literal_param("getaddrinfo(EAI_MEMORY)"));
174
175 default:
176 {
177 WATCHPOINT_STRING(server->hostname);
178 WATCHPOINT_STRING(gai_strerror(errcode));
179 return memcached_set_error(*server, MEMCACHED_HOST_LOOKUP_FAILURE, MEMCACHED_AT, memcached_string_make_from_cstr(gai_strerror(errcode)));
180 }
181 }
182 server->address_info_next= server->address_info;
183 server->state= MEMCACHED_SERVER_STATE_ADDRINFO;
184
185 return MEMCACHED_SUCCESS;
186 }
187
188 static inline void set_socket_nonblocking(memcached_server_st *ptr)
189 {
190 #ifdef WIN32
191 u_long arg = 1;
192 if (ioctlsocket(ptr->fd, FIONBIO, &arg) == SOCKET_ERROR)
193 {
194 memcached_set_errno(*ptr, get_socket_errno(), NULL);
195 }
196 #else
197 int flags;
198
199 do
200 {
201 flags= fcntl(ptr->fd, F_GETFL, 0);
202 } while (flags == -1 && (errno == EINTR || errno == EAGAIN));
203
204 if (flags == -1)
205 {
206 memcached_set_errno(*ptr, errno, NULL);
207 }
208 else if ((flags & O_NONBLOCK) == 0)
209 {
210 int rval;
211
212 do
213 {
214 rval= fcntl(ptr->fd, F_SETFL, flags | O_NONBLOCK);
215 } while (rval == -1 && (errno == EINTR || errno == EAGAIN));
216
217 unlikely (rval == -1)
218 {
219 memcached_set_errno(*ptr, errno, NULL);
220 }
221 }
222 #endif
223 }
224
225 static void set_socket_options(memcached_server_st *ptr)
226 {
227 WATCHPOINT_ASSERT(ptr->fd != -1);
228
229 if (ptr->type == MEMCACHED_CONNECTION_UDP)
230 return;
231
232 #ifdef HAVE_SNDTIMEO
233 if (ptr->root->snd_timeout)
234 {
235 int error;
236 struct timeval waittime;
237
238 waittime.tv_sec= 0;
239 waittime.tv_usec= ptr->root->snd_timeout;
240
241 error= setsockopt(ptr->fd, SOL_SOCKET, SO_SNDTIMEO,
242 &waittime, (socklen_t)sizeof(struct timeval));
243 WATCHPOINT_ASSERT(error == 0);
244 }
245 #endif
246
247 #ifdef HAVE_RCVTIMEO
248 if (ptr->root->rcv_timeout)
249 {
250 int error;
251 struct timeval waittime;
252
253 waittime.tv_sec= 0;
254 waittime.tv_usec= ptr->root->rcv_timeout;
255
256 error= setsockopt(ptr->fd, SOL_SOCKET, SO_RCVTIMEO,
257 &waittime, (socklen_t)sizeof(struct timeval));
258 WATCHPOINT_ASSERT(error == 0);
259 }
260 #endif
261
262
263 #if defined(__MACH__) && defined(__APPLE__) || defined(__FreeBSD__)
264 {
265 int set= 1;
266 int error= setsockopt(ptr->fd, SOL_SOCKET, SO_NOSIGPIPE, (void *)&set, sizeof(int));
267
268 // This is not considered a fatal error
269 if (error == -1)
270 {
271 WATCHPOINT_ERRNO(get_socket_errno());
272 perror("setsockopt(SO_NOSIGPIPE)");
273 }
274 }
275 #endif
276
277 if (ptr->root->flags.no_block)
278 {
279 int error;
280 struct linger linger;
281
282 linger.l_onoff= 1;
283 linger.l_linger= 0; /* By default on close() just drop the socket */
284 error= setsockopt(ptr->fd, SOL_SOCKET, SO_LINGER,
285 &linger, (socklen_t)sizeof(struct linger));
286 WATCHPOINT_ASSERT(error == 0);
287 }
288
289 if (ptr->root->flags.tcp_nodelay)
290 {
291 int flag= 1;
292 int error;
293
294 error= setsockopt(ptr->fd, IPPROTO_TCP, TCP_NODELAY,
295 &flag, (socklen_t)sizeof(int));
296 WATCHPOINT_ASSERT(error == 0);
297 }
298
299 if (ptr->root->flags.tcp_keepalive)
300 {
301 int flag= 1;
302 int error;
303
304 error= setsockopt(ptr->fd, SOL_SOCKET, SO_KEEPALIVE,
305 &flag, (socklen_t)sizeof(int));
306 WATCHPOINT_ASSERT(error == 0);
307 }
308
309 #ifdef TCP_KEEPIDLE
310 if (ptr->root->tcp_keepidle > 0)
311 {
312 int error;
313
314 error= setsockopt(ptr->fd, IPPROTO_TCP, TCP_KEEPIDLE,
315 &ptr->root->tcp_keepidle, (socklen_t)sizeof(int));
316 WATCHPOINT_ASSERT(error == 0);
317 }
318 #endif
319
320 if (ptr->root->send_size > 0)
321 {
322 int error;
323
324 error= setsockopt(ptr->fd, SOL_SOCKET, SO_SNDBUF,
325 &ptr->root->send_size, (socklen_t)sizeof(int));
326 WATCHPOINT_ASSERT(error == 0);
327 }
328
329 if (ptr->root->recv_size > 0)
330 {
331 int error;
332
333 error= setsockopt(ptr->fd, SOL_SOCKET, SO_RCVBUF,
334 &ptr->root->recv_size, (socklen_t)sizeof(int));
335 WATCHPOINT_ASSERT(error == 0);
336 }
337
338
339 /* libmemcached will always use nonblocking IO to avoid write deadlocks */
340 set_socket_nonblocking(ptr);
341 }
342
343 static memcached_return_t unix_socket_connect(memcached_server_st *ptr)
344 {
345 #ifndef WIN32
346 WATCHPOINT_ASSERT(ptr->fd == -1);
347
348 if ((ptr->fd= socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
349 {
350 memcached_set_errno(*ptr, errno, NULL);
351 return MEMCACHED_CONNECTION_FAILURE;
352 }
353
354 struct sockaddr_un servAddr;
355
356 memset(&servAddr, 0, sizeof (struct sockaddr_un));
357 servAddr.sun_family= AF_UNIX;
358 strncpy(servAddr.sun_path, ptr->hostname, sizeof(servAddr.sun_path)); /* Copy filename */
359
360 do {
361 if (connect(ptr->fd, (struct sockaddr *)&servAddr, sizeof(servAddr)) < 0)
362 {
363 switch (errno)
364 {
365 case EINPROGRESS:
366 case EALREADY:
367 case EINTR:
368 continue;
369
370 case EISCONN: /* We were spinning waiting on connect */
371 {
372 WATCHPOINT_ASSERT(0); // Programmer error
373 break;
374 }
375
376 default:
377 WATCHPOINT_ERRNO(errno);
378 memcached_set_errno(*ptr, errno, MEMCACHED_AT);
379 return MEMCACHED_CONNECTION_FAILURE;
380 }
381 }
382 } while (0);
383 ptr->state= MEMCACHED_SERVER_STATE_CONNECTED;
384
385 WATCHPOINT_ASSERT(ptr->fd != INVALID_SOCKET);
386
387 return MEMCACHED_SUCCESS;
388 #else
389 (void)ptr;
390 return MEMCACHED_NOT_SUPPORTED;
391 #endif
392 }
393
394 static memcached_return_t network_connect(memcached_server_st *ptr)
395 {
396 bool timeout_error_occured= false;
397
398 WATCHPOINT_ASSERT(ptr->fd == INVALID_SOCKET);
399 WATCHPOINT_ASSERT(ptr->cursor_active == 0);
400
401 if (not ptr->address_info)
402 {
403 WATCHPOINT_ASSERT(ptr->state == MEMCACHED_SERVER_STATE_NEW);
404 memcached_return_t rc;
405 uint32_t counter= 5;
406 while (--counter)
407 {
408 if ((rc= set_hostinfo(ptr)) != MEMCACHED_TIMEOUT)
409 break;
410
411 #ifndef WIN32
412 struct timespec dream, rem;
413
414 dream.tv_nsec= 1000;
415 dream.tv_sec= 0;
416
417 nanosleep(&dream, &rem);
418 #endif
419 }
420
421 if (memcached_failed(rc))
422 return rc;
423 }
424
425 /* Create the socket */
426 while (ptr->address_info_next && ptr->fd == INVALID_SOCKET)
427 {
428 /* Memcache server does not support IPV6 in udp mode, so skip if not ipv4 */
429 if (ptr->type == MEMCACHED_CONNECTION_UDP && ptr->address_info_next->ai_family != AF_INET)
430 {
431 ptr->address_info_next= ptr->address_info_next->ai_next;
432 continue;
433 }
434
435 if ((ptr->fd= socket(ptr->address_info_next->ai_family,
436 ptr->address_info_next->ai_socktype,
437 ptr->address_info_next->ai_protocol)) < 0)
438 {
439 return memcached_set_errno(*ptr, get_socket_errno(), NULL);
440 }
441
442 set_socket_options(ptr);
443
444 /* connect to server */
445 if ((connect(ptr->fd, ptr->address_info_next->ai_addr, ptr->address_info_next->ai_addrlen) != SOCKET_ERROR))
446 {
447 ptr->state= MEMCACHED_SERVER_STATE_CONNECTED;
448 break; // Success
449 }
450
451 /* An error occurred */
452 switch (get_socket_errno())
453 {
454 case ETIMEDOUT:
455 timeout_error_occured= true;
456 break;
457
458 case EWOULDBLOCK:
459 case EINPROGRESS: // nonblocking mode - first return
460 case EALREADY: // nonblocking mode - subsequent returns
461 {
462 ptr->state= MEMCACHED_SERVER_STATE_IN_PROGRESS;
463 memcached_return_t rc= connect_poll(ptr);
464
465 if (memcached_success(rc))
466 {
467 ptr->state= MEMCACHED_SERVER_STATE_CONNECTED;
468 return MEMCACHED_SUCCESS;
469 }
470
471 // A timeout here is treated as an error, we will not retry
472 if (rc == MEMCACHED_TIMEOUT)
473 {
474 timeout_error_occured= true;
475 }
476 }
477 break;
478
479 case EISCONN: // we are connected :-)
480 WATCHPOINT_ASSERT(0); // This is a programmer's error
481 break;
482
483 case EINTR: // Special case, we retry ai_addr
484 WATCHPOINT_ASSERT(ptr->fd != INVALID_SOCKET);
485 (void)closesocket(ptr->fd);
486 ptr->fd= INVALID_SOCKET;
487 continue;
488
489 default:
490 break;
491 }
492
493 WATCHPOINT_ASSERT(ptr->fd != INVALID_SOCKET);
494 (void)closesocket(ptr->fd);
495 ptr->fd= INVALID_SOCKET;
496 ptr->address_info_next= ptr->address_info_next->ai_next;
497 }
498
499 WATCHPOINT_ASSERT(ptr->fd == INVALID_SOCKET);
500
501 if (timeout_error_occured)
502 {
503 if (ptr->fd != INVALID_SOCKET)
504 {
505 (void)closesocket(ptr->fd);
506 ptr->fd= INVALID_SOCKET;
507 }
508 }
509
510 WATCHPOINT_STRING("Never got a good file descriptor");
511 /* Failed to connect. schedule next retry */
512 if (ptr->root->retry_timeout)
513 {
514 struct timeval next_time;
515
516 if (gettimeofday(&next_time, NULL) == 0)
517 ptr->next_retry= next_time.tv_sec + ptr->root->retry_timeout;
518 }
519
520 if (timeout_error_occured)
521 return memcached_set_error(*ptr, MEMCACHED_TIMEOUT, MEMCACHED_AT);
522
523 return memcached_set_error(*ptr, MEMCACHED_CONNECTION_FAILURE, MEMCACHED_AT); /* The last error should be from connect() */
524 }
525
526 void set_last_disconnected_host(memcached_server_write_instance_st self)
527 {
528 // const_cast
529 memcached_st *root= (memcached_st *)self->root;
530
531 #if 0
532 WATCHPOINT_STRING(self->hostname);
533 WATCHPOINT_NUMBER(self->port);
534 WATCHPOINT_ERRNO(self->cached_errno);
535 #endif
536 memcached_server_free(root->last_disconnected_server);
537 root->last_disconnected_server= memcached_server_clone(NULL, self);
538 }
539
540 memcached_return_t memcached_connect(memcached_server_write_instance_st ptr)
541 {
542 memcached_return_t rc= MEMCACHED_NO_SERVERS;
543
544 if (ptr->fd != INVALID_SOCKET)
545 {
546 return MEMCACHED_SUCCESS;
547 }
548
549 LIBMEMCACHED_MEMCACHED_CONNECT_START();
550
551 /* both retry_timeout and server_failure_limit must be set in order to delay retrying a server on error. */
552 WATCHPOINT_ASSERT(ptr->root);
553 if (ptr->root->retry_timeout && ptr->next_retry)
554 {
555 struct timeval curr_time;
556
557 gettimeofday(&curr_time, NULL);
558
559 // We should optimize this to remove the allocation if the server was
560 // the last server to die
561 if (ptr->next_retry > curr_time.tv_sec)
562 {
563 set_last_disconnected_host(ptr);
564
565 return memcached_set_error(*ptr, MEMCACHED_SERVER_MARKED_DEAD, MEMCACHED_AT);
566 }
567 }
568
569 // If we are over the counter failure, we just fail. Reject host only
570 // works if you have a set number of failures.
571 if (ptr->root->server_failure_limit && ptr->server_failure_counter >= ptr->root->server_failure_limit)
572 {
573 set_last_disconnected_host(ptr);
574
575 // @todo fix this by fixing behavior to no longer make use of
576 // memcached_st
577 if (_is_auto_eject_host(ptr->root))
578 {
579 run_distribution((memcached_st *)ptr->root);
580 }
581
582 return memcached_set_error(*ptr, MEMCACHED_SERVER_MARKED_DEAD, MEMCACHED_AT);
583 }
584
585 /* We need to clean up the multi startup piece */
586 switch (ptr->type)
587 {
588 case MEMCACHED_CONNECTION_UNKNOWN:
589 WATCHPOINT_ASSERT(0);
590 rc= MEMCACHED_NOT_SUPPORTED;
591 break;
592
593 case MEMCACHED_CONNECTION_UDP:
594 case MEMCACHED_CONNECTION_TCP:
595 rc= network_connect(ptr);
596 #ifdef LIBMEMCACHED_WITH_SASL_SUPPORT
597 if (ptr->fd != INVALID_SOCKET and ptr->root->sasl.callbacks)
598 {
599 rc= memcached_sasl_authenticate_connection(ptr);
600 if (memcached_failed(rc) and ptr->fd != INVALID_SOCKET)
601 {
602 WATCHPOINT_ASSERT(ptr->fd != INVALID_SOCKET);
603 (void)closesocket(ptr->fd);
604 ptr->fd= INVALID_SOCKET;
605 }
606 }
607 #endif
608 break;
609
610 case MEMCACHED_CONNECTION_UNIX_SOCKET:
611 rc= unix_socket_connect(ptr);
612 break;
613
614 case MEMCACHED_CONNECTION_MAX:
615 default:
616 WATCHPOINT_ASSERT(0);
617 }
618
619 if (memcached_success(rc))
620 {
621 ptr->server_failure_counter= 0;
622 ptr->next_retry= 0;
623 }
624 else
625 {
626 memcached_set_error(*ptr, rc, MEMCACHED_AT);
627 ptr->server_failure_counter++;
628 set_last_disconnected_host(ptr);
629 }
630
631 LIBMEMCACHED_MEMCACHED_CONNECT_END();
632
633 return rc;
634 }