Add support for AUTO_EJECT_HOST
[awesomized/libmemcached] / libmemcached / memcached_io.c
1 /*
2 Basic socket buffered IO
3 */
4
5 #include "common.h"
6 #include "memcached_io.h"
7 #include <sys/select.h>
8 #include <poll.h>
9
/* Direction of the pending I/O operation that io_wait() should wait for. */
typedef enum {
  MEM_READ,  /* wait until the socket becomes readable */
  MEM_WRITE  /* wait until the socket becomes writable */
} memc_read_or_write;
14
15 static ssize_t io_flush(memcached_server_st *ptr, memcached_return *error);
16 static void increment_udp_message_id(memcached_server_st *ptr);
17
18 static memcached_return io_wait(memcached_server_st *ptr,
19 memc_read_or_write read_or_write)
20 {
21 struct pollfd fds[1];
22 short flags= 0;
23 int error;
24
25 if (read_or_write == MEM_WRITE) /* write */
26 flags= POLLOUT | POLLERR;
27 else
28 flags= POLLIN | POLLERR;
29
30 memset(&fds, 0, sizeof(struct pollfd));
31 fds[0].fd= ptr->fd;
32 fds[0].events= flags;
33
34 /*
35 ** We are going to block on write, but at least on Solaris we might block
36 ** on write if we haven't read anything from our input buffer..
37 ** Try to purge the input buffer if we don't do any flow control in the
38 ** application layer (just sending a lot of data etc)
39 ** The test is moved down in the purge function to avoid duplication of
40 ** the test.
41 */
42 if (read_or_write == MEM_WRITE)
43 {
44 memcached_return rc=memcached_purge(ptr);
45 if (rc != MEMCACHED_SUCCESS && rc != MEMCACHED_STORED)
46 return MEMCACHED_FAILURE;
47 }
48
49 error= poll(fds, 1, ptr->root->poll_timeout);
50
51 if (error == 1)
52 return MEMCACHED_SUCCESS;
53 else if (error == 0)
54 {
55 return MEMCACHED_TIMEOUT;
56 }
57
58 /* Imposssible for anything other then -1 */
59 WATCHPOINT_ASSERT(error == -1);
60 memcached_quit_server(ptr, 1);
61
62 return MEMCACHED_FAILURE;
63
64 }
65
#ifdef UNUSED
/*
  Pre-read pending response data into each host's read buffer.

  Compiled only when UNUSED is defined, and even then the function returns
  immediately: everything after the early return is unreachable.
*/
void memcached_io_preread(memcached_st *ptr)
{
  unsigned int host;

  return;

  for (host= 0; host < ptr->number_of_hosts; host++)
  {
    size_t got;

    /* Skip hosts with no outstanding responses or with a full buffer. */
    if (!memcached_server_response_count(ptr, host))
      continue;
    if (ptr->hosts[host].read_data_length >= MEMCACHED_MAX_BUFFER)
      continue;

    got= read(ptr->hosts[host].fd,
              ptr->hosts[host].read_ptr + ptr->hosts[host].read_data_length,
              MEMCACHED_MAX_BUFFER - ptr->hosts[host].read_data_length);
    if (got == (size_t)-1)
      continue;

    ptr->hosts[host].read_buffer_length+= got;
    ptr->hosts[host].read_data_length+= got;
  }
}
#endif
92
93 ssize_t memcached_io_read(memcached_server_st *ptr,
94 void *buffer, size_t length)
95 {
96 char *buffer_ptr;
97
98 buffer_ptr= buffer;
99
100 while (length)
101 {
102 if (!ptr->read_buffer_length)
103 {
104 ssize_t data_read;
105
106 while (1)
107 {
108 data_read= read(ptr->fd,
109 ptr->read_buffer,
110 MEMCACHED_MAX_BUFFER);
111 if (data_read > 0)
112 break;
113 else if (data_read == -1)
114 {
115 ptr->cached_errno= errno;
116 switch (errno)
117 {
118 case EAGAIN:
119 case EINTR:
120 if (io_wait(ptr, MEM_READ) == MEMCACHED_SUCCESS)
121 continue;
122 /* fall through */
123
124 default:
125 {
126 memcached_quit_server(ptr, 1);
127 return -1;
128 }
129 }
130 }
131 else
132 {
133 /*
134 EOF. Any data received so far is incomplete
135 so discard it. This always reads by byte in case of TCP
136 and protocol enforcement happens at memcached_response()
137 looking for '\n'. We do not care for UDB which requests 8 bytes
138 at once. Generally, this means that connection went away. Since
139 for blocking I/O we do not return 0 and for non-blocking case
140 it will return EGAIN if data is not immediatly available.
141 */
142 memcached_quit_server(ptr, 1);
143 return -1;
144 }
145 }
146
147 ptr->io_bytes_sent = 0;
148 ptr->read_data_length= data_read;
149 ptr->read_buffer_length= data_read;
150 ptr->read_ptr= ptr->read_buffer;
151 }
152
153 if (length > 1)
154 {
155 size_t difference;
156
157 difference= (length > ptr->read_buffer_length) ? ptr->read_buffer_length : length;
158
159 memcpy(buffer_ptr, ptr->read_ptr, difference);
160 length -= difference;
161 ptr->read_ptr+= difference;
162 ptr->read_buffer_length-= difference;
163 buffer_ptr+= difference;
164 }
165 else
166 {
167 *buffer_ptr= *ptr->read_ptr;
168 ptr->read_ptr++;
169 ptr->read_buffer_length--;
170 buffer_ptr++;
171 break;
172 }
173 }
174
175 ptr->server_failure_counter= 0;
176 return (size_t)(buffer_ptr - (char*)buffer);
177 }
178
179 ssize_t memcached_io_write(memcached_server_st *ptr,
180 const void *buffer, size_t length, char with_flush)
181 {
182 size_t original_length;
183 const char* buffer_ptr;
184
185 WATCHPOINT_ASSERT(ptr->fd != -1);
186
187 original_length= length;
188 buffer_ptr= buffer;
189
190 while (length)
191 {
192 char *write_ptr;
193 size_t should_write;
194 size_t buffer_end;
195
196 if (ptr->type == MEMCACHED_CONNECTION_UDP)
197 {
198 //UDP does not support partial writes
199 buffer_end= MAX_UDP_DATAGRAM_LENGTH;
200 should_write= length;
201 if (ptr->write_buffer_offset + should_write > buffer_end)
202 return -1;
203 }
204 else
205 {
206 buffer_end= MEMCACHED_MAX_BUFFER;
207 should_write= buffer_end - ptr->write_buffer_offset;
208 should_write= (should_write < length) ? should_write : length;
209 }
210
211 write_ptr= ptr->write_buffer + ptr->write_buffer_offset;
212 memcpy(write_ptr, buffer_ptr, should_write);
213 ptr->write_buffer_offset+= should_write;
214 buffer_ptr+= should_write;
215 length-= should_write;
216
217 if (ptr->write_buffer_offset == buffer_end && ptr->type != MEMCACHED_CONNECTION_UDP)
218 {
219 memcached_return rc;
220 ssize_t sent_length;
221
222 WATCHPOINT_ASSERT(ptr->fd != -1);
223 sent_length= io_flush(ptr, &rc);
224 if (sent_length == -1)
225 return -1;
226
227 /* If io_flush calls memcached_purge, sent_length may be 0 */
228 if (sent_length != 0)
229 WATCHPOINT_ASSERT(sent_length == buffer_end);
230 }
231 }
232
233 if (with_flush)
234 {
235 memcached_return rc;
236 WATCHPOINT_ASSERT(ptr->fd != -1);
237 if (io_flush(ptr, &rc) == -1)
238 return -1;
239 }
240
241 return original_length;
242 }
243
244 memcached_return memcached_io_close(memcached_server_st *ptr)
245 {
246 int r;
247
248 if (ptr->fd == -1)
249 return MEMCACHED_SUCCESS;
250
251 /* in case of death shutdown to avoid blocking at close() */
252 if (1)
253 {
254 r= shutdown(ptr->fd, SHUT_RDWR);
255
256 #ifdef HAVE_DEBUG
257 if (r && errno != ENOTCONN)
258 {
259 WATCHPOINT_NUMBER(ptr->fd);
260 WATCHPOINT_ERRNO(errno);
261 WATCHPOINT_ASSERT(errno);
262 }
263 #endif
264 }
265
266 r= close(ptr->fd);
267 #ifdef HAVE_DEBUG
268 if (r != 0)
269 WATCHPOINT_ERRNO(errno);
270 #endif
271
272 return MEMCACHED_SUCCESS;
273 }
274
275 memcached_server_st *memcached_io_get_readable_server(memcached_st *memc)
276 {
277 #define MAX_SERVERS_TO_POLL 100
278 struct pollfd fds[MAX_SERVERS_TO_POLL];
279 int index= 0;
280
281 for (int x= 0; x< memc->number_of_hosts && index < MAX_SERVERS_TO_POLL; ++x)
282 {
283 if (memc->hosts[x].read_buffer_length > 0) /* I have data in the buffer */
284 return &memc->hosts[x];
285
286 if (memcached_server_response_count(&memc->hosts[x]) > 0)
287 {
288 fds[index].events = POLLIN;
289 fds[index].revents = 0;
290 fds[index].fd = memc->hosts[x].fd;
291 ++index;
292 }
293 }
294
295 if (index < 2)
296 {
297 /* We have 0 or 1 server with pending events.. */
298 for (int x= 0; x< memc->number_of_hosts; ++x)
299 if (memcached_server_response_count(&memc->hosts[x]) > 0)
300 return &memc->hosts[x];
301
302 return NULL;
303 }
304
305 int err= poll(fds, index, memc->poll_timeout);
306 switch (err) {
307 case -1:
308 memc->cached_errno = errno;
309 /* FALLTHROUGH */
310 case 0:
311 break;
312 default:
313 for (int x= 0; x < index; ++x)
314 if (fds[x].revents & POLLIN)
315 for (int y= 0; y < memc->number_of_hosts; ++y)
316 if (memc->hosts[y].fd == fds[x].fd)
317 return &memc->hosts[y];
318 }
319
320 return NULL;
321 }
322
323 static ssize_t io_flush(memcached_server_st *ptr,
324 memcached_return *error)
325 {
326 /*
327 ** We might want to purge the input buffer if we haven't consumed
328 ** any output yet... The test for the limits is the purge is inline
329 ** in the purge function to avoid duplicating the logic..
330 */
331 {
332 memcached_return rc;
333 WATCHPOINT_ASSERT(ptr->fd != -1);
334 rc= memcached_purge(ptr);
335
336 if (rc != MEMCACHED_SUCCESS && rc != MEMCACHED_STORED)
337 return -1;
338 }
339 ssize_t sent_length;
340 size_t return_length;
341 char *local_write_ptr= ptr->write_buffer;
342 size_t write_length= ptr->write_buffer_offset;
343
344 *error= MEMCACHED_SUCCESS;
345
346 WATCHPOINT_ASSERT(ptr->fd != -1);
347
348 // UDP Sanity check, make sure that we are not sending somthing too big
349 if (ptr->type == MEMCACHED_CONNECTION_UDP && write_length > MAX_UDP_DATAGRAM_LENGTH)
350 return -1;
351
352 if (ptr->write_buffer_offset == 0 || (ptr->type == MEMCACHED_CONNECTION_UDP
353 && ptr->write_buffer_offset == UDP_DATAGRAM_HEADER_LENGTH))
354 return 0;
355
356 /* Looking for memory overflows */
357 #if defined(HAVE_DEBUG)
358 if (write_length == MEMCACHED_MAX_BUFFER)
359 WATCHPOINT_ASSERT(ptr->write_buffer == local_write_ptr);
360 WATCHPOINT_ASSERT((ptr->write_buffer + MEMCACHED_MAX_BUFFER) >= (local_write_ptr + write_length));
361 #endif
362
363 return_length= 0;
364 while (write_length)
365 {
366 WATCHPOINT_ASSERT(ptr->fd != -1);
367 WATCHPOINT_ASSERT(write_length > 0);
368 sent_length= 0;
369 if (ptr->type == MEMCACHED_CONNECTION_UDP)
370 increment_udp_message_id(ptr);
371 sent_length= write(ptr->fd, local_write_ptr, write_length);
372
373 if (sent_length == -1)
374 {
375 ptr->cached_errno= errno;
376 switch (errno)
377 {
378 case ENOBUFS:
379 continue;
380 case EAGAIN:
381 {
382 memcached_return rc;
383 rc= io_wait(ptr, MEM_WRITE);
384
385 if (rc == MEMCACHED_SUCCESS || rc == MEMCACHED_TIMEOUT)
386 continue;
387
388 memcached_quit_server(ptr, 1);
389 return -1;
390 }
391 default:
392 memcached_quit_server(ptr, 1);
393 *error= MEMCACHED_ERRNO;
394 return -1;
395 }
396 }
397
398 if (ptr->type == MEMCACHED_CONNECTION_UDP && sent_length != write_length)
399 {
400 memcached_quit_server(ptr, 1);
401 return -1;
402 }
403
404 ptr->io_bytes_sent += sent_length;
405
406 local_write_ptr+= sent_length;
407 write_length-= sent_length;
408 return_length+= sent_length;
409 }
410
411 WATCHPOINT_ASSERT(write_length == 0);
412 // Need to study this assert() WATCHPOINT_ASSERT(return_length ==
413 // ptr->write_buffer_offset);
414
415 // if we are a udp server, the begining of the buffer is reserverd for
416 // the upd frame header
417 if (ptr->type == MEMCACHED_CONNECTION_UDP)
418 ptr->write_buffer_offset= UDP_DATAGRAM_HEADER_LENGTH;
419 else
420 ptr->write_buffer_offset= 0;
421
422 return return_length;
423 }
424
425 /*
426 Eventually we will just kill off the server with the problem.
427 */
428 void memcached_io_reset(memcached_server_st *ptr)
429 {
430 memcached_quit_server(ptr, 1);
431 }
432
433 /**
434 * Read a given number of bytes from the server and place it into a specific
435 * buffer. Reset the IO channel on this server if an error occurs.
436 */
437 memcached_return memcached_safe_read(memcached_server_st *ptr,
438 void *dta,
439 size_t size)
440 {
441 size_t offset= 0;
442 char *data= dta;
443
444 while (offset < size)
445 {
446 ssize_t nread= memcached_io_read(ptr, data + offset, size - offset);
447 if (nread <= 0)
448 {
449 memcached_io_reset(ptr);
450 return MEMCACHED_UNKNOWN_READ_FAILURE;
451 }
452 offset+= nread;
453 }
454
455 return MEMCACHED_SUCCESS;
456 }
457
458 memcached_return memcached_io_readline(memcached_server_st *ptr,
459 char *buffer_ptr,
460 size_t size)
461 {
462 bool line_complete= false;
463 int total_nr= 0;
464
465 while (!line_complete)
466 {
467 if (ptr->read_buffer_length == 0)
468 {
469 /*
470 * We don't have any data in the buffer, so let's fill the read
471 * buffer. Call the standard read function to avoid duplicating
472 * the logic.
473 */
474 if (memcached_io_read(ptr, buffer_ptr, 1) != 1)
475 return MEMCACHED_UNKNOWN_READ_FAILURE;
476
477 if (*buffer_ptr == '\n')
478 line_complete= true;
479
480 ++buffer_ptr;
481 ++total_nr;
482 }
483
484 /* Now let's look in the buffer and copy as we go! */
485 while (ptr->read_buffer_length && total_nr < size && !line_complete)
486 {
487 *buffer_ptr = *ptr->read_ptr;
488 if (*buffer_ptr == '\n')
489 line_complete = true;
490 --ptr->read_buffer_length;
491 ++ptr->read_ptr;
492 ++total_nr;
493 ++buffer_ptr;
494 }
495
496 if (total_nr == size)
497 return MEMCACHED_PROTOCOL_ERROR;
498 }
499
500 return MEMCACHED_SUCCESS;
501 }
502
503 /*
504 * The udp request id consists of two seperate sections
505 * 1) The thread id
506 * 2) The message number
507 * The thread id should only be set when the memcached_st struct is created
508 * and should not be changed.
509 *
510 * The message num is incremented for each new message we send, this function
511 * extracts the message number from message_id, increments it and then
512 * writes the new value back into the header
513 */
514 static void increment_udp_message_id(memcached_server_st *ptr)
515 {
516 struct udp_datagram_header_st *header= (struct udp_datagram_header_st *)ptr->write_buffer;
517 uint16_t cur_req= get_udp_datagram_request_id(header);
518 uint16_t msg_num= get_msg_num_from_request_id(cur_req);
519 uint16_t thread_id= get_thread_id_from_request_id(cur_req);
520
521 if (((++msg_num) & UDP_REQUEST_ID_THREAD_MASK) != 0)
522 msg_num= 0;
523
524 header->request_id= htons(thread_id | msg_num);
525 }
526
527 memcached_return memcached_io_init_udp_header(memcached_server_st *ptr, uint16_t thread_id)
528 {
529 if (thread_id > UDP_REQUEST_ID_MAX_THREAD_ID)
530 return MEMCACHED_FAILURE;
531
532 struct udp_datagram_header_st *header= (struct udp_datagram_header_st *)ptr->write_buffer;
533 header->request_id= htons(generate_udp_request_thread_id(thread_id));
534 header->num_datagrams= htons(1);
535 header->sequence_number= htons(0);
536
537 return MEMCACHED_SUCCESS;
538 }