Minor code cleanup, plus modification to while() loop around poll in
[m6w6/libmemcached] / libmemcached / io.c
1 /* LibMemcached
2 * Copyright (C) 2006-2009 Brian Aker
3 * All rights reserved.
4 *
5 * Use and distribution licensed under the BSD license. See
6 * the COPYING file in the parent directory for full text.
7 *
8 * Summary: Server IO, Not public!
9 *
10 */
11
12
13 #include "common.h"
14 #include <sys/select.h>
15 #include <poll.h>
16
/* Direction of socket readiness that io_wait() blocks on. */
typedef enum {
  MEM_READ= 0,  /* wait until the descriptor is readable (POLLIN) */
  MEM_WRITE= 1  /* wait until the descriptor is writable (POLLOUT) */
} memc_read_or_write;
21
22 static ssize_t io_flush(memcached_server_instance_st *ptr, memcached_return_t *error);
23 static void increment_udp_message_id(memcached_server_instance_st *ptr);
24
25 static memcached_return_t io_wait(memcached_server_instance_st *ptr,
26 memc_read_or_write read_or_write)
27 {
28 struct pollfd fds= {
29 .fd= ptr->fd,
30 .events = POLLIN
31 };
32 int error;
33
34 unlikely (read_or_write == MEM_WRITE) /* write */
35 fds.events= POLLOUT;
36
37 /*
38 ** We are going to block on write, but at least on Solaris we might block
39 ** on write if we haven't read anything from our input buffer..
40 ** Try to purge the input buffer if we don't do any flow control in the
41 ** application layer (just sending a lot of data etc)
42 ** The test is moved down in the purge function to avoid duplication of
43 ** the test.
44 */
45 if (read_or_write == MEM_WRITE)
46 {
47 memcached_return_t rc= memcached_purge(ptr);
48 if (rc != MEMCACHED_SUCCESS && rc != MEMCACHED_STORED)
49 return MEMCACHED_FAILURE;
50 }
51
52 int timeout= ptr->root->poll_timeout;
53 if (ptr->root->flags.no_block == false)
54 timeout= -1;
55
56 while (1)
57 {
58 error= poll(&fds, 1, timeout);
59
60 switch (error)
61 {
62 case 1:
63 return MEMCACHED_SUCCESS;
64 case 0:
65 return MEMCACHED_TIMEOUT;
66 case ERESTART:
67 case EINTR:
68 continue;
69 default:
70 ptr->cached_errno= error;
71 memcached_quit_server(ptr, 1);
72
73 return MEMCACHED_FAILURE;
74 }
75 }
76
77 /* Imposssible for anything other then -1 */
78 WATCHPOINT_ASSERT(error == -1);
79 ptr->cached_errno= error;
80 memcached_quit_server(ptr, 1);
81
82 return MEMCACHED_FAILURE;
83 }
84
85 /**
86 * Try to fill the input buffer for a server with as much
87 * data as possible.
88 *
89 * @param ptr the server to pack
90 */
91 static bool repack_input_buffer(memcached_server_instance_st *ptr)
92 {
93 if (ptr->read_ptr != ptr->read_buffer)
94 {
95 /* Move all of the data to the beginning of the buffer so
96 ** that we can fit more data into the buffer...
97 */
98 memmove(ptr->read_buffer, ptr->read_ptr, ptr->read_buffer_length);
99 ptr->read_ptr= ptr->read_buffer;
100 ptr->read_data_length= ptr->read_buffer_length;
101 }
102
103 /* There is room in the buffer, try to fill it! */
104 if (ptr->read_buffer_length != MEMCACHED_MAX_BUFFER)
105 {
106 /* Just try a single read to grab what's available */
107 ssize_t nr= read(ptr->fd,
108 ptr->read_ptr + ptr->read_data_length,
109 MEMCACHED_MAX_BUFFER - ptr->read_data_length);
110
111 if (nr > 0)
112 {
113 ptr->read_data_length+= (size_t)nr;
114 ptr->read_buffer_length+= (size_t)nr;
115 return true;
116 }
117 }
118 return false;
119 }
120
121 /**
122 * If the we have callbacks connected to this server structure
123 * we may start process the input queue and fire the callbacks
124 * for the incomming messages. This function is _only_ called
125 * when the input buffer is full, so that we _know_ that we have
126 * at least _one_ message to process.
127 *
128 * @param ptr the server to star processing iput messages for
129 * @return true if we processed anything, false otherwise
130 */
131 static bool process_input_buffer(memcached_server_instance_st *ptr)
132 {
133 /*
134 ** We might be able to process some of the response messages if we
135 ** have a callback set up
136 */
137 if (ptr->root->callbacks != NULL && ptr->root->flags.use_udp == false)
138 {
139 /*
140 * We might have responses... try to read them out and fire
141 * callbacks
142 */
143 memcached_callback_st cb= *ptr->root->callbacks;
144
145 memcached_set_processing_input((memcached_st *)ptr->root, true);
146
147 char buffer[MEMCACHED_DEFAULT_COMMAND_SIZE];
148 memcached_return_t error;
149 memcached_st *root= (memcached_st *)ptr->root;
150 error= memcached_response(ptr, buffer, sizeof(buffer),
151 &root->result);
152
153 memcached_set_processing_input(root, false);
154
155 if (error == MEMCACHED_SUCCESS)
156 {
157 for (unsigned int x= 0; x < cb.number_of_callback; x++)
158 {
159 error= (*cb.callback[x])(ptr->root, &root->result, cb.context);
160 if (error != MEMCACHED_SUCCESS)
161 break;
162 }
163
164 /* @todo what should I do with the error message??? */
165 }
166 /* @todo what should I do with other error messages?? */
167 return true;
168 }
169
170 return false;
171 }
172
173 static inline void memcached_io_cork_push(memcached_server_st *ptr)
174 {
175 (void)ptr;
176 #ifdef CORK
177 if (ptr->root->flags.cork == false || ptr->state.is_corked)
178 return;
179
180 int enable= 1;
181 int err= setsockopt(ptr->fd, IPPROTO_TCP, CORK,
182 &enable, (socklen_t)sizeof(int));
183 if (! err)
184 ptr->state.is_corked= true;
185
186 WATCHPOINT_ASSERT(ptr->state.is_corked == true);
187 #endif
188 }
189
190 static inline void memcached_io_cork_pop(memcached_server_st *ptr)
191 {
192 (void)ptr;
193 #ifdef CORK
194 if (ptr->root->flags.cork == false || ptr->state.is_corked == false)
195 return;
196
197 int enable= 0;
198 int err= setsockopt(ptr->fd, IPPROTO_TCP, CORK,
199 &enable, (socklen_t)sizeof(int));
200 if (! err)
201 ptr->state.is_corked= false;
202
203 WATCHPOINT_ASSERT(ptr->state.is_corked == false);
204 #endif
205 }
206
#if 0 // Dead code, this should be removed.
/*
 * Disabled pre-read pass: for every server with outstanding responses and
 * room in its read buffer, try one read() to pull in more data.
 *
 * This block is never compiled (#if 0) and, in addition, the function
 * returns before doing any work. It is kept only until it is removed.
 */
void memcached_io_preread(memcached_st *ptr)
{
  /* Deliberately disabled: bail out before touching any server. */
  return;

  for (uint32_t x= 0; x < memcached_server_count(ptr); x++)
  {
    memcached_server_instance_st *instance=
      memcached_server_instance_fetch(ptr, x);

    if (memcached_server_response_count(instance) &&
        instance->read_data_length < MEMCACHED_MAX_BUFFER )
    {
      /* read() returns a signed count; -1 signals an error. */
      ssize_t data_read= read(instance->fd,
                              instance->read_ptr + instance->read_data_length,
                              MEMCACHED_MAX_BUFFER - instance->read_data_length);
      if (data_read == -1)
        continue;

      instance->read_buffer_length+= (size_t)data_read;
      instance->read_data_length+= (size_t)data_read;
    }
  }
}
#endif
233
234 memcached_return_t memcached_io_read(memcached_server_instance_st *ptr,
235 void *buffer, size_t length, ssize_t *nread)
236 {
237 char *buffer_ptr;
238
239 buffer_ptr= buffer;
240
241 while (length)
242 {
243 if (!ptr->read_buffer_length)
244 {
245 ssize_t data_read;
246
247 while (1)
248 {
249 data_read= read(ptr->fd, ptr->read_buffer, MEMCACHED_MAX_BUFFER);
250 if (data_read > 0)
251 break;
252 else if (data_read == -1)
253 {
254 ptr->cached_errno= errno;
255 memcached_return_t rc= MEMCACHED_UNKNOWN_READ_FAILURE;
256 switch (errno)
257 {
258 case EAGAIN:
259 case EINTR:
260 case ERESTART:
261 if ((rc= io_wait(ptr, MEM_READ)) == MEMCACHED_SUCCESS)
262 continue;
263 /* fall through */
264
265 default:
266 {
267 memcached_quit_server(ptr, 1);
268 *nread= -1;
269 return rc;
270 }
271 }
272 }
273 else
274 {
275 /*
276 EOF. Any data received so far is incomplete
277 so discard it. This always reads by byte in case of TCP
278 and protocol enforcement happens at memcached_response()
279 looking for '\n'. We do not care for UDB which requests 8 bytes
280 at once. Generally, this means that connection went away. Since
281 for blocking I/O we do not return 0 and for non-blocking case
282 it will return EGAIN if data is not immediatly available.
283 */
284 memcached_quit_server(ptr, 1);
285 *nread= -1;
286 return MEMCACHED_UNKNOWN_READ_FAILURE;
287 }
288 }
289
290 ptr->io_bytes_sent = 0;
291 ptr->read_data_length= (size_t) data_read;
292 ptr->read_buffer_length= (size_t) data_read;
293 ptr->read_ptr= ptr->read_buffer;
294 }
295
296 if (length > 1)
297 {
298 size_t difference;
299
300 difference= (length > ptr->read_buffer_length) ? ptr->read_buffer_length : length;
301
302 memcpy(buffer_ptr, ptr->read_ptr, difference);
303 length -= difference;
304 ptr->read_ptr+= difference;
305 ptr->read_buffer_length-= difference;
306 buffer_ptr+= difference;
307 }
308 else
309 {
310 *buffer_ptr= *ptr->read_ptr;
311 ptr->read_ptr++;
312 ptr->read_buffer_length--;
313 buffer_ptr++;
314 break;
315 }
316 }
317
318 ptr->server_failure_counter= 0;
319 *nread = (ssize_t)(buffer_ptr - (char*)buffer);
320 return MEMCACHED_SUCCESS;
321 }
322
323 ssize_t memcached_io_write(memcached_server_instance_st *ptr,
324 const void *buffer, size_t length, bool with_flush)
325 {
326 size_t original_length;
327 const char* buffer_ptr;
328
329 WATCHPOINT_ASSERT(ptr->fd != -1);
330
331 original_length= length;
332 buffer_ptr= buffer;
333
334 /* more writable data is coming if a flush isn't required, so delay send */
335 if (! with_flush)
336 {
337 memcached_io_cork_push(ptr);
338 }
339
340 while (length)
341 {
342 char *write_ptr;
343 size_t should_write;
344 size_t buffer_end;
345
346 if (ptr->type == MEMCACHED_CONNECTION_UDP)
347 {
348 //UDP does not support partial writes
349 buffer_end= MAX_UDP_DATAGRAM_LENGTH;
350 should_write= length;
351 if (ptr->write_buffer_offset + should_write > buffer_end)
352 return -1;
353 }
354 else
355 {
356 buffer_end= MEMCACHED_MAX_BUFFER;
357 should_write= buffer_end - ptr->write_buffer_offset;
358 should_write= (should_write < length) ? should_write : length;
359 }
360
361 write_ptr= ptr->write_buffer + ptr->write_buffer_offset;
362 memcpy(write_ptr, buffer_ptr, should_write);
363 ptr->write_buffer_offset+= should_write;
364 buffer_ptr+= should_write;
365 length-= should_write;
366
367 if (ptr->write_buffer_offset == buffer_end && ptr->type != MEMCACHED_CONNECTION_UDP)
368 {
369 memcached_return_t rc;
370 ssize_t sent_length;
371
372 WATCHPOINT_ASSERT(ptr->fd != -1);
373 sent_length= io_flush(ptr, &rc);
374 if (sent_length == -1)
375 return -1;
376
377 /* If io_flush calls memcached_purge, sent_length may be 0 */
378 unlikely (sent_length != 0)
379 {
380 WATCHPOINT_ASSERT(sent_length == (ssize_t)buffer_end);
381 }
382 }
383 }
384
385 if (with_flush)
386 {
387 memcached_return_t rc;
388 WATCHPOINT_ASSERT(ptr->fd != -1);
389 if (io_flush(ptr, &rc) == -1)
390 {
391 return -1;
392 }
393
394 memcached_io_cork_pop(ptr);
395 }
396
397 return (ssize_t) original_length;
398 }
399
400 memcached_return_t memcached_io_close(memcached_server_instance_st *ptr)
401 {
402 if (ptr->fd == -1)
403 {
404 return MEMCACHED_SUCCESS;
405 }
406
407 /* in case of death shutdown to avoid blocking at close() */
408 if (shutdown(ptr->fd, SHUT_RDWR) == -1 && errno != ENOTCONN)
409 {
410 WATCHPOINT_NUMBER(ptr->fd);
411 WATCHPOINT_ERRNO(errno);
412 WATCHPOINT_ASSERT(errno);
413 }
414
415 if (close(ptr->fd) == -1)
416 {
417 WATCHPOINT_ERRNO(errno);
418 }
419
420 return MEMCACHED_SUCCESS;
421 }
422
423 memcached_server_instance_st *memcached_io_get_readable_server(memcached_st *memc)
424 {
425 #define MAX_SERVERS_TO_POLL 100
426 struct pollfd fds[MAX_SERVERS_TO_POLL];
427 unsigned int host_index= 0;
428
429 for (uint32_t x= 0;
430 x< memcached_server_count(memc) && host_index < MAX_SERVERS_TO_POLL;
431 ++x)
432 {
433 memcached_server_instance_st *instance=
434 memcached_server_instance_fetch(memc, x);
435
436 if (instance->read_buffer_length > 0) /* I have data in the buffer */
437 return instance;
438
439 if (memcached_server_response_count(instance) > 0)
440 {
441 fds[host_index].events = POLLIN;
442 fds[host_index].revents = 0;
443 fds[host_index].fd = instance->fd;
444 ++host_index;
445 }
446 }
447
448 if (host_index < 2)
449 {
450 /* We have 0 or 1 server with pending events.. */
451 for (uint32_t x= 0; x< memcached_server_count(memc); ++x)
452 {
453 memcached_server_instance_st *instance=
454 memcached_server_instance_fetch(memc, x);
455
456 if (memcached_server_response_count(instance) > 0)
457 {
458 return instance;
459 }
460 }
461
462 return NULL;
463 }
464
465 int err= poll(fds, host_index, memc->poll_timeout);
466 switch (err) {
467 case -1:
468 memc->cached_errno = errno;
469 /* FALLTHROUGH */
470 case 0:
471 break;
472 default:
473 for (size_t x= 0; x < host_index; ++x)
474 {
475 if (fds[x].revents & POLLIN)
476 {
477 for (uint32_t y= 0; y < memcached_server_count(memc); ++y)
478 {
479 memcached_server_instance_st *instance=
480 memcached_server_instance_fetch(memc, y);
481
482 if (instance->fd == fds[x].fd)
483 return instance;
484 }
485 }
486 }
487 }
488
489 return NULL;
490 }
491
492 static ssize_t io_flush(memcached_server_instance_st *ptr,
493 memcached_return_t *error)
494 {
495 /*
496 ** We might want to purge the input buffer if we haven't consumed
497 ** any output yet... The test for the limits is the purge is inline
498 ** in the purge function to avoid duplicating the logic..
499 */
500 {
501 memcached_return_t rc;
502 WATCHPOINT_ASSERT(ptr->fd != -1);
503 rc= memcached_purge(ptr);
504
505 if (rc != MEMCACHED_SUCCESS && rc != MEMCACHED_STORED)
506 return -1;
507 }
508 ssize_t sent_length;
509 size_t return_length;
510 char *local_write_ptr= ptr->write_buffer;
511 size_t write_length= ptr->write_buffer_offset;
512
513 *error= MEMCACHED_SUCCESS;
514
515 WATCHPOINT_ASSERT(ptr->fd != -1);
516
517 // UDP Sanity check, make sure that we are not sending somthing too big
518 if (ptr->type == MEMCACHED_CONNECTION_UDP && write_length > MAX_UDP_DATAGRAM_LENGTH)
519 return -1;
520
521 if (ptr->write_buffer_offset == 0 || (ptr->type == MEMCACHED_CONNECTION_UDP
522 && ptr->write_buffer_offset == UDP_DATAGRAM_HEADER_LENGTH))
523 return 0;
524
525 /* Looking for memory overflows */
526 #if defined(DEBUG)
527 if (write_length == MEMCACHED_MAX_BUFFER)
528 WATCHPOINT_ASSERT(ptr->write_buffer == local_write_ptr);
529 WATCHPOINT_ASSERT((ptr->write_buffer + MEMCACHED_MAX_BUFFER) >= (local_write_ptr + write_length));
530 #endif
531
532 return_length= 0;
533 while (write_length)
534 {
535 WATCHPOINT_ASSERT(ptr->fd != -1);
536 WATCHPOINT_ASSERT(write_length > 0);
537 sent_length= 0;
538 if (ptr->type == MEMCACHED_CONNECTION_UDP)
539 increment_udp_message_id(ptr);
540 sent_length= write(ptr->fd, local_write_ptr, write_length);
541
542 if (sent_length == -1)
543 {
544 ptr->cached_errno= errno;
545 switch (errno)
546 {
547 case ENOBUFS:
548 continue;
549 case EAGAIN:
550 {
551 /*
552 * We may be blocked on write because the input buffer
553 * is full. Let's check if we have room in our input
554 * buffer for more data and retry the write before
555 * waiting..
556 */
557 if (repack_input_buffer(ptr) ||
558 process_input_buffer(ptr))
559 continue;
560
561 memcached_return_t rc;
562 rc= io_wait(ptr, MEM_WRITE);
563
564 if (rc == MEMCACHED_SUCCESS || rc == MEMCACHED_TIMEOUT)
565 continue;
566
567 memcached_quit_server(ptr, 1);
568 return -1;
569 }
570 default:
571 memcached_quit_server(ptr, 1);
572 *error= MEMCACHED_ERRNO;
573 return -1;
574 }
575 }
576
577 if (ptr->type == MEMCACHED_CONNECTION_UDP &&
578 (size_t)sent_length != write_length)
579 {
580 memcached_quit_server(ptr, 1);
581 return -1;
582 }
583
584 ptr->io_bytes_sent += (uint32_t) sent_length;
585
586 local_write_ptr+= sent_length;
587 write_length-= (uint32_t) sent_length;
588 return_length+= (uint32_t) sent_length;
589 }
590
591 WATCHPOINT_ASSERT(write_length == 0);
592 // Need to study this assert() WATCHPOINT_ASSERT(return_length ==
593 // ptr->write_buffer_offset);
594
595 // if we are a udp server, the begining of the buffer is reserverd for
596 // the upd frame header
597 if (ptr->type == MEMCACHED_CONNECTION_UDP)
598 ptr->write_buffer_offset= UDP_DATAGRAM_HEADER_LENGTH;
599 else
600 ptr->write_buffer_offset= 0;
601
602 return (ssize_t) return_length;
603 }
604
605 /*
606 Eventually we will just kill off the server with the problem.
607 */
608 void memcached_io_reset(memcached_server_instance_st *ptr)
609 {
610 memcached_quit_server(ptr, 1);
611 }
612
613 /**
614 * Read a given number of bytes from the server and place it into a specific
615 * buffer. Reset the IO channel on this server if an error occurs.
616 */
617 memcached_return_t memcached_safe_read(memcached_server_instance_st *ptr,
618 void *dta,
619 size_t size)
620 {
621 size_t offset= 0;
622 char *data= dta;
623
624 while (offset < size)
625 {
626 ssize_t nread;
627 memcached_return_t rc= memcached_io_read(ptr, data + offset, size - offset,
628 &nread);
629 if (rc != MEMCACHED_SUCCESS)
630 return rc;
631
632 offset+= (size_t) nread;
633 }
634
635 return MEMCACHED_SUCCESS;
636 }
637
638 memcached_return_t memcached_io_readline(memcached_server_instance_st *ptr,
639 char *buffer_ptr,
640 size_t size)
641 {
642 bool line_complete= false;
643 size_t total_nr= 0;
644
645 while (!line_complete)
646 {
647 if (ptr->read_buffer_length == 0)
648 {
649 /*
650 * We don't have any data in the buffer, so let's fill the read
651 * buffer. Call the standard read function to avoid duplicating
652 * the logic.
653 */
654 ssize_t nread;
655 memcached_return_t rc= memcached_io_read(ptr, buffer_ptr, 1, &nread);
656 if (rc != MEMCACHED_SUCCESS)
657 return rc;
658
659 if (*buffer_ptr == '\n')
660 line_complete= true;
661
662 ++buffer_ptr;
663 ++total_nr;
664 }
665
666 /* Now let's look in the buffer and copy as we go! */
667 while (ptr->read_buffer_length && total_nr < size && !line_complete)
668 {
669 *buffer_ptr = *ptr->read_ptr;
670 if (*buffer_ptr == '\n')
671 line_complete = true;
672 --ptr->read_buffer_length;
673 ++ptr->read_ptr;
674 ++total_nr;
675 ++buffer_ptr;
676 }
677
678 if (total_nr == size)
679 return MEMCACHED_PROTOCOL_ERROR;
680 }
681
682 return MEMCACHED_SUCCESS;
683 }
684
685 /*
686 * The udp request id consists of two seperate sections
687 * 1) The thread id
688 * 2) The message number
689 * The thread id should only be set when the memcached_st struct is created
690 * and should not be changed.
691 *
692 * The message num is incremented for each new message we send, this function
693 * extracts the message number from message_id, increments it and then
694 * writes the new value back into the header
695 */
696 static void increment_udp_message_id(memcached_server_instance_st *ptr)
697 {
698 struct udp_datagram_header_st *header= (struct udp_datagram_header_st *)ptr->write_buffer;
699 uint16_t cur_req= get_udp_datagram_request_id(header);
700 int msg_num= get_msg_num_from_request_id(cur_req);
701 int thread_id= get_thread_id_from_request_id(cur_req);
702
703 if (((++msg_num) & UDP_REQUEST_ID_THREAD_MASK) != 0)
704 msg_num= 0;
705
706 header->request_id= htons((uint16_t) (thread_id | msg_num));
707 }
708
709 memcached_return_t memcached_io_init_udp_header(memcached_server_instance_st *ptr, uint16_t thread_id)
710 {
711 if (thread_id > UDP_REQUEST_ID_MAX_THREAD_ID)
712 return MEMCACHED_FAILURE;
713
714 struct udp_datagram_header_st *header= (struct udp_datagram_header_st *)ptr->write_buffer;
715 header->request_id= htons((uint16_t) (generate_udp_request_thread_id(thread_id)));
716 header->num_datagrams= htons(1);
717 header->sequence_number= htons(0);
718
719 return MEMCACHED_SUCCESS;
720 }