Merge Monty
[m6w6/libmemcached] / libmemcached / memcached_io.c
1 /*
2 Basic socket buffered IO
3 */
4
5 #include "common.h"
6 #include "memcached_io.h"
7 #include <sys/select.h>
8 #include <poll.h>
9
/* Direction of the socket operation that io_wait() should poll for. */
typedef enum {
  MEM_READ,  /* wait until the descriptor is readable (POLLIN) */
  MEM_WRITE  /* wait until the descriptor is writable (POLLOUT) */
} memc_read_or_write;
14
15 static ssize_t io_flush(memcached_server_st *ptr, memcached_return *error);
16 static void increment_udp_message_id(memcached_server_st *ptr);
17
18 static memcached_return io_wait(memcached_server_st *ptr,
19 memc_read_or_write read_or_write)
20 {
21 struct pollfd fds[1];
22 short flags= 0;
23 int error;
24
25 if (read_or_write == MEM_WRITE) /* write */
26 flags= POLLOUT;
27 else
28 flags= POLLIN;
29
30 memset(&fds, 0, sizeof(struct pollfd));
31 fds[0].fd= ptr->fd;
32 fds[0].events= flags;
33
34 /*
35 ** We are going to block on write, but at least on Solaris we might block
36 ** on write if we haven't read anything from our input buffer..
37 ** Try to purge the input buffer if we don't do any flow control in the
38 ** application layer (just sending a lot of data etc)
39 ** The test is moved down in the purge function to avoid duplication of
40 ** the test.
41 */
42 if (read_or_write == MEM_WRITE)
43 {
44 memcached_return rc=memcached_purge(ptr);
45 if (rc != MEMCACHED_SUCCESS && rc != MEMCACHED_STORED)
46 return MEMCACHED_FAILURE;
47 }
48
49 error= poll(fds, 1, ptr->root->poll_timeout);
50
51 if (error == 1)
52 return MEMCACHED_SUCCESS;
53 else if (error == 0)
54 {
55 return MEMCACHED_TIMEOUT;
56 }
57
58 /* Imposssible for anything other then -1 */
59 WATCHPOINT_ASSERT(error == -1);
60 memcached_quit_server(ptr, 1);
61
62 return MEMCACHED_FAILURE;
63
64 }
65
#ifdef UNUSED
/*
 * Pre-read pending bytes for every host that still has responses
 * outstanding and whose read_data_length is below MEMCACHED_MAX_BUFFER.
 *
 * NOTE: this function is compiled only when UNUSED is defined, and even
 * then it returns immediately; the loop below is retained for reference
 * and is never executed.
 */
void memcached_io_preread(memcached_st *ptr)
{
  unsigned int x;

  return;

  for (x= 0; x < ptr->number_of_hosts; x++)
  {
    memcached_server_st *host= &ptr->hosts[x];
    ssize_t data_read;

    if (!memcached_server_response_count(host) ||
        host->read_data_length >= MEMCACHED_MAX_BUFFER)
      continue;

    /* read() returns ssize_t; keep it signed so the error test is exact. */
    data_read= read(host->fd,
                    host->read_ptr + host->read_data_length,
                    MEMCACHED_MAX_BUFFER - host->read_data_length);
    if (data_read < 0)
      continue;

    host->read_buffer_length+= (size_t) data_read;
    host->read_data_length+= (size_t) data_read;
  }
}
#endif
92
93 memcached_return memcached_io_read(memcached_server_st *ptr,
94 void *buffer, size_t length, ssize_t *nread)
95 {
96 char *buffer_ptr;
97
98 buffer_ptr= buffer;
99
100 while (length)
101 {
102 if (!ptr->read_buffer_length)
103 {
104 ssize_t data_read;
105
106 while (1)
107 {
108 data_read= read(ptr->fd, ptr->read_buffer, MEMCACHED_MAX_BUFFER);
109 if (data_read > 0)
110 break;
111 else if (data_read == -1)
112 {
113 ptr->cached_errno= errno;
114 memcached_return rc= MEMCACHED_UNKNOWN_READ_FAILURE;
115 switch (errno)
116 {
117 case EAGAIN:
118 case EINTR:
119 if ((rc= io_wait(ptr, MEM_READ)) == MEMCACHED_SUCCESS)
120 continue;
121 /* fall through */
122
123 default:
124 {
125 memcached_quit_server(ptr, 1);
126 *nread= -1;
127 return rc;
128 }
129 }
130 }
131 else
132 {
133 /*
134 EOF. Any data received so far is incomplete
135 so discard it. This always reads by byte in case of TCP
136 and protocol enforcement happens at memcached_response()
137 looking for '\n'. We do not care for UDB which requests 8 bytes
138 at once. Generally, this means that connection went away. Since
139 for blocking I/O we do not return 0 and for non-blocking case
140 it will return EGAIN if data is not immediatly available.
141 */
142 memcached_quit_server(ptr, 1);
143 *nread= -1;
144 return MEMCACHED_UNKNOWN_READ_FAILURE;
145 }
146 }
147
148 ptr->io_bytes_sent = 0;
149 ptr->read_data_length= (size_t) data_read;
150 ptr->read_buffer_length= (size_t) data_read;
151 ptr->read_ptr= ptr->read_buffer;
152 }
153
154 if (length > 1)
155 {
156 size_t difference;
157
158 difference= (length > ptr->read_buffer_length) ? ptr->read_buffer_length : length;
159
160 memcpy(buffer_ptr, ptr->read_ptr, difference);
161 length -= difference;
162 ptr->read_ptr+= difference;
163 ptr->read_buffer_length-= difference;
164 buffer_ptr+= difference;
165 }
166 else
167 {
168 *buffer_ptr= *ptr->read_ptr;
169 ptr->read_ptr++;
170 ptr->read_buffer_length--;
171 buffer_ptr++;
172 break;
173 }
174 }
175
176 ptr->server_failure_counter= 0;
177 *nread = (ssize_t)(buffer_ptr - (char*)buffer);
178 return MEMCACHED_SUCCESS;
179 }
180
181 ssize_t memcached_io_write(memcached_server_st *ptr,
182 const void *buffer, size_t length, char with_flush)
183 {
184 size_t original_length;
185 const char* buffer_ptr;
186
187 WATCHPOINT_ASSERT(ptr->fd != -1);
188
189 original_length= length;
190 buffer_ptr= buffer;
191
192 while (length)
193 {
194 char *write_ptr;
195 size_t should_write;
196 size_t buffer_end;
197
198 if (ptr->type == MEMCACHED_CONNECTION_UDP)
199 {
200 //UDP does not support partial writes
201 buffer_end= MAX_UDP_DATAGRAM_LENGTH;
202 should_write= length;
203 if (ptr->write_buffer_offset + should_write > buffer_end)
204 return -1;
205 }
206 else
207 {
208 buffer_end= MEMCACHED_MAX_BUFFER;
209 should_write= buffer_end - ptr->write_buffer_offset;
210 should_write= (should_write < length) ? should_write : length;
211 }
212
213 write_ptr= ptr->write_buffer + ptr->write_buffer_offset;
214 memcpy(write_ptr, buffer_ptr, should_write);
215 ptr->write_buffer_offset+= should_write;
216 buffer_ptr+= should_write;
217 length-= should_write;
218
219 if (ptr->write_buffer_offset == buffer_end && ptr->type != MEMCACHED_CONNECTION_UDP)
220 {
221 memcached_return rc;
222 ssize_t sent_length;
223
224 WATCHPOINT_ASSERT(ptr->fd != -1);
225 sent_length= io_flush(ptr, &rc);
226 if (sent_length == -1)
227 return -1;
228
229 /* If io_flush calls memcached_purge, sent_length may be 0 */
230 unlikely (sent_length != 0)
231 {
232 WATCHPOINT_ASSERT(sent_length == (ssize_t)buffer_end);
233 }
234 }
235 }
236
237 if (with_flush)
238 {
239 memcached_return rc;
240 WATCHPOINT_ASSERT(ptr->fd != -1);
241 if (io_flush(ptr, &rc) == -1)
242 return -1;
243 }
244
245 return (ssize_t) original_length;
246 }
247
248 memcached_return memcached_io_close(memcached_server_st *ptr)
249 {
250 int r;
251
252 if (ptr->fd == -1)
253 return MEMCACHED_SUCCESS;
254
255 /* in case of death shutdown to avoid blocking at close() */
256 if (1)
257 {
258 r= shutdown(ptr->fd, SHUT_RDWR);
259
260 #ifdef DEBUG
261 if (r && errno != ENOTCONN)
262 {
263 WATCHPOINT_NUMBER(ptr->fd);
264 WATCHPOINT_ERRNO(errno);
265 WATCHPOINT_ASSERT(errno);
266 }
267 #endif
268 }
269
270 r= close(ptr->fd);
271 #ifdef DEBUG
272 if (r != 0)
273 WATCHPOINT_ERRNO(errno);
274 #endif
275
276 return MEMCACHED_SUCCESS;
277 }
278
279 memcached_server_st *memcached_io_get_readable_server(memcached_st *memc)
280 {
281 #define MAX_SERVERS_TO_POLL 100
282 struct pollfd fds[MAX_SERVERS_TO_POLL];
283 unsigned int host_index= 0;
284
285 for (unsigned int x= 0;
286 x< memc->number_of_hosts && host_index < MAX_SERVERS_TO_POLL;
287 ++x)
288 {
289 if (memc->hosts[x].read_buffer_length > 0) /* I have data in the buffer */
290 return &memc->hosts[x];
291
292 if (memcached_server_response_count(&memc->hosts[x]) > 0)
293 {
294 fds[host_index].events = POLLIN;
295 fds[host_index].revents = 0;
296 fds[host_index].fd = memc->hosts[x].fd;
297 ++host_index;
298 }
299 }
300
301 if (host_index < 2)
302 {
303 /* We have 0 or 1 server with pending events.. */
304 for (unsigned int x= 0; x< memc->number_of_hosts; ++x)
305 if (memcached_server_response_count(&memc->hosts[x]) > 0)
306 return &memc->hosts[x];
307
308 return NULL;
309 }
310
311 int err= poll(fds, host_index, memc->poll_timeout);
312 switch (err) {
313 case -1:
314 memc->cached_errno = errno;
315 /* FALLTHROUGH */
316 case 0:
317 break;
318 default:
319 for (unsigned int x= 0; x < host_index; ++x)
320 if (fds[x].revents & POLLIN)
321 for (unsigned int y= 0; y < memc->number_of_hosts; ++y)
322 if (memc->hosts[y].fd == fds[x].fd)
323 return &memc->hosts[y];
324 }
325
326 return NULL;
327 }
328
329 static ssize_t io_flush(memcached_server_st *ptr,
330 memcached_return *error)
331 {
332 /*
333 ** We might want to purge the input buffer if we haven't consumed
334 ** any output yet... The test for the limits is the purge is inline
335 ** in the purge function to avoid duplicating the logic..
336 */
337 {
338 memcached_return rc;
339 WATCHPOINT_ASSERT(ptr->fd != -1);
340 rc= memcached_purge(ptr);
341
342 if (rc != MEMCACHED_SUCCESS && rc != MEMCACHED_STORED)
343 return -1;
344 }
345 ssize_t sent_length;
346 size_t return_length;
347 char *local_write_ptr= ptr->write_buffer;
348 size_t write_length= ptr->write_buffer_offset;
349
350 *error= MEMCACHED_SUCCESS;
351
352 WATCHPOINT_ASSERT(ptr->fd != -1);
353
354 // UDP Sanity check, make sure that we are not sending somthing too big
355 if (ptr->type == MEMCACHED_CONNECTION_UDP && write_length > MAX_UDP_DATAGRAM_LENGTH)
356 return -1;
357
358 if (ptr->write_buffer_offset == 0 || (ptr->type == MEMCACHED_CONNECTION_UDP
359 && ptr->write_buffer_offset == UDP_DATAGRAM_HEADER_LENGTH))
360 return 0;
361
362 /* Looking for memory overflows */
363 #if defined(DEBUG)
364 if (write_length == MEMCACHED_MAX_BUFFER)
365 WATCHPOINT_ASSERT(ptr->write_buffer == local_write_ptr);
366 WATCHPOINT_ASSERT((ptr->write_buffer + MEMCACHED_MAX_BUFFER) >= (local_write_ptr + write_length));
367 #endif
368
369 return_length= 0;
370 while (write_length)
371 {
372 WATCHPOINT_ASSERT(ptr->fd != -1);
373 WATCHPOINT_ASSERT(write_length > 0);
374 sent_length= 0;
375 if (ptr->type == MEMCACHED_CONNECTION_UDP)
376 increment_udp_message_id(ptr);
377 sent_length= write(ptr->fd, local_write_ptr, write_length);
378
379 if (sent_length == -1)
380 {
381 ptr->cached_errno= errno;
382 switch (errno)
383 {
384 case ENOBUFS:
385 continue;
386 case EAGAIN:
387 {
388 memcached_return rc;
389 rc= io_wait(ptr, MEM_WRITE);
390
391 if (rc == MEMCACHED_SUCCESS || rc == MEMCACHED_TIMEOUT)
392 continue;
393
394 memcached_quit_server(ptr, 1);
395 return -1;
396 }
397 default:
398 memcached_quit_server(ptr, 1);
399 *error= MEMCACHED_ERRNO;
400 return -1;
401 }
402 }
403
404 if (ptr->type == MEMCACHED_CONNECTION_UDP &&
405 (size_t)sent_length != write_length)
406 {
407 memcached_quit_server(ptr, 1);
408 return -1;
409 }
410
411 ptr->io_bytes_sent += (uint32_t) sent_length;
412
413 local_write_ptr+= sent_length;
414 write_length-= (uint32_t) sent_length;
415 return_length+= (uint32_t) sent_length;
416 }
417
418 WATCHPOINT_ASSERT(write_length == 0);
419 // Need to study this assert() WATCHPOINT_ASSERT(return_length ==
420 // ptr->write_buffer_offset);
421
422 // if we are a udp server, the begining of the buffer is reserverd for
423 // the upd frame header
424 if (ptr->type == MEMCACHED_CONNECTION_UDP)
425 ptr->write_buffer_offset= UDP_DATAGRAM_HEADER_LENGTH;
426 else
427 ptr->write_buffer_offset= 0;
428
429 return (ssize_t) return_length;
430 }
431
432 /*
433 Eventually we will just kill off the server with the problem.
434 */
435 void memcached_io_reset(memcached_server_st *ptr)
436 {
437 memcached_quit_server(ptr, 1);
438 }
439
440 /**
441 * Read a given number of bytes from the server and place it into a specific
442 * buffer. Reset the IO channel on this server if an error occurs.
443 */
444 memcached_return memcached_safe_read(memcached_server_st *ptr,
445 void *dta,
446 size_t size)
447 {
448 size_t offset= 0;
449 char *data= dta;
450
451 while (offset < size)
452 {
453 ssize_t nread;
454 memcached_return rc= memcached_io_read(ptr, data + offset, size - offset,
455 &nread);
456 if (rc != MEMCACHED_SUCCESS)
457 return rc;
458
459 offset+= (size_t) nread;
460 }
461
462 return MEMCACHED_SUCCESS;
463 }
464
465 memcached_return memcached_io_readline(memcached_server_st *ptr,
466 char *buffer_ptr,
467 size_t size)
468 {
469 bool line_complete= false;
470 size_t total_nr= 0;
471
472 while (!line_complete)
473 {
474 if (ptr->read_buffer_length == 0)
475 {
476 /*
477 * We don't have any data in the buffer, so let's fill the read
478 * buffer. Call the standard read function to avoid duplicating
479 * the logic.
480 */
481 ssize_t nread;
482 memcached_return rc= memcached_io_read(ptr, buffer_ptr, 1, &nread);
483 if (rc != MEMCACHED_SUCCESS)
484 return rc;
485
486 if (*buffer_ptr == '\n')
487 line_complete= true;
488
489 ++buffer_ptr;
490 ++total_nr;
491 }
492
493 /* Now let's look in the buffer and copy as we go! */
494 while (ptr->read_buffer_length && total_nr < size && !line_complete)
495 {
496 *buffer_ptr = *ptr->read_ptr;
497 if (*buffer_ptr == '\n')
498 line_complete = true;
499 --ptr->read_buffer_length;
500 ++ptr->read_ptr;
501 ++total_nr;
502 ++buffer_ptr;
503 }
504
505 if (total_nr == size)
506 return MEMCACHED_PROTOCOL_ERROR;
507 }
508
509 return MEMCACHED_SUCCESS;
510 }
511
512 /*
513 * The udp request id consists of two seperate sections
514 * 1) The thread id
515 * 2) The message number
516 * The thread id should only be set when the memcached_st struct is created
517 * and should not be changed.
518 *
519 * The message num is incremented for each new message we send, this function
520 * extracts the message number from message_id, increments it and then
521 * writes the new value back into the header
522 */
523 static void increment_udp_message_id(memcached_server_st *ptr)
524 {
525 struct udp_datagram_header_st *header= (struct udp_datagram_header_st *)ptr->write_buffer;
526 uint16_t cur_req= get_udp_datagram_request_id(header);
527 int msg_num= get_msg_num_from_request_id(cur_req);
528 int thread_id= get_thread_id_from_request_id(cur_req);
529
530 if (((++msg_num) & UDP_REQUEST_ID_THREAD_MASK) != 0)
531 msg_num= 0;
532
533 header->request_id= htons((uint16_t) (thread_id | msg_num));
534 }
535
536 memcached_return memcached_io_init_udp_header(memcached_server_st *ptr, uint16_t thread_id)
537 {
538 if (thread_id > UDP_REQUEST_ID_MAX_THREAD_ID)
539 return MEMCACHED_FAILURE;
540
541 struct udp_datagram_header_st *header= (struct udp_datagram_header_st *)ptr->write_buffer;
542 header->request_id= htons((uint16_t) (generate_udp_request_thread_id(thread_id)));
543 header->num_datagrams= htons(1);
544 header->sequence_number= htons(0);
545
546 return MEMCACHED_SUCCESS;
547 }