Commit 05c36015 authored by Wayne Davison

More --timeout improvements, especially for the receiving side:

- The receiver now sends keep-alive messages to the generator
  when it is actively doing work and hasn't sent anything
  recently.  This ensures that the generator won't time out
  if the receiver is working hard.
- The perform_io() code has improved keep-alive participation.
- Allow the sender to send some keep-alive messages, which
  ensures that if it is in a lull, it can probe the socket.
parent e34f4349
...@@ -289,7 +289,7 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) ...@@ -289,7 +289,7 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
rprintf(FINFO, "delete_in_dir(%s)\n", fbuf); rprintf(FINFO, "delete_in_dir(%s)\n", fbuf);
if (allowed_lull) if (allowed_lull)
maybe_send_keepalive(time(NULL), True); maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
if (io_error && !ignore_errors) { if (io_error && !ignore_errors) {
if (already_warned) if (already_warned)
...@@ -1928,7 +1928,7 @@ static void touch_up_dirs(struct file_list *flist, int ndx) ...@@ -1928,7 +1928,7 @@ static void touch_up_dirs(struct file_list *flist, int ndx)
} }
if (counter >= loopchk_limit) { if (counter >= loopchk_limit) {
if (allowed_lull) if (allowed_lull)
maybe_send_keepalive(time(NULL), True); maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
else else
maybe_flush_socket(0); maybe_flush_socket(0);
counter = 0; counter = 0;
...@@ -2128,7 +2128,7 @@ void generate_files(int f_out, const char *local_name) ...@@ -2128,7 +2128,7 @@ void generate_files(int f_out, const char *local_name)
if (i + cur_flist->ndx_start >= next_loopchk) { if (i + cur_flist->ndx_start >= next_loopchk) {
if (allowed_lull) if (allowed_lull)
maybe_send_keepalive(time(NULL), True); maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
else else
maybe_flush_socket(0); maybe_flush_socket(0);
next_loopchk += loopchk_limit; next_loopchk += loopchk_limit;
......
...@@ -69,7 +69,6 @@ int batch_fd = -1; ...@@ -69,7 +69,6 @@ int batch_fd = -1;
int msgdone_cnt = 0; int msgdone_cnt = 0;
int forward_flist_data = 0; int forward_flist_data = 0;
BOOL flist_receiving_enabled = False; BOOL flist_receiving_enabled = False;
BOOL we_send_keepalive_messages = False;
/* Ignore an EOF error if non-zero. See whine_about_eof(). */ /* Ignore an EOF error if non-zero. See whine_about_eof(). */
int kluge_around_eof = 0; int kluge_around_eof = 0;
...@@ -157,23 +156,33 @@ static void check_timeout(BOOL allow_keepalive) ...@@ -157,23 +156,33 @@ static void check_timeout(BOOL allow_keepalive)
{ {
time_t t, chk; time_t t, chk;
/* On the receiving side, the generator is now handling timeouts, so /* On the receiving side, the generator is now the one that decides
* the receiver ignores them. Note that the am_receiver flag is not * when a timeout has occurred. When it is sifting through a lot of
* set until the receiver forks from the generator, so timeouts will be * files looking for work, it will be sending keep-alive messages to
* based on receiving data on the receiving side until that event. */ * the sender, and even though the receiver won't be sending/receiving
if (!io_timeout || am_receiver) * anything (not even keep-alive messages), the successful writes to
* the sender will keep things going. If the receiver is actively
* receiving data, it will ensure that the generator knows that it is
* not idle by sending the generator keep-alive messages (since the
* generator might be blocked trying to send checksums, it needs to
* know that the receiver is active). Thus, as long as one or the
* other is successfully doing work, the generator will not timeout. */
if (!io_timeout)
return; return;
t = time(NULL); t = time(NULL);
if (allow_keepalive && we_send_keepalive_messages) { if (allow_keepalive) {
/* This may put data into iobuf.msg w/o flushing. */ /* This may put data into iobuf.msg w/o flushing. */
maybe_send_keepalive(t, False); maybe_send_keepalive(t, 0);
} }
if (!last_io_in) if (!last_io_in)
last_io_in = t; last_io_in = t;
if (am_receiver)
return;
chk = MAX(last_io_out, last_io_in); chk = MAX(last_io_out, last_io_in);
if (t - chk >= io_timeout) { if (t - chk >= io_timeout) {
if (am_server) if (am_server)
...@@ -261,8 +270,8 @@ static size_t safe_read(int fd, char *buf, size_t len) ...@@ -261,8 +270,8 @@ static size_t safe_read(int fd, char *buf, size_t len)
who_am_i()); who_am_i());
exit_cleanup(RERR_FILEIO); exit_cleanup(RERR_FILEIO);
} }
if (we_send_keepalive_messages) if (io_timeout)
maybe_send_keepalive(time(NULL), True); maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
continue; continue;
} }
...@@ -346,8 +355,8 @@ static void safe_write(int fd, const char *buf, size_t len) ...@@ -346,8 +355,8 @@ static void safe_write(int fd, const char *buf, size_t len)
what_fd_is(fd), who_am_i()); what_fd_is(fd), who_am_i());
exit_cleanup(RERR_FILEIO); exit_cleanup(RERR_FILEIO);
} }
if (we_send_keepalive_messages) if (io_timeout)
maybe_send_keepalive(time(NULL), True); maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
continue; continue;
} }
...@@ -781,8 +790,11 @@ static char *perform_io(size_t needed, int flags) ...@@ -781,8 +790,11 @@ static char *perform_io(size_t needed, int flags)
if (msgs2stderr && DEBUG_GTE(IO, 2)) if (msgs2stderr && DEBUG_GTE(IO, 2))
rprintf(FINFO, "[%s] recv=%ld\n", who_am_i(), (long)n); rprintf(FINFO, "[%s] recv=%ld\n", who_am_i(), (long)n);
if (io_timeout) if (io_timeout) {
last_io_in = time(NULL); last_io_in = time(NULL);
if (flags & PIO_NEED_INPUT)
maybe_send_keepalive(last_io_in, 0);
}
stats.total_read += n; stats.total_read += n;
iobuf.in.len += n; iobuf.in.len += n;
...@@ -1053,13 +1065,15 @@ void io_set_sock_fds(int f_in, int f_out) ...@@ -1053,13 +1065,15 @@ void io_set_sock_fds(int f_in, int f_out)
void set_io_timeout(int secs) void set_io_timeout(int secs)
{ {
io_timeout = secs; io_timeout = secs;
allowed_lull = (io_timeout + 1) / 2;
if (!io_timeout || io_timeout > SELECT_TIMEOUT) if (!io_timeout || allowed_lull > SELECT_TIMEOUT)
select_timeout = SELECT_TIMEOUT; select_timeout = SELECT_TIMEOUT;
else else
select_timeout = io_timeout; select_timeout = allowed_lull;
allowed_lull = read_batch ? 0 : (io_timeout + 1) / 2; if (read_batch)
allowed_lull = 0;
} }
static void check_for_d_option_error(const char *msg) static void check_for_d_option_error(const char *msg)
...@@ -1333,12 +1347,20 @@ void maybe_flush_socket(int important) ...@@ -1333,12 +1347,20 @@ void maybe_flush_socket(int important)
* rsync versions. This avoids any message forwarding, and leaves the raw-data * rsync versions. This avoids any message forwarding, and leaves the raw-data
* stream alone (since we can never be quite sure if that stream is in the * stream alone (since we can never be quite sure if that stream is in the
* right state for a keep-alive message). */ * right state for a keep-alive message). */
void maybe_send_keepalive(time_t now, BOOL allow_flush) void maybe_send_keepalive(time_t now, int flags)
{ {
if (flags & MSK_ACTIVE_RECEIVER)
last_io_in = now; /* Fudge things when we're working hard on the files. */
if (now - last_io_out >= allowed_lull) { if (now - last_io_out >= allowed_lull) {
/* The receiver is special: it only sends keep-alive messages if it is
* actively receiving data. Otherwise, it lets the generator timeout. */
if (am_receiver && now - last_io_in >= io_timeout)
return;
if (!iobuf.msg.len && iobuf.out.len == iobuf.out_empty_len) if (!iobuf.msg.len && iobuf.out.len == iobuf.out_empty_len)
send_msg(MSG_DATA, "", 0, 0); send_msg(MSG_DATA, "", 0, 0);
if (!allow_flush) { if (!(flags & MSK_ALLOW_FLUSH)) {
/* Let the caller worry about writing out the data. */ /* Let the caller worry about writing out the data. */
} else if (iobuf.msg.len) } else if (iobuf.msg.len)
perform_io(iobuf.msg.size - iobuf.msg.len + 1, PIO_NEED_MSGROOM); perform_io(iobuf.msg.size - iobuf.msg.len + 1, PIO_NEED_MSGROOM);
...@@ -1430,7 +1452,7 @@ static void read_a_msg(void) ...@@ -1430,7 +1452,7 @@ static void read_a_msg(void)
goto invalid_msg; goto invalid_msg;
iobuf.in_multiplexed = 1; iobuf.in_multiplexed = 1;
if (am_sender) if (am_sender)
maybe_send_keepalive(time(NULL), True); maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
break; break;
case MSG_DELETED: case MSG_DELETED:
if (msg_bytes >= sizeof data) if (msg_bytes >= sizeof data)
......
...@@ -73,7 +73,6 @@ extern int send_msgs_to_gen; ...@@ -73,7 +73,6 @@ extern int send_msgs_to_gen;
extern pid_t cleanup_child_pid; extern pid_t cleanup_child_pid;
extern size_t bwlimit_writemax; extern size_t bwlimit_writemax;
extern unsigned int module_dirlen; extern unsigned int module_dirlen;
extern BOOL we_send_keepalive_messages;
extern BOOL flist_receiving_enabled; extern BOOL flist_receiving_enabled;
extern BOOL shutting_down; extern BOOL shutting_down;
extern struct stats stats; extern struct stats stats;
...@@ -765,8 +764,6 @@ static void do_server_sender(int f_in, int f_out, int argc, char *argv[]) ...@@ -765,8 +764,6 @@ static void do_server_sender(int f_in, int f_out, int argc, char *argv[])
struct file_list *flist; struct file_list *flist;
char *dir = argv[0]; char *dir = argv[0];
we_send_keepalive_messages = io_timeout != 0 && protocol_version < 31;
if (DEBUG_GTE(SEND, 1)) { if (DEBUG_GTE(SEND, 1)) {
rprintf(FINFO, "server_sender starting pid=%ld\n", rprintf(FINFO, "server_sender starting pid=%ld\n",
(long)getpid()); (long)getpid());
...@@ -907,7 +904,6 @@ static int do_recv(int f_in, int f_out, char *local_name) ...@@ -907,7 +904,6 @@ static int do_recv(int f_in, int f_out, char *local_name)
} }
am_generator = 1; am_generator = 1;
we_send_keepalive_messages = io_timeout != 0;
flist_receiving_enabled = True; flist_receiving_enabled = True;
io_end_multiplex_in(MPLX_SWITCHING); io_end_multiplex_in(MPLX_SWITCHING);
...@@ -1112,7 +1108,6 @@ int client_run(int f_in, int f_out, pid_t pid, int argc, char *argv[]) ...@@ -1112,7 +1108,6 @@ int client_run(int f_in, int f_out, pid_t pid, int argc, char *argv[])
if (am_sender) { if (am_sender) {
keep_dirlinks = 0; /* Must be disabled on the sender. */ keep_dirlinks = 0; /* Must be disabled on the sender. */
we_send_keepalive_messages = io_timeout != 0 && protocol_version < 31;
if (always_checksum if (always_checksum
&& (log_format_has(stdout_format, 'C') && (log_format_has(stdout_format, 'C')
......
...@@ -48,6 +48,7 @@ extern int keep_partial; ...@@ -48,6 +48,7 @@ extern int keep_partial;
extern int checksum_len; extern int checksum_len;
extern int checksum_seed; extern int checksum_seed;
extern int inplace; extern int inplace;
extern int allowed_lull;
extern int delay_updates; extern int delay_updates;
extern mode_t orig_umask; extern mode_t orig_umask;
extern struct stats stats; extern struct stats stats;
...@@ -254,6 +255,9 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, ...@@ -254,6 +255,9 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
if (INFO_GTE(PROGRESS, 1)) if (INFO_GTE(PROGRESS, 1))
show_progress(offset, total_size); show_progress(offset, total_size);
if (allowed_lull)
maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH | MSK_ACTIVE_RECEIVER);
if (i > 0) { if (i > 0) {
if (DEBUG_GTE(DELTASUM, 3)) { if (DEBUG_GTE(DELTASUM, 3)) {
rprintf(FINFO,"data recv %d at %s\n", rprintf(FINFO,"data recv %d at %s\n",
......
...@@ -368,7 +368,7 @@ int read_ndx_and_attrs(int f_in, int f_out, int *iflag_ptr, uchar *type_ptr, ...@@ -368,7 +368,7 @@ int read_ndx_and_attrs(int f_in, int f_out, int *iflag_ptr, uchar *type_ptr,
/* Support the protocol-29 keep-alive style. */ /* Support the protocol-29 keep-alive style. */
if (protocol_version < 30 && ndx == cur_flist->used && iflags == ITEM_IS_NEW) { if (protocol_version < 30 && ndx == cur_flist->used && iflags == ITEM_IS_NEW) {
if (am_sender) if (am_sender)
maybe_send_keepalive(time(NULL), True); maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
goto read_loop; goto read_loop;
} }
......
...@@ -235,7 +235,7 @@ enum msgcode { ...@@ -235,7 +235,7 @@ enum msgcode {
MSG_IO_ERROR=22,/* the sending side had an I/O error */ MSG_IO_ERROR=22,/* the sending side had an I/O error */
MSG_IO_TIMEOUT=33,/* tell client about a daemon's timeout value */ MSG_IO_TIMEOUT=33,/* tell client about a daemon's timeout value */
MSG_NOOP=42, /* a do-nothing message (legacy protocol-30 only) */ MSG_NOOP=42, /* a do-nothing message (legacy protocol-30 only) */
MSG_ERROR_EXIT=86, /* used by siblings and by protocol-31 */ MSG_ERROR_EXIT=86, /* synchronize an error exit (siblings and protocol >= 31) */
MSG_SUCCESS=100,/* successfully updated indicated flist index */ MSG_SUCCESS=100,/* successfully updated indicated flist index */
MSG_DELETED=101,/* successfully deleted a file on receiving side */ MSG_DELETED=101,/* successfully deleted a file on receiving side */
MSG_NO_SEND=102,/* sender failed to open a file we wanted */ MSG_NO_SEND=102,/* sender failed to open a file we wanted */
...@@ -267,6 +267,10 @@ enum delret { ...@@ -267,6 +267,10 @@ enum delret {
#define MKP_DROP_NAME (1<<0) /* drop trailing filename or trailing slash */ #define MKP_DROP_NAME (1<<0) /* drop trailing filename or trailing slash */
#define MKP_SKIP_SLASH (1<<1) /* skip one or more leading slashes */ #define MKP_SKIP_SLASH (1<<1) /* skip one or more leading slashes */
/* Defines for maybe_send_keepalive() */
#define MSK_ALLOW_FLUSH (1<<0)
#define MSK_ACTIVE_RECEIVER (1<<1)
#include "errcode.h" #include "errcode.h"
#include "config.h" #include "config.h"
......
...@@ -43,7 +43,6 @@ extern int inplace; ...@@ -43,7 +43,6 @@ extern int inplace;
extern int batch_fd; extern int batch_fd;
extern int write_batch; extern int write_batch;
extern int file_old_total; extern int file_old_total;
extern BOOL we_send_keepalive_messages;
extern struct stats stats; extern struct stats stats;
extern struct file_list *cur_flist, *first_flist, *dir_flist; extern struct file_list *cur_flist, *first_flist, *dir_flist;
...@@ -64,7 +63,7 @@ static struct sum_struct *receive_sums(int f) ...@@ -64,7 +63,7 @@ static struct sum_struct *receive_sums(int f)
{ {
struct sum_struct *s; struct sum_struct *s;
int32 i; int32 i;
int lull_mod = allowed_lull * 5; int lull_mod = protocol_version >= 31 ? 0 : allowed_lull * 5;
OFF_T offset = 0; OFF_T offset = 0;
if (!(s = new(struct sum_struct))) if (!(s = new(struct sum_struct)))
...@@ -105,7 +104,7 @@ static struct sum_struct *receive_sums(int f) ...@@ -105,7 +104,7 @@ static struct sum_struct *receive_sums(int f)
s->sums[i].len = s->blength; s->sums[i].len = s->blength;
offset += s->sums[i].len; offset += s->sums[i].len;
if (we_send_keepalive_messages && !(i % lull_mod)) if (lull_mod && !(i % lull_mod))
maybe_send_keepalive(time(NULL), True); maybe_send_keepalive(time(NULL), True);
if (DEBUG_GTE(DELTASUM, 3)) { if (DEBUG_GTE(DELTASUM, 3)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment