This source file includes the following definitions.
- free_cmd
- generate_callid
- recurring_helper
- start_delay_helper
- remote_node_up
- remote_node_down
- check_remote_node_state
- report_remote_ra_result
- update_remaining_timeout
- retry_start_cmd_cb
- connection_takeover_timeout_cb
- monitor_timeout_cb
- synthesize_lrmd_success
- remote_lrm_op_callback
- handle_remote_ra_stop
- handle_remote_ra_start
- handle_remote_ra_exec
- remote_ra_data_init
- remote_ra_cleanup
- is_remote_lrmd_ra
- remote_ra_get_rsc_info
- is_remote_ra_supported_action
- fail_all_monitor_cmds
- remove_cmd
- remote_ra_cancel
- handle_dup_monitor
- remote_ra_exec
- remote_ra_fail
- remote_ra_process_pseudo
- remote_ra_maintenance
- remote_ra_process_maintenance_nodes
- remote_ra_is_in_maintenance
- remote_ra_controlling_guest
1
2
3
4
5
6
7
8
9
#include <crm_internal.h>

#include <limits.h>

#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/services.h>

#include <pacemaker-controld.h>
19
/* Agent type name identifying a remote connection resource; compared against
 * the agent in is_remote_lrmd_ra() and reported as the resource type in
 * remote_ra_get_rsc_info()
 */
#define REMOTE_LRMD_RA "remote"

/* Upper bound, in milliseconds, on the timeout handed to a single asynchronous
 * connection attempt in handle_remote_ra_start()
 */
#define MAX_START_TIMEOUT_MS 10000
24
/* One queued, in-flight, or recurring action against a remote connection
 * resource. Instances are allocated in remote_ra_exec() and released with
 * free_cmd().
 */
typedef struct remote_ra_cmd_s {
    /* Node name of the lrm_state that requested the action (copied in
     * remote_ra_exec(); reported back as the event's remote_nodename)
     */
    char *owner;
    /* ID of the connection resource; also used to look up its lrm_state */
    char *rsc_id;
    /* Action name, e.g. "start", "stop", "monitor", "migrate_to" */
    char *action;
    /* Caller-supplied string passed back unchanged in the result event */
    char *userdata;
    /* Optional failure explanation reported with the result */
    char *exit_reason;
    /* Delay (as passed to g_timeout_add()) before the action may be executed */
    int start_delay;
    /* GLib source ID of the start-delay timer; nonzero while it is pending */
    int delay_id;
    /* Action timeout in milliseconds */
    int timeout;
    /* Milliseconds left of the timeout, as of the last call to
     * update_remaining_timeout()
     */
    int remaining_timeout;
    /* Recurrence interval in milliseconds (0 for a one-shot action) */
    guint interval_ms;
    /* GLib source ID of the timer that re-queues a recurring action */
    int interval_id;
    /* Nonzero once a successful monitor result has been reported for this
     * command
     */
    int reported_success;
    /* GLib source ID of the timer waiting for a poke response */
    int monitor_timeout_id;
    /* GLib source ID of the timer waiting for a migration takeover */
    int takeover_timeout_id;
    /* Action parameters; owned by the command and freed with it */
    lrmd_key_value_t *params;
    /* Result code to report */
    int rc;
    /* Execution status to report */
    int op_status;
    /* Call ID returned to the requester (see generate_callid()) */
    int call_id;
    /* Time the command was created (or reset by handle_dup_monitor()) */
    time_t start_time;
    /* Set by remote_ra_cancel() when the in-flight command was cancelled */
    gboolean cancel;
} remote_ra_cmd_t;
58
/* Migration state of a remote connection as tracked on this node. The value 0
 * (neither enumerator) is assigned when a start or migrate_from is executed.
 */
enum remote_migration_status {
    /* Set when a migrate_to action is executed: a new-client event is expected */
    expect_takeover = 1,
    /* Set when the expected new-client event has arrived */
    takeover_complete,
};
63
/* Per-connection state attached to an lrm_state_t as remote_ra_data */
typedef struct remote_ra_data_s {
    /* Mainloop trigger that runs handle_remote_ra_exec() */
    crm_trigger_t *work;
    /* Command currently waiting for an asynchronous event, if any */
    remote_ra_cmd_t *cur_cmd;
    /* Commands queued for execution (list of remote_ra_cmd_t) */
    GList *cmds;
    /* Recurring commands waiting for their interval timer to fire */
    GList *recurring_cmds;

    /* See enum remote_migration_status */
    enum remote_migration_status migrate_status;

    /* TRUE after a successful connect event, FALSE after a stop */
    gboolean active;

    /* Last maintenance value successfully submitted by
     * remote_ra_maintenance()
     */
    gboolean is_maintenance;

    /* Set to TRUE when the start parameters include the container
     * meta-attribute (see handle_remote_ra_start())
     */
    gboolean controlling_guest;
} remote_ra_data_t;
87
/* Forward declarations for helpers that are used before their definitions */
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
91
92 static void
93 free_cmd(gpointer user_data)
94 {
95 remote_ra_cmd_t *cmd = user_data;
96
97 if (!cmd) {
98 return;
99 }
100 if (cmd->delay_id) {
101 g_source_remove(cmd->delay_id);
102 }
103 if (cmd->interval_id) {
104 g_source_remove(cmd->interval_id);
105 }
106 if (cmd->monitor_timeout_id) {
107 g_source_remove(cmd->monitor_timeout_id);
108 }
109 if (cmd->takeover_timeout_id) {
110 g_source_remove(cmd->takeover_timeout_id);
111 }
112 free(cmd->owner);
113 free(cmd->rsc_id);
114 free(cmd->action);
115 free(cmd->userdata);
116 free(cmd->exit_reason);
117 lrmd_key_value_freeall(cmd->params);
118 free(cmd);
119 }
120
/* Produce the next call ID for a remote RA command.
 *
 * IDs are strictly positive: the sequence is 1, 2, ..., INT_MAX and then
 * starts again at 1. The wrap is done explicitly before incrementing, because
 * incrementing a signed int that already holds INT_MAX is undefined behavior.
 *
 * Returns the new call ID (always > 0).
 */
static int
generate_callid(void)
{
    static int remote_ra_callid = 0;

    if ((remote_ra_callid <= 0) || (remote_ra_callid == INT_MAX)) {
        remote_ra_callid = 1;
    } else {
        remote_ra_callid++;
    }

    return remote_ra_callid;
}
133
134 static gboolean
135 recurring_helper(gpointer data)
136 {
137 remote_ra_cmd_t *cmd = data;
138 lrm_state_t *connection_rsc = NULL;
139
140 cmd->interval_id = 0;
141 connection_rsc = lrm_state_find(cmd->rsc_id);
142 if (connection_rsc && connection_rsc->remote_ra_data) {
143 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
144
145 ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
146
147 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
148 mainloop_set_trigger(ra_data->work);
149 }
150 return FALSE;
151 }
152
153 static gboolean
154 start_delay_helper(gpointer data)
155 {
156 remote_ra_cmd_t *cmd = data;
157 lrm_state_t *connection_rsc = NULL;
158
159 cmd->delay_id = 0;
160 connection_rsc = lrm_state_find(cmd->rsc_id);
161 if (connection_rsc && connection_rsc->remote_ra_data) {
162 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
163
164 mainloop_set_trigger(ra_data->work);
165 }
166 return FALSE;
167 }
168
169
170
171
172
173
174
175 static void
176 remote_node_up(const char *node_name)
177 {
178 int call_opt, call_id = 0;
179 xmlNode *update, *state;
180 crm_node_t *node;
181 enum controld_section_e section = controld_section_all;
182
183 CRM_CHECK(node_name != NULL, return);
184 crm_info("Announcing pacemaker_remote node %s", node_name);
185
186
187
188
189
190
191 call_opt = crmd_cib_smart_opt();
192 if (controld_shutdown_lock_enabled) {
193 section = controld_section_all_unlocked;
194 }
195 controld_delete_node_state(node_name, section, call_opt);
196
197
198 update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
199
200
201 node = crm_remote_peer_get(node_name);
202 CRM_CHECK(node != NULL, return);
203 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
204
205
206
207
208
209
210
211 send_remote_state_message(node_name, TRUE);
212
213 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
214 state = create_node_state_update(node, node_update_cluster, update,
215 __func__);
216
217
218
219
220
221 crm_xml_add(state, XML_NODE_IS_FENCED, "0");
222
223
224
225
226
227
228
229
230 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
231 if (call_id < 0) {
232 crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
233 }
234 free_xml(update);
235 }
236
/* Selects how much of a remote node's CIB state remote_node_down() deletes */
enum down_opts {
    /* Delete only the attributes section (controld_section_attrs) */
    DOWN_KEEP_LRM,
    /* Delete everything (controld_section_all) */
    DOWN_ERASE_LRM
};
241
242
243
244
245
246
247
248
249 static void
250 remote_node_down(const char *node_name, const enum down_opts opts)
251 {
252 xmlNode *update;
253 int call_id = 0;
254 int call_opt = crmd_cib_smart_opt();
255 crm_node_t *node;
256
257
258 update_attrd_remote_node_removed(node_name, NULL);
259
260
261
262
263
264
265 if (opts == DOWN_ERASE_LRM) {
266 controld_delete_node_state(node_name, controld_section_all, call_opt);
267 } else {
268 controld_delete_node_state(node_name, controld_section_attrs, call_opt);
269 }
270
271
272 node = crm_remote_peer_get(node_name);
273 CRM_CHECK(node != NULL, return);
274 pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
275
276
277 send_remote_state_message(node_name, FALSE);
278
279
280 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
281 create_node_state_update(node, node_update_cluster, update, __func__);
282 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
283 if (call_id < 0) {
284 crm_perror(LOG_ERR, "%s CIB node state update", node_name);
285 }
286 free_xml(update);
287 }
288
289
290
291
292
293
294
295 static void
296 check_remote_node_state(remote_ra_cmd_t *cmd)
297 {
298
299 if (cmd->rc != PCMK_OCF_OK) {
300 return;
301 }
302
303 if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
304 remote_node_up(cmd->rsc_id);
305
306 } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) {
307
308
309
310
311
312
313
314 crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
315
316 CRM_CHECK(node != NULL, return);
317 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
318
319 } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
320 lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
321 remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
322
323 if (ra_data) {
324 if (ra_data->migrate_status != takeover_complete) {
325
326 remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
327 } else if (AM_I_DC == FALSE) {
328
329
330
331
332 crm_remote_peer_cache_remove(cmd->rsc_id);
333 }
334 }
335 }
336
337
338
339
340
341
342
343
344
345
346 }
347
348 static void
349 report_remote_ra_result(remote_ra_cmd_t * cmd)
350 {
351 lrmd_event_data_t op = { 0, };
352
353 check_remote_node_state(cmd);
354
355 op.type = lrmd_event_exec_complete;
356 op.rsc_id = cmd->rsc_id;
357 op.op_type = cmd->action;
358 op.user_data = cmd->userdata;
359 op.exit_reason = cmd->exit_reason;
360 op.timeout = cmd->timeout;
361 op.interval_ms = cmd->interval_ms;
362 op.rc = cmd->rc;
363 op.op_status = cmd->op_status;
364 op.t_run = (unsigned int) cmd->start_time;
365 op.t_rcchange = (unsigned int) cmd->start_time;
366 if (cmd->reported_success && cmd->rc != PCMK_OCF_OK) {
367 op.t_rcchange = (unsigned int) time(NULL);
368
369
370
371
372
373
374
375
376
377 if (op.t_rcchange == op.t_run) {
378 op.t_rcchange++;
379 }
380 }
381
382 if (cmd->params) {
383 lrmd_key_value_t *tmp;
384
385 op.params = pcmk__strkey_table(free, free);
386 for (tmp = cmd->params; tmp; tmp = tmp->next) {
387 g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
388 }
389
390 }
391 op.call_id = cmd->call_id;
392 op.remote_nodename = cmd->owner;
393
394 lrm_op_callback(&op);
395
396 if (op.params) {
397 g_hash_table_destroy(op.params);
398 }
399 }
400
401 static void
402 update_remaining_timeout(remote_ra_cmd_t * cmd)
403 {
404 cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
405 }
406
407 static gboolean
408 retry_start_cmd_cb(gpointer data)
409 {
410 lrm_state_t *lrm_state = data;
411 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
412 remote_ra_cmd_t *cmd = NULL;
413 int rc = -1;
414
415 if (!ra_data || !ra_data->cur_cmd) {
416 return FALSE;
417 }
418 cmd = ra_data->cur_cmd;
419 if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) {
420 return FALSE;
421 }
422 update_remaining_timeout(cmd);
423
424 if (cmd->remaining_timeout > 0) {
425 rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
426 }
427
428 if (rc != 0) {
429 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
430 cmd->op_status = PCMK_LRM_OP_ERROR;
431 report_remote_ra_result(cmd);
432
433 if (ra_data->cmds) {
434 mainloop_set_trigger(ra_data->work);
435 }
436 ra_data->cur_cmd = NULL;
437 free_cmd(cmd);
438 } else {
439
440 }
441
442 return FALSE;
443 }
444
445
446 static gboolean
447 connection_takeover_timeout_cb(gpointer data)
448 {
449 lrm_state_t *lrm_state = NULL;
450 remote_ra_cmd_t *cmd = data;
451
452 crm_info("takeover event timed out for node %s", cmd->rsc_id);
453 cmd->takeover_timeout_id = 0;
454
455 lrm_state = lrm_state_find(cmd->rsc_id);
456
457 handle_remote_ra_stop(lrm_state, cmd);
458 free_cmd(cmd);
459
460 return FALSE;
461 }
462
463 static gboolean
464 monitor_timeout_cb(gpointer data)
465 {
466 lrm_state_t *lrm_state = NULL;
467 remote_ra_cmd_t *cmd = data;
468
469 lrm_state = lrm_state_find(cmd->rsc_id);
470
471 crm_info("Timed out waiting for remote poke response from %s%s",
472 cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
473 cmd->monitor_timeout_id = 0;
474 cmd->op_status = PCMK_LRM_OP_TIMEOUT;
475 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
476
477 if (lrm_state && lrm_state->remote_ra_data) {
478 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
479
480 if (ra_data->cur_cmd == cmd) {
481 ra_data->cur_cmd = NULL;
482 }
483 if (ra_data->cmds) {
484 mainloop_set_trigger(ra_data->work);
485 }
486 }
487
488 report_remote_ra_result(cmd);
489 free_cmd(cmd);
490
491 if(lrm_state) {
492 lrm_state_disconnect(lrm_state);
493 }
494 return FALSE;
495 }
496
497 static void
498 synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
499 {
500 lrmd_event_data_t op = { 0, };
501
502 if (lrm_state == NULL) {
503
504 lrm_state = lrm_state_find(fsa_our_uname);
505 }
506 CRM_ASSERT(lrm_state != NULL);
507
508 op.type = lrmd_event_exec_complete;
509 op.rsc_id = rsc_id;
510 op.op_type = op_type;
511 op.rc = PCMK_OCF_OK;
512 op.op_status = PCMK_LRM_OP_DONE;
513 op.t_run = (unsigned int) time(NULL);
514 op.t_rcchange = op.t_run;
515 op.call_id = generate_callid();
516 process_lrm_event(lrm_state, &op, NULL, NULL);
517 }
518
/* Handle an event received on a remote connection.
 *
 * Events are dispatched on op->type and, for events that complete an
 * in-flight command, on the action of the connection's current command
 * (ra_data->cur_cmd). When an event completes the current command, the
 * command is detached and freed and the worker trigger is set if more
 * commands are queued.
 *
 * op: event to process; op->remote_nodename selects the connection
 */
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
    gboolean cmd_handled = FALSE;
    lrm_state_t *lrm_state = NULL;
    remote_ra_data_t *ra_data = NULL;
    remote_ra_cmd_t *cmd = NULL;

    crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
              "(%d) status=%s (%d)",
              (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
              lrmd_event_type2str(op->type), op->remote_nodename,
              services_ocf_exitcode_str(op->rc), op->rc,
              services_lrm_status_str(op->op_status), op->op_status);

    lrm_state = lrm_state_find(op->remote_nodename);
    if (!lrm_state || !lrm_state->remote_ra_data) {
        crm_debug("No state information found for remote connection event");
        return;
    }
    ra_data = lrm_state->remote_ra_data;

    /* New-client event: either the takeover announced by an earlier
     * migrate_to, or an unexpected one
     */
    if (op->type == lrmd_event_new_client) {

        if (ra_data->migrate_status == expect_takeover) {
            /* Expected: record that the takeover has happened */
            ra_data->migrate_status = takeover_complete;

        } else {
            crm_err("Unexpected pacemaker_remote client takeover for %s. Disconnecting", op->remote_nodename);
            /* Only disconnect here; the remaining cleanup is left to a
             * later stop (see handle_remote_ra_stop())
             */
            lrm_state_disconnect_only(lrm_state);
        }
        return;
    }

    /* Exec-complete events are passed through to the regular callback unless
     * the connection has been taken over
     */
    if (op->type == lrmd_event_exec_complete) {
        if (ra_data->migrate_status == takeover_complete) {
            crm_debug("ignoring event, this connection is taken over by another node");
        } else {
            lrm_op_callback(op);
        }
        return;
    }

    /* Disconnect while no command is in flight */
    if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {

        if (ra_data->active == FALSE) {
            /* Connection was already marked inactive (by a stop) */
            crm_debug("Disconnection from Pacemaker Remote node %s complete",
                      lrm_state->node_name);

        } else if (!remote_ra_is_in_maintenance(lrm_state)) {
            /* Active and not in maintenance: fail every recurring monitor */
            crm_err("Lost connection to Pacemaker Remote node %s",
                    lrm_state->node_name);
            ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
            ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);

        } else {
            /* Active but in maintenance: treat it like a successful stop */
            crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
                       lrm_state->node_name);

            handle_remote_ra_stop(lrm_state, NULL);
            remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);

            /* The NULL state makes the event go to the local node's state */
            synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
        }
        return;
    }

    /* Everything below completes the in-flight command */
    if (!ra_data->cur_cmd) {
        crm_debug("no event to match");
        return;
    }

    cmd = ra_data->cur_cmd;

    /* Connect event for a pending start or migrate_from */
    if (op->type == lrmd_event_connect && pcmk__strcase_any_of(cmd->action, "start",
                                                               "migrate_from", NULL)) {
        if (op->connection_rc < 0) {
            update_remaining_timeout(cmd);

            if (op->connection_rc == -ENOKEY) {
                /* Not retried: report as an invalid-parameter error */
                cmd->op_status = PCMK_LRM_OP_ERROR;
                cmd->rc = PCMK_OCF_INVALID_PARAM;
                cmd->exit_reason = strdup("Authentication key not readable");

            } else if (cmd->remaining_timeout > 3000) {
                /* Enough time left: retry in one second. The command stays
                 * the current command and is neither reported nor freed.
                 */
                crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
                g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
                return;

            } else {
                crm_trace("can't reschedule start, remaining timeout too small %d",
                          cmd->remaining_timeout);
                cmd->op_status = PCMK_LRM_OP_TIMEOUT;
                cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
            }

        } else {
            /* Connected */
            lrm_state_reset_tables(lrm_state, TRUE);
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            ra_data->active = TRUE;
        }

        crm_debug("Remote connection event matched %s action", cmd->action);
        report_remote_ra_result(cmd);
        cmd_handled = TRUE;

    /* Poke response for a pending monitor */
    } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {

        if (cmd->monitor_timeout_id) {
            g_source_remove(cmd->monitor_timeout_id);
            cmd->monitor_timeout_id = 0;
        }

        /* Success is reported only once per command; later successful runs
         * of a recurring monitor are silent
         */
        if (!cmd->reported_success) {
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            report_remote_ra_result(cmd);
            cmd->reported_success = 1;
        }

        crm_debug("Remote poke event matched %s action", cmd->action);

        /* A recurring monitor that was not cancelled is parked on the
         * recurring list with a timer; cmd is set to NULL so that the
         * free_cmd() call below leaves it alone
         */
        if (cmd->interval_ms && (cmd->cancel == FALSE)) {
            ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
            cmd->interval_id = g_timeout_add(cmd->interval_ms,
                                             recurring_helper, cmd);
            cmd = NULL;
        }
        cmd_handled = TRUE;

    /* Disconnect while a monitor is in flight */
    } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
        if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
            cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
            cmd->op_status = PCMK_LRM_OP_ERROR;
            report_remote_ra_result(cmd);
            crm_err("Remote connection to %s unexpectedly dropped during monitor",
                    lrm_state->node_name);
        }
        cmd_handled = TRUE;

    /* NOTE(review): this branch looks unreachable, because every
     * lrmd_event_new_client event already returned near the top of this
     * function. As written, a stop waiting for a takeover is completed by
     * connection_takeover_timeout_cb() instead -- confirm whether that is
     * intended.
     */
    } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {

        handle_remote_ra_stop(lrm_state, cmd);
        cmd_handled = TRUE;

    } else {
        crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
    }

    if (cmd_handled) {
        ra_data->cur_cmd = NULL;
        if (ra_data->cmds) {
            mainloop_set_trigger(ra_data->work);
        }
        /* cmd is NULL here if ownership moved to the recurring list */
        free_cmd(cmd);
    }
}
690
691 static void
692 handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
693 {
694 remote_ra_data_t *ra_data = NULL;
695
696 CRM_ASSERT(lrm_state);
697 ra_data = lrm_state->remote_ra_data;
698
699 if (ra_data->migrate_status != takeover_complete) {
700
701 g_hash_table_remove_all(lrm_state->pending_ops);
702 } else {
703
704
705 lrm_state_reset_tables(lrm_state, FALSE);
706 }
707
708 ra_data->active = FALSE;
709 lrm_state_disconnect(lrm_state);
710
711 if (ra_data->cmds) {
712 g_list_free_full(ra_data->cmds, free_cmd);
713 }
714 if (ra_data->recurring_cmds) {
715 g_list_free_full(ra_data->recurring_cmds, free_cmd);
716 }
717 ra_data->cmds = NULL;
718 ra_data->recurring_cmds = NULL;
719 ra_data->cur_cmd = NULL;
720
721 if (cmd) {
722 cmd->rc = PCMK_OCF_OK;
723 cmd->op_status = PCMK_LRM_OP_DONE;
724
725 report_remote_ra_result(cmd);
726 }
727 }
728
729 static int
730 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
731 {
732 const char *server = NULL;
733 lrmd_key_value_t *tmp = NULL;
734 int port = 0;
735 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
736 int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
737
738 for (tmp = cmd->params; tmp; tmp = tmp->next) {
739 if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR,
740 XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) {
741 server = tmp->value;
742 } else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) {
743 port = atoi(tmp->value);
744 } else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) {
745 ra_data->controlling_guest = TRUE;
746 }
747 }
748
749 return lrm_state_remote_connect_async(lrm_state, server, port, timeout_used);
750 }
751
/* Worker run by the connection's mainloop trigger: execute queued commands.
 *
 * Commands are taken from the front of ra_data->cmds one at a time. A command
 * that completes synchronously is reported and freed and the loop moves on. A
 * command that must wait for an asynchronous event becomes ra_data->cur_cmd
 * and the function returns; nothing else runs until that command is finished
 * elsewhere.
 *
 * user_data: the lrm_state_t of the connection
 *
 * Always returns TRUE.
 */
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
    int rc = 0;
    lrm_state_t *lrm_state = user_data;
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
    remote_ra_cmd_t *cmd;
    GList *first = NULL;

    if (ra_data->cur_cmd) {
        /* Still waiting on an in-flight command */
        return TRUE;
    }

    while (ra_data->cmds) {
        first = ra_data->cmds;
        cmd = first->data;
        if (cmd->delay_id) {
            /* Start delay has not elapsed yet; start_delay_helper() will
             * set the trigger again when it has
             */
            return TRUE;
        }

        /* Unlink the command from the queue; it is now owned by this loop
         * iteration (or by cur_cmd, if it goes asynchronous)
         */
        ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
        g_list_free_1(first);

        if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
            /* A fresh connection resets any earlier migration state */
            ra_data->migrate_status = 0;
            rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
            if (rc == 0) {
                /* Finished later, by the connect event */
                crm_debug("Initiated async remote connection, %s action will complete after connect event",
                          cmd->action);
                ra_data->cur_cmd = cmd;
                return TRUE;
            } else {
                crm_debug("Could not initiate remote connection for %s action",
                          cmd->action);
                cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                cmd->op_status = PCMK_LRM_OP_ERROR;
            }
            report_remote_ra_result(cmd);

        } else if (!strcmp(cmd->action, "monitor")) {

            if (lrm_state_is_connected(lrm_state) == TRUE) {
                rc = lrm_state_poke_connection(lrm_state);
                if (rc < 0) {
                    cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                    cmd->op_status = PCMK_LRM_OP_ERROR;
                }
            } else {
                /* Not connected: report "not running" without poking */
                rc = -1;
                cmd->op_status = PCMK_LRM_OP_DONE;
                cmd->rc = PCMK_OCF_NOT_RUNNING;
            }

            if (rc == 0) {
                /* Finished later, by the poke event or monitor_timeout_cb() */
                crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
                          cmd->rsc_id);
                ra_data->cur_cmd = cmd;
                cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
                return TRUE;
            }
            report_remote_ra_result(cmd);

        } else if (!strcmp(cmd->action, "stop")) {

            if (ra_data->migrate_status == expect_takeover) {
                /* A migrate_to preceded this stop but the takeover has not
                 * been seen yet: wait up to half of the stop's timeout
                 * before stopping anyway (connection_takeover_timeout_cb())
                 */
                cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
                ra_data->cur_cmd = cmd;
                return TRUE;
            }

            handle_remote_ra_stop(lrm_state, cmd);

        } else if (!strcmp(cmd->action, "migrate_to")) {
            /* Nothing to do locally except remember to expect a takeover */
            ra_data->migrate_status = expect_takeover;
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            report_remote_ra_result(cmd);
        } else if (pcmk__str_any_of(cmd->action, CRMD_ACTION_RELOAD,
                                    CRMD_ACTION_RELOAD_AGENT, NULL)) {
            /* Reloads are reported as successful without doing anything */
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            report_remote_ra_result(cmd);
        }

        /* Reached only by commands that completed synchronously */
        free_cmd(cmd);
    }

    return TRUE;
}
859
860 static void
861 remote_ra_data_init(lrm_state_t * lrm_state)
862 {
863 remote_ra_data_t *ra_data = NULL;
864
865 if (lrm_state->remote_ra_data) {
866 return;
867 }
868
869 ra_data = calloc(1, sizeof(remote_ra_data_t));
870 ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
871 lrm_state->remote_ra_data = ra_data;
872 }
873
874 void
875 remote_ra_cleanup(lrm_state_t * lrm_state)
876 {
877 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
878
879 if (!ra_data) {
880 return;
881 }
882
883 if (ra_data->cmds) {
884 g_list_free_full(ra_data->cmds, free_cmd);
885 }
886
887 if (ra_data->recurring_cmds) {
888 g_list_free_full(ra_data->recurring_cmds, free_cmd);
889 }
890 mainloop_destroy_trigger(ra_data->work);
891 free(ra_data);
892 lrm_state->remote_ra_data = NULL;
893 }
894
895 gboolean
896 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
897 {
898 if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
899 return TRUE;
900 }
901 if (id && lrm_state_find(id) && !pcmk__str_eq(id, fsa_our_uname, pcmk__str_casei)) {
902 return TRUE;
903 }
904
905 return FALSE;
906 }
907
908 lrmd_rsc_info_t *
909 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
910 {
911 lrmd_rsc_info_t *info = NULL;
912
913 if ((lrm_state_find(rsc_id))) {
914 info = calloc(1, sizeof(lrmd_rsc_info_t));
915
916 info->id = strdup(rsc_id);
917 info->type = strdup(REMOTE_LRMD_RA);
918 info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
919 info->provider = strdup("pacemaker");
920 }
921
922 return info;
923 }
924
925 static gboolean
926 is_remote_ra_supported_action(const char *action)
927 {
928 return pcmk__str_any_of(action,
929 CRMD_ACTION_START,
930 CRMD_ACTION_STOP,
931 CRMD_ACTION_STATUS,
932 CRMD_ACTION_MIGRATE,
933 CRMD_ACTION_MIGRATED,
934 CRMD_ACTION_RELOAD_AGENT,
935 CRMD_ACTION_RELOAD,
936 NULL);
937 }
938
939 static GList *
940 fail_all_monitor_cmds(GList * list)
941 {
942 GList *rm_list = NULL;
943 remote_ra_cmd_t *cmd = NULL;
944 GList *gIter = NULL;
945
946 for (gIter = list; gIter != NULL; gIter = gIter->next) {
947 cmd = gIter->data;
948 if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
949 rm_list = g_list_append(rm_list, cmd);
950 }
951 }
952
953 for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
954 cmd = gIter->data;
955
956 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
957 cmd->op_status = PCMK_LRM_OP_ERROR;
958 crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
959 cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
960 report_remote_ra_result(cmd);
961
962 list = g_list_remove(list, cmd);
963 free_cmd(cmd);
964 }
965
966
967 g_list_free(rm_list);
968 return list;
969 }
970
971 static GList *
972 remove_cmd(GList * list, const char *action, guint interval_ms)
973 {
974 remote_ra_cmd_t *cmd = NULL;
975 GList *gIter = NULL;
976
977 for (gIter = list; gIter != NULL; gIter = gIter->next) {
978 cmd = gIter->data;
979 if ((cmd->interval_ms == interval_ms)
980 && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
981 break;
982 }
983 cmd = NULL;
984 }
985 if (cmd) {
986 list = g_list_remove(list, cmd);
987 free_cmd(cmd);
988 }
989 return list;
990 }
991
992 int
993 remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
994 const char *action, guint interval_ms)
995 {
996 lrm_state_t *connection_rsc = NULL;
997 remote_ra_data_t *ra_data = NULL;
998
999 connection_rsc = lrm_state_find(rsc_id);
1000 if (!connection_rsc || !connection_rsc->remote_ra_data) {
1001 return -EINVAL;
1002 }
1003
1004 ra_data = connection_rsc->remote_ra_data;
1005 ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
1006 ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
1007 interval_ms);
1008 if (ra_data->cur_cmd &&
1009 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1010 (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
1011
1012 ra_data->cur_cmd->cancel = TRUE;
1013 }
1014
1015 return 0;
1016 }
1017
1018 static remote_ra_cmd_t *
1019 handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
1020 const char *userdata)
1021 {
1022 GList *gIter = NULL;
1023 remote_ra_cmd_t *cmd = NULL;
1024
1025
1026
1027
1028
1029
1030
1031 if (interval_ms == 0) {
1032 return NULL;
1033 }
1034
1035 if (ra_data->cur_cmd &&
1036 ra_data->cur_cmd->cancel == FALSE &&
1037 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1038 pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) {
1039
1040 cmd = ra_data->cur_cmd;
1041 goto handle_dup;
1042 }
1043
1044 for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
1045 cmd = gIter->data;
1046 if ((cmd->interval_ms == interval_ms)
1047 && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1048 goto handle_dup;
1049 }
1050 }
1051
1052 for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
1053 cmd = gIter->data;
1054 if ((cmd->interval_ms == interval_ms)
1055 && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1056 goto handle_dup;
1057 }
1058 }
1059
1060 return NULL;
1061
1062 handle_dup:
1063
1064 crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
1065 cmd->rsc_id, "monitor", interval_ms);
1066
1067
1068 if (userdata) {
1069 free(cmd->userdata);
1070 cmd->userdata = strdup(userdata);
1071 }
1072
1073
1074 if (cmd->reported_success) {
1075 cmd->start_time = time(NULL);
1076 cmd->call_id = generate_callid();
1077 cmd->reported_success = 0;
1078 }
1079
1080
1081
1082
1083 if (cmd->interval_id) {
1084 g_source_remove(cmd->interval_id);
1085 cmd->interval_id = 0;
1086 recurring_helper(cmd);
1087 }
1088
1089 return cmd;
1090 }
1091
1092 int
1093 remote_ra_exec(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
1094 const char *userdata, guint interval_ms,
1095 int timeout,
1096 int start_delay,
1097 lrmd_key_value_t * params)
1098 {
1099 int rc = 0;
1100 lrm_state_t *connection_rsc = NULL;
1101 remote_ra_cmd_t *cmd = NULL;
1102 remote_ra_data_t *ra_data = NULL;
1103
1104 if (is_remote_ra_supported_action(action) == FALSE) {
1105 rc = -EINVAL;
1106 goto exec_done;
1107 }
1108
1109 connection_rsc = lrm_state_find(rsc_id);
1110 if (!connection_rsc) {
1111 rc = -EINVAL;
1112 goto exec_done;
1113 }
1114
1115 remote_ra_data_init(connection_rsc);
1116 ra_data = connection_rsc->remote_ra_data;
1117
1118 cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
1119 if (cmd) {
1120 rc = cmd->call_id;
1121 goto exec_done;
1122 }
1123
1124 cmd = calloc(1, sizeof(remote_ra_cmd_t));
1125 cmd->owner = strdup(lrm_state->node_name);
1126 cmd->rsc_id = strdup(rsc_id);
1127 cmd->action = strdup(action);
1128 cmd->userdata = strdup(userdata);
1129 cmd->interval_ms = interval_ms;
1130 cmd->timeout = timeout;
1131 cmd->start_delay = start_delay;
1132 cmd->params = params;
1133 cmd->start_time = time(NULL);
1134
1135 cmd->call_id = generate_callid();
1136
1137 if (cmd->start_delay) {
1138 cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
1139 }
1140
1141 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
1142 mainloop_set_trigger(ra_data->work);
1143
1144 return cmd->call_id;
1145 exec_done:
1146
1147 lrmd_key_value_freeall(params);
1148 return rc;
1149 }
1150
1151
1152
1153
1154
1155
1156
1157 void
1158 remote_ra_fail(const char *node_name)
1159 {
1160 lrm_state_t *lrm_state = lrm_state_find(node_name);
1161
1162 if (lrm_state && lrm_state_is_connected(lrm_state)) {
1163 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1164
1165 crm_info("Failing monitors on pacemaker_remote node %s", node_name);
1166 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
1167 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
1168 }
1169 }
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
/* XPath selecting the node entries listed under the "downed" element of a
 * pseudo-event whose task is 'stonith'
 */
#define XPATH_PSEUDO_FENCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
    "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
    "/" XML_CIB_TAG_NODE
1185
1186
1187
1188
1189
1190
1191
1192 void
1193 remote_ra_process_pseudo(xmlNode *xml)
1194 {
1195 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
1196
1197 if (numXpathResults(search) == 1) {
1198 xmlNode *result = getXpathResult(search, 0);
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214 if (result) {
1215 const char *remote = ID(result);
1216
1217 if (remote) {
1218 remote_node_down(remote, DOWN_ERASE_LRM);
1219 }
1220 }
1221 }
1222 freeXpathObject(search);
1223 }
1224
1225 static void
1226 remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
1227 {
1228 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1229 xmlNode *update, *state;
1230 int call_opt, call_id = 0;
1231 crm_node_t *node;
1232
1233 call_opt = crmd_cib_smart_opt();
1234 node = crm_remote_peer_get(lrm_state->node_name);
1235 CRM_CHECK(node != NULL, return);
1236 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
1237 state = create_node_state_update(node, node_update_none, update,
1238 __func__);
1239 crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
1240 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
1241 if (call_id < 0) {
1242 crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
1243 } else {
1244
1245 ra_data->is_maintenance = maintenance;
1246 }
1247 free_xml(update);
1248 }
1249
/* XPath selecting the maintenance element of a pseudo-event whose task is
 * CRM_OP_MAINTENANCE_NODES
 */
#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
    "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
    XML_GRAPH_TAG_MAINTENANCE
1253
1254
1255
1256
1257
1258
1259
1260
1261 void
1262 remote_ra_process_maintenance_nodes(xmlNode *xml)
1263 {
1264 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
1265
1266 if (numXpathResults(search) == 1) {
1267 xmlNode *node;
1268 int cnt = 0, cnt_remote = 0;
1269
1270 for (node =
1271 first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
1272 node != NULL; node = pcmk__xml_next(node)) {
1273 lrm_state_t *lrm_state = lrm_state_find(ID(node));
1274
1275 cnt++;
1276 if (lrm_state && lrm_state->remote_ra_data &&
1277 ((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
1278 int is_maint;
1279
1280 cnt_remote++;
1281 pcmk__scan_min_int(crm_element_value(node, XML_NODE_IS_MAINTENANCE),
1282 &is_maint, 0);
1283 remote_ra_maintenance(lrm_state, is_maint);
1284 }
1285 }
1286 crm_trace("Action holds %d nodes (%d remotes found) "
1287 "adjusting maintenance-mode", cnt, cnt_remote);
1288 }
1289 freeXpathObject(search);
1290 }
1291
1292 gboolean
1293 remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
1294 {
1295 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1296
1297 return ra_data->is_maintenance;
1298 }
1299
1300 gboolean
1301 remote_ra_controlling_guest(lrm_state_t * lrm_state)
1302 {
1303 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1304
1305 return ra_data->controlling_guest;
1306 }