OpenDNSSEC-signer 2.1.13
signertasks.c
Go to the documentation of this file.
1/*
2 * Copyright (c) 2009 NLNet Labs. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
14 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
17 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
19 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
21 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
22 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
23 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *
25 */
26
27#include <time.h> /* time() */
28
29#include "daemon/engine.h"
30#include "scheduler/worker.h"
31#include "scheduler/schedule.h"
32#include "signertasks.h"
33#include "duration.h"
34#include "hsm.h"
35#include "locks.h"
36#include "util.h"
37#include "log.h"
38#include "status.h"
39#include "signer/tools.h"
40#include "signer/zone.h"
41#include "util.h"
42#include "signertasks.h"
43
48static void
49worker_queue_rrset(struct worker_context* context, fifoq_type* q, rrset_type* rrset, long* nsubtasks)
50{
51 ods_status status = ODS_STATUS_UNCHANGED;
52 int tries = 0;
53 ods_log_assert(q);
54 ods_log_assert(rrset);
55
56 pthread_mutex_lock(&q->q_lock);
57 status = fifoq_push(q, (void*) rrset, context, &tries);
58 while (status == ODS_STATUS_UNCHANGED) {
59 tries++;
60 if (context->worker->need_to_exit) {
61 pthread_mutex_unlock(&q->q_lock);
62 return;
63 }
70 ods_thread_wait(&q->q_nonfull, &q->q_lock, 5);
71 status = fifoq_push(q, (void*) rrset, context, &tries);
72 }
73 pthread_mutex_unlock(&q->q_lock);
74
75 ods_log_assert(status == ODS_STATUS_OK);
76 *nsubtasks += 1;
77}
78
79
84static void
85worker_queue_domain(struct worker_context* context, fifoq_type* q, domain_type* domain, long* nsubtasks)
86{
87 rrset_type* rrset = NULL;
88 denial_type* denial = NULL;
89 ods_log_assert(context);
90 ods_log_assert(q);
91 ods_log_assert(domain);
92 rrset = domain->rrsets;
93 while (rrset) {
94 worker_queue_rrset(context, q, rrset, nsubtasks);
95 rrset = rrset->next;
96 }
97 denial = (denial_type*) domain->denial;
98 if (denial && denial->rrset) {
99 worker_queue_rrset(context, q, denial->rrset, nsubtasks);
100 }
101}
102
103
108static void
109worker_queue_zone(struct worker_context* context, fifoq_type* q, zone_type* zone, long* nsubtasks)
110{
111 ldns_rbnode_t* node = LDNS_RBTREE_NULL;
112 domain_type* domain = NULL;
113 ods_log_assert(context);
114 ods_log_assert(q);
115 ods_log_assert(zone);
116 if (!zone->db || !zone->db->domains) {
117 return;
118 }
119 if (zone->db->domains->root != LDNS_RBTREE_NULL) {
120 node = ldns_rbtree_first(zone->db->domains);
121 }
122 while (node && node != LDNS_RBTREE_NULL) {
123 domain = (domain_type*) node->data;
124 worker_queue_domain(context, q, domain, nsubtasks);
125 node = ldns_rbtree_next(node);
126 }
127}
128
129
134static ods_status
135worker_check_jobs(worker_type* worker, task_type* task, int ntasks, long ntasksfailed)
136{
137 ods_log_assert(worker);
138 ods_log_assert(task);
139 if (ntasksfailed) {
140 ods_log_error("[%s] sign zone %s failed: %ld RRsets failed",
141 worker->name, task->owner, ntasksfailed);
142 return ODS_STATUS_ERR;
143 } else if (worker->need_to_exit) {
144 ods_log_error("[%s] sign zone %s failed: worker needs to exit",
145 worker->name, task->owner);
146 return ODS_STATUS_ERR;
147 }
148 return ODS_STATUS_OK;
149}
150
151void
152drudge(worker_type* worker)
153{
154 rrset_type* rrset;
155 ods_status status;
156 struct worker_context* superior;
157 hsm_ctx_t* ctx = NULL;
159 fifoq_type* signq = worker->taskq->signq;
160
161 while (worker->need_to_exit == 0) {
162 ods_log_deeebug("[%s] report for duty", worker->name);
163 pthread_mutex_lock(&signq->q_lock);
164 superior = NULL;
165 rrset = (rrset_type*) fifoq_pop(signq, (void**)&superior);
166 if (!rrset) {
167 ods_log_deeebug("[%s] nothing to do, wait", worker->name);
174 pthread_cond_wait(&signq->q_threshold, &signq->q_lock);
175 if(worker->need_to_exit == 0)
176 rrset = (rrset_type*) fifoq_pop(signq, (void**)&superior);
177 }
178 pthread_mutex_unlock(&signq->q_lock);
179 /* do some work */
180 if (rrset) {
181 ods_log_assert(superior);
182 if (!ctx) {
183 ods_log_debug("[%s] create hsm context", worker->name);
184 ctx = hsm_create_context();
185 }
186 if (!ctx) {
187 engine = superior->engine;
188 ods_log_crit("[%s] error creating libhsm context", worker->name);
190 pthread_mutex_lock(&engine->signal_lock);
191 pthread_cond_signal(&engine->signal_cond);
192 pthread_mutex_unlock(&engine->signal_lock);
193 ods_log_error("signer instructed to reload due to hsm reset while signing");
194 status = ODS_STATUS_HSM_ERR;
195 } else {
196 status = rrset_sign(ctx, rrset, superior->clock_in);
197 }
198 fifoq_report(signq, superior->worker, status);
199 }
200 /* done work */
201 }
202 /* cleanup open HSM sessions */
203 if (ctx) {
204 hsm_destroy_context(ctx);
205 }
206}
207
208time_t
209do_readsignconf(task_type* task, const char* zonename, void* zonearg, void *contextarg)
210{
211 struct worker_context* context = contextarg;
212 engine_type* engine = context->engine;
213 zone_type* zone = zonearg;
214 ods_status status;
215 status = tools_signconf(zone);
216 if (status == ODS_STATUS_UNCHANGED && !zone->signconf->last_modified) {
217 ods_log_debug("No signconf.xml for zone %s yet", task->owner);
218 status = ODS_STATUS_ERR;
219 zone->zoneconfigvalid = 0;
220 }
221 if (status == ODS_STATUS_OK || status == ODS_STATUS_UNCHANGED) {
222 /* status unchanged not really possible */
223 schedule_unscheduletask(engine->taskq, TASK_READ, zone->name);
224 schedule_scheduletask(engine->taskq, TASK_READ, zone->name, zone, &zone->zone_lock, schedule_PROMPTLY);
225 zone->zoneconfigvalid = 1;
226 return schedule_SUCCESS;
227 } else {
228 zone->zoneconfigvalid = 0;
229 if (!zone->signconf->last_modified) {
230 ods_log_warning("WARNING: unable to sign zone %s, signconf is not ready", task->owner);
231 } else {
232 ods_log_crit("CRITICAL: failed to sign zone %s: %s", task->owner, ods_status2str(status));
233 }
234 return schedule_DEFER;
235 }
236}
237
238time_t
239do_forcereadsignconf(task_type* task, const char* zonename, void* zonearg, void *contextarg)
240{
241 struct worker_context* context = contextarg;
242 engine_type* engine = context->engine;
243 zone_type* zone = zonearg;
244 ods_status status;
245 /* perform 'load signconf' task */
246 status = tools_signconf(zone);
247 if (status == ODS_STATUS_UNCHANGED) {
248 schedule_unscheduletask(engine->taskq, TASK_SIGNCONF, zone->name);
249 if(!zone->zoneconfigvalid) {
250 zone->zoneconfigvalid = 1;
251 schedule_unscheduletask(engine->taskq, TASK_READ, zone->name);
252 schedule_scheduletask(engine->taskq, TASK_READ, zone->name, zone, &zone->zone_lock, schedule_PROMPTLY);
253 }
254 return schedule_SUCCESS;
255 } else if (status == ODS_STATUS_OK) {
256 schedule_unscheduletask(engine->taskq, TASK_SIGNCONF, zone->name);
257 schedule_unscheduletask(engine->taskq, TASK_READ, zone->name);
258 schedule_unscheduletask(engine->taskq, TASK_SIGN, zone->name);
259 schedule_unscheduletask(engine->taskq, TASK_WRITE, zone->name);
260 schedule_scheduletask(engine->taskq, TASK_READ, zone->name, zone, &zone->zone_lock, schedule_PROMPTLY);
261 return schedule_SUCCESS;
262 } else {
263 return schedule_SUCCESS;
264 }
265}
266
267time_t
268do_signzone(task_type* task, const char* zonename, void* zonearg, void *contextarg)
269{
270 struct worker_context* context = contextarg;
271 engine_type* engine = context->engine;
272 worker_type* worker = context->worker;
273 zone_type* zone = zonearg;
274 ods_status status;
275 time_t start = 0;
276 time_t end = 0;
277 long nsubtasks = 0;
278 long nsubtasksfailed = 0;
279 context->clock_in = time_now();
280 status = zone_update_serial(zone);
281 if (status != ODS_STATUS_OK) {
282 if(!strcmp(zone->signconf->soa_serial,"keep") && (status == ODS_STATUS_FOPEN_ERR || status == ODS_STATUS_CONFLICT_ERR)) {
283 if(task->backoff > 0) {
284 ods_log_error("[%s] unable to sign zone %s: failed to increment serial", worker->name, task->owner);
285 ods_log_crit("[%s] CRITICAL: repeatedly failed to sign zone %s: %s", worker->name, task->owner, ods_status2str(status));
286 } else {
287 ods_log_warning("[%s] unable to sign zone %s: failed to increment serial", worker->name, task->owner);
288 ods_log_warning("[%s] CRITICAL: failed to sign zone %s: %s", worker->name, task->owner, ods_status2str(status));
289 }
290 task->backoff = duration2time(zone->signconf->sig_resign_interval);
291 return time_now() + duration2time(zone->signconf->sig_resign_interval);
292 } else {
293 ods_log_error("[%s] unable to sign zone %s: failed to increment serial", worker->name, task->owner);
294 ods_log_crit("[%s] CRITICAL: failed to sign zone %s: %s", worker->name, task->owner, ods_status2str(status));
295 return schedule_DEFER;
296 }
297 }
298 /* start timer */
299 start = time(NULL);
300 if (zone->stats) {
301 pthread_mutex_lock(&zone->stats->stats_lock);
302 if (!zone->stats->start_time) {
303 zone->stats->start_time = start;
304 }
305 zone->stats->sig_count = 0;
306 zone->stats->sig_soa_count = 0;
307 zone->stats->sig_reuse = 0;
308 zone->stats->sig_time = 0;
309 pthread_mutex_unlock(&zone->stats->stats_lock);
310 }
311 /* check the HSM connection before queuing sign operations */
312 if (hsm_check_context()) {
313 ods_log_error("signer instructed to reload due to hsm reset in sign task");
315 pthread_mutex_lock(&engine->signal_lock);
316 pthread_cond_signal(&engine->signal_cond);
317 pthread_mutex_unlock(&engine->signal_lock);
318 ods_log_crit("[%s] CRITICAL: failed to sign zone %s: %s", worker->name, task->owner, ods_status2str(status));
319 return schedule_DEFER; /* backoff */
320 }
321 /* prepare keys */
322 status = zone_prepare_keys(zone);
323 if (status == ODS_STATUS_OK) {
324 /* queue menial, hard signing work */
325 worker_queue_zone(context, worker->taskq->signq, zone, &nsubtasks);
326 ods_log_deeebug("[%s] wait until drudgers are finished "
327 "signing zone %s", worker->name, task->owner);
328 /* sleep until work is done */
329 fifoq_waitfor(context->signq, worker, nsubtasks, &nsubtasksfailed);
330 }
331 /* stop timer */
332 end = time(NULL);
333 /* check status and jobs */
334 if (status == ODS_STATUS_OK) {
335 status = worker_check_jobs(worker, task, nsubtasks, nsubtasksfailed);
336 }
337 if (status == ODS_STATUS_OK && zone->stats) {
338 pthread_mutex_lock(&zone->stats->stats_lock);
339 zone->stats->sig_time = (end - start);
340 pthread_mutex_unlock(&zone->stats->stats_lock);
341 }
342 if (status != ODS_STATUS_OK) {
343 ods_log_crit("[%s] CRITICAL: failed to sign zone %s: %s", worker->name, task->owner, ods_status2str(status));
344 return schedule_DEFER; /* backoff */
345 }
346
347 schedule_scheduletask(engine->taskq, TASK_WRITE, zone->name, zone, &zone->zone_lock, schedule_PROMPTLY);
348 return schedule_SUCCESS;
349}
350
351time_t
352do_readzone(task_type* task, const char* zonename, void* zonearg, void *contextarg)
353{
354 ods_status status = ODS_STATUS_OK;
355 struct worker_context* context = contextarg;
356 engine_type* engine = context->engine;
357 zone_type* zone = zonearg;
358 /* perform 'read input adapter' task */
359 if (!zone->signconf->last_modified) {
360 ods_log_debug("no signconf.xml for zone %s yet", task->owner);
361 status = ODS_STATUS_ERR;
362 }
363 if (status == ODS_STATUS_OK) {
364 status = tools_input(zone);
365 if (status == ODS_STATUS_UNCHANGED) {
366 ods_log_verbose("zone %s unsigned data not changed, continue", task->owner);
367 status = ODS_STATUS_OK;
368 }
369 }
370 if (status != ODS_STATUS_OK) {
371 if (!zone->signconf->last_modified) {
372 ods_log_warning("WARNING: unable to sign zone %s, signconf is not ready", task->owner);
373 return schedule_DEFER;
374 } else if (status != ODS_STATUS_XFR_NOT_READY) {
375 /* other statuses is critical, and we know it is not ODS_STATUS_OK */
376 if(!strcmp(zone->signconf->soa_serial,"keep") && (status == ODS_STATUS_FOPEN_ERR || status == ODS_STATUS_CONFLICT_ERR)) {
377 if(task->backoff > 0) {
378 ods_log_crit("CRITICAL: repeatedly failed to sign zone %s: %s", task->owner, ods_status2str(status));
379 } else {
380 ods_log_warning("Warning: failed to sign zone %s: %s", task->owner, ods_status2str(status));
381 }
382 task->backoff = duration2time(zone->signconf->sig_resign_interval);
383 return time_now() + duration2time(zone->signconf->sig_resign_interval);
384 } else {
385 ods_log_crit("CRITICAL: failed to sign zone %s: %s", task->owner, ods_status2str(status));
386 return schedule_DEFER;
387 }
388 }
389 } else {
390 /* unscheduling an existing sign task should no be necessary. After a read (this action)
391 * the logical next step is a sign. No other regular procedure that does not explicitly
392 * remove a sign task could create a sign task for this zone. So here we would be able
393 * to assume there is no sign task. However it occurs. The original code before refactoring
394 * also removed sign tasks. My premis this is caused by the locking code. A task actually
395 * starts executing even though the zone is being processed from another task. So for
396 * instance performing a force signconf just before a read task starts, can load to the read
397 * task to start executing even though the signconf task was still running. The forced signconf
398 * task cannot remove the read task (it is no longer queued), but will schedule a sign task.
399 * The read task can then continue, finding the just created sign task in its path.
400 */
401 schedule_unscheduletask(engine->taskq, TASK_SIGN, zone->name);
402 schedule_scheduletask(engine->taskq, TASK_SIGN, zone->name, zone, &zone->zone_lock, schedule_PROMPTLY);
403 return schedule_SUCCESS;
404 }
405}
406
407time_t
408do_forcereadzone(task_type* task, const char* zonename, void* zonearg, void *contextarg)
409{
410 ods_status status = ODS_STATUS_OK;
411 struct worker_context* context = contextarg;
412 engine_type* engine = context->engine;
413 zone_type* zone = zonearg;
414 /* perform 'read input adapter' task */
415 if (!zone->signconf->last_modified) {
416 ods_log_debug("no signconf.xml for zone %s yet", task->owner);
417 status = ODS_STATUS_ERR;
418 }
419 if (status == ODS_STATUS_OK) {
420 status = tools_input(zone);
421 if (status == ODS_STATUS_UNCHANGED) {
422 ods_log_verbose("zone %s unsigned data not changed, continue", task->owner);
423 status = ODS_STATUS_OK;
424 }
425 }
426 if (status != ODS_STATUS_OK) {
427 if (!zone->signconf->last_modified) {
428 ods_log_warning("WARNING: unable to sign zone %s, signconf is not ready", task->owner);
429 } else if (status != ODS_STATUS_XFR_NOT_READY) {
430 /* other statuses is critical, and we know it is not ODS_STATUS_OK */
431 if(!strcmp(zone->signconf->soa_serial,"keep") && (status == ODS_STATUS_FOPEN_ERR || status == ODS_STATUS_CONFLICT_ERR)) {
432 if(task->backoff > 0) {
433 ods_log_crit("CRITICAL: repeatedly failed to sign zone %s: %s", task->owner, ods_status2str(status));
434 } else {
435 ods_log_warning("Warning: failed to sign zone %s: %s", task->owner, ods_status2str(status));
436 }
437 task->backoff = duration2time(zone->signconf->sig_resign_interval);
438 return time_now() + duration2time(zone->signconf->sig_resign_interval);
439 } else {
440 ods_log_crit("CRITICAL: failed to sign zone %s: %s", task->owner, ods_status2str(status));
441 return schedule_DEFER;
442 }
443 }
444 return schedule_SUCCESS;
445 } else {
446 schedule_unscheduletask(engine->taskq, TASK_SIGNCONF, zone->name);
447 schedule_unscheduletask(engine->taskq, TASK_FORCEREAD, zone->name);
448 schedule_unscheduletask(engine->taskq, TASK_READ, zone->name);
449 schedule_unscheduletask(engine->taskq, TASK_SIGN, zone->name);
450 schedule_unscheduletask(engine->taskq, TASK_WRITE, zone->name);
451 schedule_scheduletask(engine->taskq, TASK_SIGN, zone->name, zone, &zone->zone_lock, schedule_PROMPTLY);
452 return schedule_SUCCESS;
453 }
454}
455
456time_t
457do_writezone(task_type* task, const char* zonename, void* zonearg, void *contextarg)
458{
459 struct worker_context* context = contextarg;
460 engine_type* engine = context->engine;
461 worker_type* worker = context->worker;
462 zone_type* zone = zonearg;
463 ods_status status;
464 time_t resign;
465 context->clock_in = time_now(); /* TODO this means something different */
466 /* perform write to output adapter task */
467 status = tools_output(zone, engine);
468 if (status != ODS_STATUS_OK) {
469 ods_log_crit("[%s] CRITICAL: failed to sign zone %s: %s",
470 worker->name, task->owner, ods_status2str(status));
471 return schedule_DEFER;
472 }
473 if (zone->signconf &&
474 duration2time(zone->signconf->sig_resign_interval)) {
475 resign = context->clock_in +
476 duration2time(zone->signconf->sig_resign_interval);
477 } else {
478 ods_log_error("[%s] unable to retrieve resign interval "
479 "for zone %s: duration2time() failed",
480 worker->name, task->owner);
481 ods_log_info("[%s] defaulting to 1H resign interval for "
482 "zone %s", worker->name, task->owner);
483 resign = context->clock_in + 3600;
484 }
485 /* backup the last successful run */
486 status = zone_backup2(zone, resign);
487 if (status != ODS_STATUS_OK) {
488 ods_log_warning("[%s] unable to backup zone %s: %s",
489 worker->name, task->owner, ods_status2str(status));
490 /* just a warning */
491 status = ODS_STATUS_OK;
492 }
493 schedule_scheduletask(engine->taskq, TASK_SIGN, zone->name, zone, &zone->zone_lock, resign);
494 return schedule_SUCCESS;
495}
ods_status rrset_sign(hsm_ctx_t *ctx, rrset_type *rrset, time_t signtime)
Definition: rrset.c:570
time_t do_forcereadzone(task_type *task, const char *zonename, void *zonearg, void *contextarg)
Definition: signertasks.c:408
time_t do_readzone(task_type *task, const char *zonename, void *zonearg, void *contextarg)
Definition: signertasks.c:352
void drudge(worker_type *worker)
Definition: signertasks.c:152
time_t do_forcereadsignconf(task_type *task, const char *zonename, void *zonearg, void *contextarg)
Definition: signertasks.c:239
time_t do_readsignconf(task_type *task, const char *zonename, void *zonearg, void *contextarg)
Definition: signertasks.c:209
time_t do_signzone(task_type *task, const char *zonename, void *zonearg, void *contextarg)
Definition: signertasks.c:268
time_t do_writezone(task_type *task, const char *zonename, void *zonearg, void *contextarg)
Definition: signertasks.c:457
rrset_type * rrset
Definition: denial.h:55
rrset_type * rrsets
Definition: domain.h:58
denial_type * denial
Definition: domain.h:53
schedule_type * taskq
Definition: engine.h:54
pthread_mutex_t signal_lock
Definition: engine.h:67
pthread_cond_t signal_cond
Definition: engine.h:66
int need_to_reload
Definition: engine.h:63
ldns_rbtree_t * domains
Definition: namedb.h:51
rrset_type * next
Definition: rrset.h:60
const char * soa_serial
Definition: signconf.h:68
duration_type * sig_resign_interval
Definition: signconf.h:46
time_t last_modified
Definition: signconf.h:72
uint32_t sig_reuse
Definition: stats.h:58
pthread_mutex_t stats_lock
Definition: stats.h:63
uint32_t sig_soa_count
Definition: stats.h:57
time_t sig_time
Definition: stats.h:59
uint32_t sig_count
Definition: stats.h:56
time_t start_time
Definition: stats.h:61
worker_type * worker
Definition: signertasks.h:40
engine_type * engine
Definition: signertasks.h:39
fifoq_type * signq
Definition: signertasks.h:41
time_t clock_in
Definition: signertasks.h:42
signconf_type * signconf
Definition: zone.h:77
stats_type * stats
Definition: zone.h:85
namedb_type * db
Definition: zone.h:79
int zoneconfigvalid
Definition: zone.h:90
const char * name
Definition: zone.h:69
pthread_mutex_t zone_lock
Definition: zone.h:86
ods_status tools_signconf(zone_type *zone)
Definition: tools.c:52
ods_status tools_output(zone_type *zone, engine_type *engine)
Definition: tools.c:181
ods_status tools_input(zone_type *zone)
Definition: tools.c:93
ods_status zone_backup2(zone_type *zone, time_t nextResign)
Definition: zone.c:1040
ods_status zone_update_serial(zone_type *zone)
Definition: zone.c:434
ods_status zone_prepare_keys(zone_type *zone)
Definition: zone.c:393