Prevent the watcher from submitting too many jobs

When the watcher runs on each node group, if it can obtain the group
lock, it submits a GROUP_VERIFY_DISKS job for each group. This happens
every 5 minutes due to a cron job. This patch stops the watcher from
submitting unnecessary verify disks jobs if there are some already
pending in the queue to prevent job congestion.

Signed-off-by: Federico Morg Pareschi <morg@google.com>
Reviewed-by: Brian Foley <bpfoley@google.com>
diff --git a/lib/watcher/__init__.py b/lib/watcher/__init__.py
index 4e946b3..881ac83 100644
--- a/lib/watcher/__init__.py
+++ b/lib/watcher/__init__.py
@@ -345,10 +345,34 @@
   return compat.any(nodes[node_name].offline for node_name in instance.snodes)
 
 
+def _GetPendingVerifyDisks(cl, uuid):
+  """Checks if there are any currently running or pending group verify jobs and
+  if so, returns their id.
+
+  """
+  qfilter = qlang.MakeSimpleFilter("status",
+                                    frozenset([constants.JOB_STATUS_RUNNING,
+                                               constants.JOB_STATUS_QUEUED,
+                                               constants.JOB_STATUS_WAITING]))
+  qresult = cl.Query(constants.QR_JOB, ["id", "summary"], qfilter)
+
+  ids = [jobid for ((_, jobid), (_, (job, ))) in qresult.data
+         if job == ("GROUP_VERIFY_DISKS(%s)" % uuid)]
+  return ids
+
+
 def _VerifyDisks(cl, uuid, nodes, instances):
   """Run a per-group "gnt-cluster verify-disks".
 
   """
+
+  existing_jobs = _GetPendingVerifyDisks(cl, uuid)
+  if existing_jobs:
+    logging.info("There are verify disks jobs already pending (%s), skipping "
+                 "VerifyDisks step for %s.",
+                 utils.CommaJoin(existing_jobs), uuid)
+    return
+
   op = opcodes.OpGroupVerifyDisks(
     group_name=uuid, priority=constants.OP_PRIO_LOW)
   op.reason = [(constants.OPCODE_REASON_SRC_WATCHER,