blob: 90831d0559470b2473f6c55d1929f1fa9c786459 [file] [log] [blame]
{-| Incident handling in the maintenance daemon.
This module implements the submission of actions for ongoing
repair events reported by the node-status data collector.
-}
{-
Copyright (C) 2015 Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-}
module Ganeti.MaintD.HandleIncidents
( handleIncidents
) where
import Control.Arrow ((&&&))
import Control.Exception.Lifted (bracket)
import Control.Lens.Setter (over)
import Control.Monad (foldM)
import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString.UTF8 as UTF8
import Data.Function (on)
import Data.IORef (IORef)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Text.JSON as J
import Ganeti.BasicTypes ( GenericResult(..), ResultT, mkResultT, Down(..))
import qualified Ganeti.Constants as C
import Ganeti.HTools.AlgorithmParams (AlgorithmOptions(..), defaultOptions)
import Ganeti.HTools.Cluster.Evacuate (tryNodeEvac, EvacSolution(..))
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Group as Group
import qualified Ganeti.HTools.Instance as Instance
import qualified Ganeti.HTools.Node as Node
import Ganeti.HTools.Types (Idx)
import Ganeti.JQueue (currentTimestamp)
import Ganeti.Jobs (execJobsWaitOkJid, submitJobs, forceFailover)
import Ganeti.Logging.Lifted
import qualified Ganeti.Luxi as L
import Ganeti.MaintD.MemoryState ( MemoryState, getIncidents, rmIncident
, updateIncident, appendJobs)
import Ganeti.MaintD.Utils (annotateOpCode, getRepairCommand)
import Ganeti.Objects.Lens (incidentJobsL)
import Ganeti.Objects.Maintenance ( RepairStatus(..), RepairAction(..)
, Incident(..))
import Ganeti.OpCodes (OpCode(..), MetaOpCode)
import qualified Ganeti.Path as Path
import Ganeti.Types ( cTimeOf, uuidOf, mkNonEmpty, fromJobId
, EvacMode(..), TagKind(..))
import Ganeti.Utils (maxBy, logAndBad)
-- | Pick the more severe of two incidents. Incidents are compared first
-- by their 'incidentAction'; for equal actions the creation timestamp is
-- compared in reversed order (via 'Down'), so that the older incident
-- counts as the greater one.
moreSevereIncident :: Incident -> Incident -> Incident
moreSevereIncident = maxBy (compare `on` severity)
  where
    -- Sort key: the action, then the reversed creation time.
    severity inc = (incidentAction inc, Down (cTimeOf inc))
-- | Group a list of incidents by the node they refer to, keeping for
-- each node name only the incident selected by 'moreSevereIncident'.
rankIncidents :: [Incident] -> Map.Map String Incident
rankIncidents =
  -- 'Map.fromListWith' combines a later element with the value already
  -- stored as @f new old@, which is what inserting the incidents one by
  -- one with 'Map.insertWith' does as well.
  Map.fromListWith moreSevereIncident . map (incidentNode &&& id)
-- | Build the job (a list with a single annotated opcode) that sets the
-- drained flag on the node with the given name. The name is converted
-- with 'mkNonEmpty'; if that conversion fails, so does the computation.
-- The opcode is annotated with the reason @"Draining <name>"@ and the
-- current timestamp.
drainJob :: String -> ResultT String IO [ MetaOpCode ]
drainJob name = do
  nodeName <- mkNonEmpty name
  timestamp <- liftIO currentTimestamp
  let -- Only the drained flag is set; every other optional node
      -- parameter is passed as 'Nothing'.
      drainOp = OpNodeSetParams { opNodeName = nodeName
                                , opNodeUuid = Nothing
                                , opForce = True
                                , opHvState = Nothing
                                , opDiskState = Nothing
                                , opMasterCandidate = Nothing
                                , opOffline = Nothing
                                , opDrained = Just True
                                , opAutoPromote = False
                                , opMasterCapable = Nothing
                                , opVmCapable = Nothing
                                , opSecondaryIp = Nothing
                                , opgenericNdParams = Nothing
                                , opPowered = Nothing
                                , opVerbose = False
                                , opDebug = False
                                }
  return [annotateOpCode ("Draining " ++ name) timestamp drainOp]
-- | Submit and register the next job for a node evacuation.
--
-- Exactly one step is carried out per call, namely the first applicable
-- one of the following:
--
-- * if 'Node.offline' is not set for the node, submit a job draining it;
--
-- * if the node still has a primary instance, submit the jobs moving the
--   first one away ('ChangePrimary');
--
-- * if the node still has a secondary instance, submit the jobs moving
--   the first one away ('ChangeSecondary');
--
-- * otherwise tag the node with the incident's tag, wait for that job,
--   and mark the incident as 'RSCompleted'.
--
-- In the first three cases the incident is stored with status
-- 'RSPending'. All submitted job ids are added to the incident and to
-- the memory state. A failed job submission is logged and aborts the
-- computation.
handleEvacuation :: L.Client -- ^ Luxi client to use
                 -> IORef MemoryState -- ^ memory state of the daemon
                 -> (Group.List, Node.List, Instance.List) -- ^ cluster
                 -> Idx -- ^ index of the node to evacuate
                 -> Bool -- ^ whether to try migrations
                 -> Set.Set Int -- ^ allowed nodes for evacuation
                 -> Incident -- ^ the incident
                 -> ResultT String IO (Set.Set Int) -- ^ nodes still available
handleEvacuation client memst (gl, nl, il) ndx migrate freenodes incident = do
  logDebug $ "Handling evacuation of " ++ nodeName
  nextStep
  where
    victim = Container.find ndx nl
    nodeName = Node.name victim

    -- Evacuation targets are restricted to the still-available nodes.
    targetNames = [ Node.name $ Container.find n nl
                  | n <- Set.elems freenodes ]
    evacOpts = defaultOptions { algEvacMode = True
                              , algIgnoreSoftErrors = True
                              , algRestrictToNodes = Just targetNames
                              }

    annotate = annotateOpCode $ "Evacuating " ++ nodeName

    -- Unless migrations are to be tried, rewrite the opcode with
    -- 'forceFailover'.
    prepareOp op = if migrate then op else forceFailover op

    -- Store the incident with the given status and the additional job
    -- ids, and remember the jobs in the memory state.
    register status jids = do
      let incident' = over incidentJobsL (++ jids)
                        $ incident { incidentRepairStatus = status }
      liftIO $ updateIncident memst incident'
      liftIO $ appendJobs memst jids

    -- Handle the outcome of a job submission.
    recordSubmission (Ok jids) = do
      register RSPending jids
      logDebug $ "Jobs submitted: " ++ show (map fromJobId jids)
    recordSubmission (Bad msg) =
      mkResultT . logAndBad
        $ "Failure evacuating " ++ nodeName ++ ": " ++ msg

    -- Compute and submit the evacuation of a single instance; the nodes
    -- used as targets are removed from the available ones.
    evacuateOne mode inst = do
      logInfo $ "Evacuating instance "
                ++ Instance.name (Container.find inst il)
                ++ " from " ++ nodeName
      (_, _, sol) <- mkResultT . return
                       $ tryNodeEvac evacOpts gl nl il mode [inst]
      now <- liftIO currentTimestamp
      let jobs = [ [ annotate now (prepareOp op) | op <- job ]
                 | job <- esOpCodes sol ]
      submitted <- liftIO $ submitJobs jobs client
      recordSubmission submitted
      let touched = [ n | (_, _, ns) <- esMoved sol, n <- ns ]
      return $ Set.difference freenodes (Set.fromList touched)

    nextStep
      | not (Node.offline victim) = do
          logDebug $ "Draining node " ++ nodeName
          job <- drainJob nodeName
          submitted <- liftIO $ submitJobs [job] client
          recordSubmission submitted
          return freenodes
      | inst:_ <- Node.pList victim = evacuateOne ChangePrimary inst
      | inst:_ <- Node.sList victim = evacuateOne ChangeSecondary inst
      | otherwise = do
          logDebug $ "Finished evacuation of " ++ nodeName
          now <- liftIO currentTimestamp
          let tagOp = OpTagsSet TagKindNode [ incidentTag incident ]
                        (Just nodeName)
          jids <- mkResultT
                    $ execJobsWaitOkJid [[ annotate now tagOp ]] client
          register RSCompleted jids
          return freenodes
-- | Submit the next action for a live-repair incident.
--
-- What is done depends on the state of the incident:
--
-- * if jobs are already recorded for it, the node is tagged with the
--   incident's tag and the incident is marked as 'RSCompleted';
--
-- * if there are no jobs yet and 'getRepairCommand' yields a non-empty
--   command, a job running that command is submitted and recorded;
--
-- * if there are no jobs yet and no usable command, the node is tagged
--   with a failure tag and the incident is marked as 'RSFailed'.
--
-- In every case the handled node is removed from the returned set.
handleLiveRepairs :: L.Client -- ^ Luxi client to use
                  -> IORef MemoryState -- ^ memory state of the daemon
                  -> Idx -- ^ the node to handle the event on
                  -> Set.Set Int -- ^ unaffected nodes
                  -> Incident -- ^ the incident
                  -> ResultT String IO (Set.Set Int) -- ^ nodes still available
handleLiveRepairs client memst ndx freenodes incident = do
  now <- liftIO currentTimestamp
  logDebug $ "Handling requested command " ++ show requested
             ++ " on " ++ nodeName
  nextStep now
  return $ Set.delete ndx freenodes
  where
    requested = getRepairCommand incident
    nodeName = incidentNode incident
    uuidStr = UTF8.toString $ incidentUuid incident

    -- Set a single tag on the node and wait for the job to succeed.
    tagNode reason now tag =
      mkResultT $ execJobsWaitOkJid
        [[ annotateOpCode reason now
             (OpTagsSet TagKindNode [ tag ] (Just nodeName)) ]] client

    -- Store the given version of the incident, extended by the new job
    -- ids, and remember the jobs in the memory state.
    register inc jids = do
      liftIO . updateIncident memst $ over incidentJobsL (++ jids) inc
      liftIO $ appendJobs memst jids

    nextStep now
      | not . null $ incidentJobs incident = do
          logDebug "Command execution has succeeded"
          jids <- tagNode "repair command requested by node" now
                    (incidentTag incident)
          register (incident { incidentRepairStatus = RSCompleted }) jids
      | Just cmd <- requested, cmd /= "" = do
          logDebug "Submitting repair command job"
          name' <- mkNonEmpty nodeName
          cmd' <- mkNonEmpty cmd
          orig' <- mkNonEmpty . J.encode $ incidentOriginal incident
          let repairOp = OpRepairCommand { opNodeName = name'
                                         , opRepairCommand = cmd'
                                         , opInput = Just orig'
                                         }
          submitted <- liftIO $ submitJobs
                         [[ annotateOpCode "repair command requested by node"
                              now repairOp ]] client
          case submitted of
            Ok jids -> do
              register incident jids
              logDebug $ "Jobs submitted: " ++ show (map fromJobId jids)
            Bad e -> mkResultT . logAndBad
                       $ "Failure requesting command " ++ cmd ++ " on "
                         ++ nodeName ++ ": " ++ e
      | otherwise = do
          logInfo $ "Marking incident " ++ uuidStr ++ " as failed;"
                    ++ " command for live repair not specified"
          let newtag = C.maintdFailureTagPrefix ++ uuidStr
          jids <- tagNode "marking incident as ill specified" now newtag
          register (incident { incidentRepairStatus = RSFailed
                             , incidentTag = newtag
                             }) jids
-- | Submit the next actions for a single incident, given the unaffected nodes;
-- register all submitted jobs and return the new set of unaffected nodes.
--
-- The node is identified by looking up its name in the node list. If no
-- node of that name exists, the incident is removed from the memory state
-- and the computation fails; if several nodes carry that name, the
-- computation fails without touching the incident.
handleIncident :: L.Client -- ^ Luxi client to use
               -> IORef MemoryState -- ^ memory state of the daemon
               -> (Group.List, Node.List, Instance.List) -- ^ cluster
               -> Set.Set Int -- ^ unaffected nodes
               -> (String, Incident) -- ^ node name and its incident
               -> ResultT String IO (Set.Set Int) -- ^ nodes still available
handleIncident client memstate (gl, nl, il) freeNodes (name, incident) = do
  ndx <- case Container.keys $ Container.filter ((==) name . Node.name) nl of
    [ndx] -> return ndx
    [] -> do
      logWarning $ "Node " ++ name ++ " no longer in the cluster;"
                   ++ " clearing incident " ++ show incident
      liftIO . rmIncident memstate $ uuidOf incident
      fail $ "node " ++ name ++ " left the cluster"
    ndxs -> do
      logWarning $ "Ambiguous node name " ++ name
                   ++ "; could refer to indices " ++ show ndxs
      fail $ "ambiguous name " ++ name
  -- Dispatch on the requested repair action.
  case incidentAction incident of
    RANoop -> do
      -- Nothing has to be submitted, so the incident is simply dropped.
      logDebug $ "Nothing to do for " ++ show incident
      liftIO . rmIncident memstate $ uuidOf incident
      return freeNodes
    RALiveRepair ->
      handleLiveRepairs client memstate ndx freeNodes incident
    RAEvacuate ->
      -- Evacuation that may use migrations.
      handleEvacuation client memstate (gl, nl, il) ndx True freeNodes incident
    RAEvacuateFailover ->
      -- Evacuation without migrations.
      handleEvacuation client memstate (gl, nl, il) ndx False freeNodes incident
-- | Carry out the next maintenance step for every node that has an
-- active incident (one with repair status at most 'RSPending'); per
-- node only the incident chosen by 'rankIncidents' is handled. The
-- result is the set of indices of the nodes not affected by these
-- operations. Submitted jobs are registered by the individual handlers.
--
-- A Luxi client is only opened if there is at least one active
-- incident; it is closed again when handling finishes.
handleIncidents :: IORef MemoryState
                -> (Group.List, Node.List, Instance.List)
                -> ResultT String IO (Set.Set Int)
handleIncidents memstate (gl, nl, il) = do
  incidents <- getIncidents memstate
  let isActive = (<= RSPending) . incidentRepairStatus
      active = filter isActive incidents
      perNode = rankIncidents active
      hasIncident = (`Map.member` perNode) . Node.name
      busyNodes = Set.fromList . Container.keys
                    $ Container.filter hasIncident nl
      allNodes = Set.fromList $ Container.keys nl
      freeNodes = Set.difference allNodes busyNodes
  case active of
    [] -> return freeNodes
    _ -> do
      luxiSocket <- liftIO Path.defaultQuerySocket
      let -- Thread the set of available nodes through all incidents,
          -- in ascending order of node name.
          handleAll client =
            foldM (handleIncident client memstate (gl, nl, il)) freeNodes
              $ Map.toAscList perNode
      bracket (liftIO $ L.getLuxiClient luxiSocket)
              (liftIO . L.closeClient)
              handleAll