| {-# LANGUAGE TupleSections #-} |
| |
| {-| Auto-repair tool for Ganeti. |
| |
| -} |
| |
| {- |
| |
| Copyright (C) 2013 Google Inc. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
| 02110-1301, USA. |
| |
| -} |
| |
| module Ganeti.HTools.Program.Harep |
| ( main |
| , arguments |
| , options) where |
| |
| import Control.Exception (bracket) |
| import Control.Monad |
| import Data.Function |
| import Data.List |
| import Data.Maybe |
| import Data.Ord |
| import System.Time |
| import qualified Data.Map as Map |
| |
| import Ganeti.BasicTypes |
| import Ganeti.Common |
| import Ganeti.Errors |
| import Ganeti.Jobs |
| import Ganeti.OpCodes |
| import Ganeti.OpParams |
| import Ganeti.Types |
| import Ganeti.Utils |
| import qualified Ganeti.Constants as C |
| import qualified Ganeti.Luxi as L |
| import qualified Ganeti.Path as Path |
| |
| import Ganeti.HTools.CLI |
| import Ganeti.HTools.Loader |
| import Ganeti.HTools.ExtLoader |
| import Ganeti.HTools.Types |
| import qualified Ganeti.HTools.Container as Container |
| import qualified Ganeti.HTools.Instance as Instance |
| import qualified Ganeti.HTools.Node as Node |
| |
| -- | Options list and functions. |
| options :: IO [OptType] |
| options = do |
| luxi <- oLuxiSocket |
| return |
| [ luxi |
| , oJobDelay |
| ] |
| |
| arguments :: [ArgCompletion] |
| arguments = [] |
| |
| data InstanceData = InstanceData { arInstance :: Instance.Instance |
| , arState :: AutoRepairStatus |
| , tagsToRemove :: [String] |
| } |
| deriving (Eq, Show) |
| |
| -- | Parse a tag into an 'AutoRepairData' record. |
| -- |
| -- @Nothing@ is returned if the tag is not an auto-repair tag, or if it's |
| -- malformed. |
| parseInitTag :: String -> Maybe AutoRepairData |
| parseInitTag tag = |
| let parsePending = do |
| subtag <- chompPrefix C.autoRepairTagPending tag |
| case sepSplit ':' subtag of |
| [rtype, uuid, ts, jobs] -> makeArData rtype uuid ts jobs |
| _ -> fail ("Invalid tag: " ++ show tag) |
| |
| parseResult = do |
| subtag <- chompPrefix C.autoRepairTagResult tag |
| case sepSplit ':' subtag of |
| [rtype, uuid, ts, result, jobs] -> do |
| arData <- makeArData rtype uuid ts jobs |
| result' <- autoRepairResultFromRaw result |
| return arData { arResult = Just result' } |
| _ -> fail ("Invalid tag: " ++ show tag) |
| |
| makeArData rtype uuid ts jobs = do |
| rtype' <- autoRepairTypeFromRaw rtype |
| ts' <- tryRead "auto-repair time" ts |
| jobs' <- mapM makeJobIdS $ sepSplit '+' jobs |
| return AutoRepairData { arType = rtype' |
| , arUuid = uuid |
| , arTime = TOD ts' 0 |
| , arJobs = jobs' |
| , arResult = Nothing |
| , arTag = tag |
| } |
| in |
| parsePending `mplus` parseResult |
| |
| -- | Return the 'AutoRepairData' element of an 'AutoRepairStatus' type. |
| getArData :: AutoRepairStatus -> Maybe AutoRepairData |
| getArData status = |
| case status of |
| ArHealthy (Just d) -> Just d |
| ArFailedRepair d -> Just d |
| ArPendingRepair d -> Just d |
| ArNeedsRepair d -> Just d |
| _ -> Nothing |
| |
| -- | Return a short name for each auto-repair status. |
| -- |
| -- This is a more concise representation of the status, because the default |
| -- "Show" formatting includes all the accompanying auto-repair data. |
| arStateName :: AutoRepairStatus -> String |
| arStateName status = |
| case status of |
| ArHealthy _ -> "Healthy" |
| ArFailedRepair _ -> "Failure" |
| ArPendingRepair _ -> "Pending repair" |
| ArNeedsRepair _ -> "Needs repair" |
| |
| -- | Return a new list of tags to remove that includes @arTag@ if present. |
| delCurTag :: InstanceData -> [String] |
| delCurTag instData = |
| let arData = getArData $ arState instData |
| rmTags = tagsToRemove instData |
| in |
| case arData of |
| Just d -> arTag d : rmTags |
| Nothing -> rmTags |
| |
| -- | Set the initial auto-repair state of an instance from its auto-repair tags. |
| -- |
| -- The rules when there are multiple tags is: |
| -- |
| -- * the earliest failure result always wins |
| -- |
| -- * two or more pending repairs results in a fatal error |
| -- |
| -- * a pending result from id X and a success result from id Y result in error |
| -- if Y is newer than X |
| -- |
| -- * if there are no pending repairs, the newest success result wins, |
| -- otherwise the pending result is used. |
| setInitialState :: Instance.Instance -> Result InstanceData |
| setInitialState inst = |
| let arData = mapMaybe parseInitTag $ Instance.allTags inst |
| -- Group all the AutoRepairData records by id (i.e. by repair task), and |
| -- present them from oldest to newest. |
| arData' = sortBy (comparing arUuid) arData |
| arGroups = groupBy ((==) `on` arUuid) arData' |
| arGroups' = sortBy (comparing $ minimum . map arTime) arGroups |
| in |
| foldM arStatusCmp (InstanceData inst (ArHealthy Nothing) []) arGroups' |
| |
| -- | Update the initial status of an instance with new repair task tags. |
| -- |
| -- This function gets called once per repair group in an instance's tag, and it |
| -- determines whether to set the status of the instance according to this new |
| -- group, or to keep the existing state. See the documentation for |
| -- 'setInitialState' for the rules to be followed when determining this. |
| arStatusCmp :: InstanceData -> [AutoRepairData] -> Result InstanceData |
| arStatusCmp instData arData = |
| let curSt = arState instData |
| arData' = sortBy (comparing keyfn) arData |
| keyfn d = (arResult d, arTime d) |
| newData = last arData' |
| newSt = case arResult newData of |
| Just ArSuccess -> ArHealthy $ Just newData |
| Just ArEnoperm -> ArHealthy $ Just newData |
| Just ArFailure -> ArFailedRepair newData |
| Nothing -> ArPendingRepair newData |
| in |
| case curSt of |
| ArFailedRepair _ -> Ok instData -- Always keep the earliest failure. |
| ArHealthy _ -> Ok instData { arState = newSt |
| , tagsToRemove = delCurTag instData |
| } |
| ArPendingRepair d -> Bad ( |
| "An unfinished repair was found in instance " ++ |
| Instance.name (arInstance instData) ++ ": found tag " ++ |
| show (arTag newData) ++ ", but older pending tag " ++ |
| show (arTag d) ++ "exists.") |
| |
| ArNeedsRepair _ -> Bad |
| "programming error: ArNeedsRepair found as an initial state" |
| |
| -- | Query jobs of a pending repair, returning the new instance data. |
| processPending :: L.Client -> InstanceData -> IO InstanceData |
| processPending client instData = |
| case arState instData of |
| (ArPendingRepair arData) -> do |
| sts <- L.queryJobsStatus client $ arJobs arData |
| time <- getClockTime |
| case sts of |
| Bad e -> exitErr $ "could not check job status: " ++ formatError e |
| Ok sts' -> |
| if any (<= JOB_STATUS_RUNNING) sts' then |
| return instData -- (no change) |
| else do |
| let iname = Instance.name $ arInstance instData |
| srcSt = arStateName $ arState instData |
| destSt = arStateName arState' |
| putStrLn ("Moving " ++ iname ++ " from " ++ show srcSt ++ " to " ++ |
| show destSt) |
| commitChange client instData' |
| where |
| instData' = |
| instData { arState = arState' |
| , tagsToRemove = delCurTag instData |
| } |
| arState' = |
| if all (== JOB_STATUS_SUCCESS) sts' then |
| ArHealthy $ Just (updateTag $ arData { arResult = Just ArSuccess |
| , arTime = time }) |
| else |
| ArFailedRepair (updateTag $ arData { arResult = Just ArFailure |
| , arTime = time }) |
| |
| _ -> return instData |
| |
| -- | Update the tag of an 'AutoRepairData' record to match all the other fields. |
| updateTag :: AutoRepairData -> AutoRepairData |
| updateTag arData = |
| let ini = [autoRepairTypeToRaw $ arType arData, |
| arUuid arData, |
| clockTimeToString $ arTime arData] |
| end = [intercalate "+" . map (show . fromJobId) $ arJobs arData] |
| (pfx, middle) = |
| case arResult arData of |
| Nothing -> (C.autoRepairTagPending, []) |
| Just rs -> (C.autoRepairTagResult, [autoRepairResultToRaw rs]) |
| in |
| arData { arTag = pfx ++ intercalate ":" (ini ++ middle ++ end) } |
| |
| -- | Apply and remove tags from an instance as indicated by 'InstanceData'. |
| -- |
| -- If the /arState/ of the /InstanceData/ record has an associated |
| -- 'AutoRepairData', add its tag to the instance object. Additionally, if |
| -- /tagsToRemove/ is not empty, remove those tags from the instance object. The |
| -- returned /InstanceData/ object always has an empty /tagsToRemove/. |
| commitChange :: L.Client -> InstanceData -> IO InstanceData |
| commitChange client instData = do |
| let iname = Instance.name $ arInstance instData |
| arData = getArData $ arState instData |
| rmTags = tagsToRemove instData |
| execJobsWaitOk' opcodes = do |
| res <- execJobsWaitOk [map wrapOpCode opcodes] client |
| case res of |
| Ok _ -> return () |
| Bad e -> exitErr e |
| |
| when (isJust arData) $ do |
| let tag = arTag $ fromJust arData |
| putStrLn (">>> Adding the following tag to " ++ iname ++ ":\n" ++ show tag) |
| execJobsWaitOk' [OpTagsSet (TagInstance iname) [tag]] |
| |
| unless (null rmTags) $ do |
| putStr (">>> Removing the following tags from " ++ iname ++ ":\n" ++ |
| unlines (map show rmTags)) |
| execJobsWaitOk' [OpTagsDel (TagInstance iname) rmTags] |
| |
| return instData { tagsToRemove = [] } |
| |
| -- | Detect brokenness with an instance and suggest repair type and jobs to run. |
| detectBroken :: Node.List -> Instance.Instance |
| -> Maybe (AutoRepairType, [OpCode]) |
| detectBroken nl inst = |
| let disk = Instance.diskTemplate inst |
| iname = Instance.name inst |
| offPri = Node.offline $ Container.find (Instance.pNode inst) nl |
| offSec = Node.offline $ Container.find (Instance.sNode inst) nl |
| in |
| case disk of |
| DTDrbd8 |
| | offPri && offSec -> |
| Just ( |
| ArReinstall, |
| [ OpInstanceRecreateDisks { opInstanceName = iname |
| , opInstanceUuid = Nothing |
| , opRecreateDisksInfo = RecreateDisksAll |
| , opNodes = [] |
| -- FIXME: there should be a better way to |
| -- specify opcode parameters than abusing |
| -- mkNonEmpty in this way (using the fact |
| -- that Maybe is used both for optional |
| -- fields, and to express failure). |
| , opNodeUuids = Nothing |
| , opIallocator = mkNonEmpty "hail" |
| } |
| , OpInstanceReinstall { opInstanceName = iname |
| , opInstanceUuid = Nothing |
| , opOsType = Nothing |
| , opTempOsParams = Nothing |
| , opForceVariant = False |
| } |
| ]) |
| | offPri -> |
| Just ( |
| ArFailover, |
| [ OpInstanceFailover { opInstanceName = iname |
| , opInstanceUuid = Nothing |
| -- FIXME: ditto, see above. |
| , opShutdownTimeout = fromJust $ mkNonNegative |
| C.defaultShutdownTimeout |
| , opIgnoreConsistency = False |
| , opTargetNode = Nothing |
| , opTargetNodeUuid = Nothing |
| , opIgnoreIpolicy = False |
| , opIallocator = Nothing |
| , opMigrationCleanup = False |
| } |
| ]) |
| | offSec -> |
| Just ( |
| ArFixStorage, |
| [ OpInstanceReplaceDisks { opInstanceName = iname |
| , opInstanceUuid = Nothing |
| , opReplaceDisksMode = ReplaceNewSecondary |
| , opReplaceDisksList = [] |
| , opRemoteNode = Nothing |
| -- FIXME: ditto, see above. |
| , opRemoteNodeUuid = Nothing |
| , opIallocator = mkNonEmpty "hail" |
| , opEarlyRelease = False |
| , opIgnoreIpolicy = False |
| } |
| ]) |
| | otherwise -> Nothing |
| |
| DTPlain |
| | offPri -> |
| Just ( |
| ArReinstall, |
| [ OpInstanceRecreateDisks { opInstanceName = iname |
| , opInstanceUuid = Nothing |
| , opRecreateDisksInfo = RecreateDisksAll |
| , opNodes = [] |
| -- FIXME: ditto, see above. |
| , opNodeUuids = Nothing |
| , opIallocator = mkNonEmpty "hail" |
| } |
| , OpInstanceReinstall { opInstanceName = iname |
| , opInstanceUuid = Nothing |
| , opOsType = Nothing |
| , opTempOsParams = Nothing |
| , opForceVariant = False |
| } |
| ]) |
| | otherwise -> Nothing |
| |
| _ -> Nothing -- Other cases are unimplemented for now: DTDiskless, |
| -- DTFile, DTSharedFile, DTBlock, DTRbd, DTExt. |
| |
| -- | Perform the suggested repair on an instance if its policy allows it. |
| doRepair :: L.Client -- ^ The Luxi client |
| -> Double -- ^ Delay to insert before the first repair opcode |
| -> InstanceData -- ^ The instance data |
| -> (AutoRepairType, [OpCode]) -- ^ The repair job to perform |
| -> IO InstanceData -- ^ The updated instance data |
| doRepair client delay instData (rtype, opcodes) = |
| let inst = arInstance instData |
| ipol = Instance.arPolicy inst |
| iname = Instance.name inst |
| in |
| case ipol of |
| ArEnabled maxtype -> |
| if rtype > maxtype then do |
| uuid <- newUUID |
| time <- getClockTime |
| |
| let arState' = ArNeedsRepair ( |
| updateTag $ AutoRepairData rtype uuid time [] (Just ArEnoperm) "") |
| instData' = instData { arState = arState' |
| , tagsToRemove = delCurTag instData |
| } |
| |
| putStrLn ("Not performing a repair of type " ++ show rtype ++ " on " ++ |
| iname ++ " because only repairs up to " ++ show maxtype ++ |
| " are allowed") |
| commitChange client instData' -- Adds "enoperm" result label. |
| else do |
| putStrLn ("Executing " ++ show rtype ++ " repair on " ++ iname) |
| |
| -- After submitting the job, we must write an autorepair:pending tag, |
| -- that includes the repair job IDs so that they can be checked later. |
| -- One problem we run into is that the repair job immediately grabs |
| -- locks for the affected instance, and the subsequent TAGS_SET job is |
| -- blocked, introducing an unnecessary delay for the end-user. One |
| -- alternative would be not to wait for the completion of the TAGS_SET |
| -- job, contrary to what commitChange normally does; but we insist on |
| -- waiting for the tag to be set so as to abort in case of failure, |
| -- because the cluster is left in an invalid state in that case. |
| -- |
| -- The proper solution (in 2.9+) would be not to use tags for storing |
| -- autorepair data, or make the TAGS_SET opcode not grab an instance's |
| -- locks (if that's deemed safe). In the meantime, we introduce an |
| -- artificial delay in the repair job (via a TestDelay opcode) so that |
| -- once we have the job ID, the TAGS_SET job can complete before the |
| -- repair job actually grabs the locks. (Please note that this is not |
| -- about synchronization, but merely about speeding up the execution of |
| -- the harep tool. If this TestDelay opcode is removed, the program is |
| -- still correct.) |
| let opcodes' = |
| if delay > 0 then |
| OpTestDelay { opDelayDuration = delay |
| , opDelayOnMaster = True |
| , opDelayOnNodes = [] |
| , opDelayOnNodeUuids = Nothing |
| , opDelayRepeat = fromJust $ mkNonNegative 0 |
| } : opcodes |
| else |
| opcodes |
| |
| uuid <- newUUID |
| time <- getClockTime |
| jids <- submitJobs [map wrapOpCode opcodes'] client |
| |
| case jids of |
| Bad e -> exitErr e |
| Ok jids' -> |
| let arState' = ArPendingRepair ( |
| updateTag $ AutoRepairData rtype uuid time jids' Nothing "") |
| instData' = instData { arState = arState' |
| , tagsToRemove = delCurTag instData |
| } |
| in |
| commitChange client instData' -- Adds "pending" label. |
| |
| otherSt -> do |
| putStrLn ("Not repairing " ++ iname ++ " because it's in state " ++ |
| show otherSt) |
| return instData |
| |
| -- | Main function. |
| main :: Options -> [String] -> IO () |
| main opts args = do |
| unless (null args) $ |
| exitErr "this program doesn't take any arguments." |
| |
| luxiDef <- Path.defaultLuxiSocket |
| let master = fromMaybe luxiDef $ optLuxi opts |
| opts' = opts { optLuxi = Just master } |
| |
| (ClusterData _ nl il _ _) <- loadExternalData opts' |
| |
| let iniDataRes = mapM setInitialState $ Container.elems il |
| iniData <- exitIfBad "when parsing auto-repair tags" iniDataRes |
| |
| -- First step: check all pending repairs, see if they are completed. |
| iniData' <- bracket (L.getClient master) L.closeClient $ |
| forM iniData . processPending |
| |
| -- Second step: detect any problems. |
| let repairs = map (detectBroken nl . arInstance) iniData' |
| |
| -- Third step: create repair jobs for broken instances that are in ArHealthy. |
| let maybeRepair c (i, r) = maybe (return i) (repairHealthy c i) r |
| jobDelay = optJobDelay opts |
| repairHealthy c i = case arState i of |
| ArHealthy _ -> doRepair c jobDelay i |
| _ -> const (return i) |
| |
| repairDone <- bracket (L.getClient master) L.closeClient $ |
| forM (zip iniData' repairs) . maybeRepair |
| |
| -- Print some stats and exit. |
| let states = map ((, 1 :: Int) . arStateName . arState) repairDone |
| counts = Map.fromListWith (+) states |
| |
| putStrLn "---------------------" |
| putStrLn "Instance status count" |
| putStrLn "---------------------" |
| putStr . unlines . Map.elems $ |
| Map.mapWithKey (\k v -> k ++ ": " ++ show v) counts |