| {-| Cluster checker. |
| |
| -} |
| |
| {- |
| |
| Copyright (C) 2012, 2013 Google Inc. |
| All rights reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions are |
| met: |
| |
| 1. Redistributions of source code must retain the above copyright notice, |
| this list of conditions and the following disclaimer. |
| |
| 2. Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS |
| IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
| TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
| CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| -} |
| |
| module Ganeti.HTools.Program.Hcheck |
| ( main |
| , options |
| , arguments |
| ) where |
| |
| import Control.Monad |
| import Data.List (transpose) |
| import System.Exit |
| import Text.Printf (printf) |
| |
| import qualified Ganeti.HTools.Container as Container |
| import qualified Ganeti.HTools.Cluster as Cluster |
| import qualified Ganeti.HTools.Group as Group |
| import qualified Ganeti.HTools.Node as Node |
| import qualified Ganeti.HTools.Instance as Instance |
| |
| import qualified Ganeti.HTools.Program.Hbal as Hbal |
| |
| import Ganeti.Common |
| import Ganeti.HTools.CLI |
| import Ganeti.HTools.ExtLoader |
| import Ganeti.HTools.Loader |
| import Ganeti.HTools.Types |
| import Ganeti.Utils |
| |
| -- | Options list and functions. |
| options :: IO [OptType] |
| options = do |
| luxi <- oLuxiSocket |
| return |
| [ oDataFile |
| , oDiskMoves |
| , oDynuFile |
| , oEvacMode |
| , oExInst |
| , oExTags |
| , oIAllocSrc |
| , oInstMoves |
| , luxi |
| , oMachineReadable |
| , oMaxCpu |
| , oMaxSolLength |
| , oMinDisk |
| , oMinGain |
| , oMinGainLim |
| , oMinScore |
| , oNoSimulation |
| , oOfflineNode |
| , oQuiet |
| , oRapiMaster |
| , oSelInst |
| , oVerbose |
| ] |
| |
| -- | The list of arguments supported by the program. |
| arguments :: [ArgCompletion] |
| arguments = [] |
| |
| -- | Check phase - are we before (initial) or after rebalance. |
| data Phase = Initial |
| | Rebalanced |
| |
| -- | Level of presented statistics. |
| data Level = GroupLvl String -- ^ Group level, with name |
| | ClusterLvl -- ^ Cluster level |
| |
| -- | A type alias for a group index and node\/instance lists. |
| type GroupInfo = (Gdx, (Node.List, Instance.List)) |
| |
| -- | A type alias for group stats. |
| type GroupStats = ((Group.Group, Double), [Int]) |
| |
| -- | Prefix for machine readable names. |
| htcPrefix :: String |
| htcPrefix = "HCHECK" |
| |
| -- | Data showed both per group and per cluster. |
| commonData :: [(String, String)] |
| commonData =[ ("N1_FAIL", "Nodes not N+1 happy") |
| , ("CONFLICT_TAGS", "Nodes with conflicting instances") |
| , ("OFFLINE_PRI", "Instances having the primary node offline") |
| , ("OFFLINE_SEC", "Instances having a secondary node offline") |
| ] |
| |
| -- | Data showed per group. |
| groupData :: [(String, String)] |
| groupData = commonData ++ [("SCORE", "Group score")] |
| |
| -- | Data showed per cluster. |
| clusterData :: [(String, String)] |
| clusterData = commonData ++ |
| [ ("NEED_REBALANCE", "Cluster is not healthy") ] |
| |
| -- | Phase-specific prefix for machine readable version. |
| phasePrefix :: Phase -> String |
| phasePrefix Initial = "INIT" |
| phasePrefix Rebalanced = "FINAL" |
| |
| -- | Level-specific prefix for machine readable version. |
| levelPrefix :: Level -> String |
| levelPrefix GroupLvl {} = "GROUP" |
| levelPrefix ClusterLvl = "CLUSTER" |
| |
| -- | Machine-readable keys to show depending on given level. |
| keysData :: Level -> [String] |
| keysData GroupLvl {} = map fst groupData |
| keysData ClusterLvl = map fst clusterData |
| |
| -- | Description of phases for human readable version. |
| phaseDescr :: Phase -> String |
| phaseDescr Initial = "initially" |
| phaseDescr Rebalanced = "after rebalancing" |
| |
| -- | Description to show depending on given level. |
| descrData :: Level -> [String] |
| descrData GroupLvl {} = map snd groupData |
| descrData ClusterLvl = map snd clusterData |
| |
| -- | Human readable prefix for statistics. |
| phaseLevelDescr :: Phase -> Level -> String |
| phaseLevelDescr phase (GroupLvl name) = |
| printf "Statistics for group %s %s\n" name $ phaseDescr phase |
| phaseLevelDescr phase ClusterLvl = |
| printf "Cluster statistics %s\n" $ phaseDescr phase |
| |
| -- | Format a list of key, value as a shell fragment. |
| printKeysHTC :: [(String, String)] -> IO () |
| printKeysHTC = printKeys htcPrefix |
| |
| -- | Prepare string from boolean value. |
| printBool :: Bool -- ^ Whether the result should be machine readable |
| -> Bool -- ^ Value to be converted to string |
| -> String |
| printBool True True = "1" |
| printBool True False = "0" |
| printBool False b = show b |
| |
| -- | Print mapping from group idx to group uuid (only in machine |
| -- readable mode). |
| printGroupsMappings :: Group.List -> IO () |
| printGroupsMappings gl = do |
| let extract_vals g = (printf "GROUP_UUID_%d" $ Group.idx g :: String, |
| Group.uuid g) |
| printpairs = map extract_vals (Container.elems gl) |
| printKeysHTC printpairs |
| |
| -- | Prepare a single key given a certain level and phase of simulation. |
| prepareKey :: Level -> Phase -> String -> String |
| prepareKey level@ClusterLvl phase suffix = |
| printf "%s_%s_%s" (phasePrefix phase) (levelPrefix level) suffix |
| prepareKey level@(GroupLvl idx) phase suffix = |
| printf "%s_%s_%s_%s" (phasePrefix phase) (levelPrefix level) idx suffix |
| |
| -- | Print all the statistics for given level and phase. |
| printStats :: Int -- ^ Verbosity level |
| -> Bool -- ^ If the output should be machine readable |
| -> Level -- ^ Level on which we are printing |
| -> Phase -- ^ Current phase of simulation |
| -> [String] -- ^ Values to print |
| -> IO () |
| printStats _ True level phase values = do |
| let keys = map (prepareKey level phase) (keysData level) |
| printKeysHTC $ zip keys values |
| |
| printStats verbose False level phase values = do |
| let prefix = phaseLevelDescr phase level |
| descr = descrData level |
| unless (verbose < 1) $ do |
| putStrLn "" |
| putStr prefix |
| mapM_ (uncurry (printf " %s: %s\n")) (zip descr values) |
| |
| -- | Extract name or idx from group. |
| extractGroupData :: Bool -> Group.Group -> String |
| extractGroupData True grp = show $ Group.idx grp |
| extractGroupData False grp = Group.name grp |
| |
| -- | Prepare values for group. |
| prepareGroupValues :: [Int] -> Double -> [String] |
| prepareGroupValues stats score = |
| map show stats ++ [printf "%.8f" score] |
| |
| -- | Prepare values for cluster. |
| prepareClusterValues :: Bool -> [Int] -> [Bool] -> [String] |
| prepareClusterValues machineread stats bstats = |
| map show stats ++ map (printBool machineread) bstats |
| |
| -- | Print all the statistics on a group level. |
| printGroupStats :: Int -> Bool -> Phase -> GroupStats -> IO () |
| printGroupStats verbose machineread phase ((grp, score), stats) = do |
| let values = prepareGroupValues stats score |
| extradata = extractGroupData machineread grp |
| printStats verbose machineread (GroupLvl extradata) phase values |
| |
| -- | Print all the statistics on a cluster (global) level. |
| printClusterStats :: Int -> Bool -> Phase -> [Int] -> Bool -> IO () |
| printClusterStats verbose machineread phase stats needhbal = do |
| let values = prepareClusterValues machineread stats [needhbal] |
| printStats verbose machineread ClusterLvl phase values |
| |
| -- | Check if any of cluster metrics is non-zero. |
| clusterNeedsRebalance :: [Int] -> Bool |
| clusterNeedsRebalance stats = sum stats > 0 |
| |
| {- | Check group for N+1 hapiness, conflicts of primaries on nodes and |
| instances residing on offline nodes. |
| |
| -} |
| perGroupChecks :: Group.List -> GroupInfo -> GroupStats |
| perGroupChecks gl (gidx, (nl, il)) = |
| let grp = Container.find gidx gl |
| offnl = filter Node.offline (Container.elems nl) |
| n1violated = length . fst $ Cluster.computeBadItems nl il |
| conflicttags = length $ filter (>0) |
| (map Node.conflictingPrimaries (Container.elems nl)) |
| offline_pri = sum . map length $ map Node.pList offnl |
| offline_sec = length $ map Node.sList offnl |
| score = Cluster.compCV nl |
| groupstats = [ n1violated |
| , conflicttags |
| , offline_pri |
| , offline_sec |
| ] |
| in ((grp, score), groupstats) |
| |
| -- | Use Hbal's iterateDepth to simulate group rebalance. |
| executeSimulation :: Options -> Cluster.Table -> Double |
| -> Gdx -> Node.List -> Instance.List |
| -> IO GroupInfo |
| executeSimulation opts ini_tbl min_cv gidx nl il = do |
| let imlen = maximum . map (length . Instance.alias) $ Container.elems il |
| nmlen = maximum . map (length . Node.alias) $ Container.elems nl |
| |
| (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl |
| (optMaxLength opts) |
| (optDiskMoves opts) |
| (optInstMoves opts) |
| False |
| nmlen imlen [] min_cv |
| (optMinGainLim opts) (optMinGain opts) |
| (optEvacMode opts) |
| |
| let (Cluster.Table fin_nl fin_il _ _) = fin_tbl |
| return (gidx, (fin_nl, fin_il)) |
| |
| -- | Simulate group rebalance if group's score is not good |
| maybeSimulateGroupRebalance :: Options -> GroupInfo -> IO GroupInfo |
| maybeSimulateGroupRebalance opts (gidx, (nl, il)) = do |
| let ini_cv = Cluster.compCV nl |
| ini_tbl = Cluster.Table nl il ini_cv [] |
| min_cv = optMinScore opts |
| if ini_cv < min_cv |
| then return (gidx, (nl, il)) |
| else executeSimulation opts ini_tbl min_cv gidx nl il |
| |
| -- | Decide whether to simulate rebalance. |
| maybeSimulateRebalance :: Bool -- ^ Whether to simulate rebalance |
| -> Options -- ^ Command line options |
| -> [GroupInfo] -- ^ Group data |
| -> IO [GroupInfo] |
| maybeSimulateRebalance True opts cluster = |
| mapM (maybeSimulateGroupRebalance opts) cluster |
| maybeSimulateRebalance False _ cluster = return cluster |
| |
| -- | Prints the final @OK@ marker in machine readable output. |
| printFinalHTC :: Bool -> IO () |
| printFinalHTC = printFinal htcPrefix |
| |
| -- | Main function. |
| main :: Options -> [String] -> IO () |
| main opts args = do |
| unless (null args) $ exitErr "This program doesn't take any arguments." |
| |
| let verbose = optVerbose opts |
| machineread = optMachineReadable opts |
| nosimulation = optNoSimulation opts |
| |
| (ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts |
| nlf <- setNodeStatus opts fixed_nl |
| |
| let splitcluster = Cluster.splitCluster nlf ilf |
| |
| when machineread $ printGroupsMappings gl |
| |
| let groupsstats = map (perGroupChecks gl) splitcluster |
| clusterstats = map sum . transpose . map snd $ groupsstats |
| needrebalance = clusterNeedsRebalance clusterstats |
| |
| unless (verbose < 1 || machineread) . |
| putStrLn $ if nosimulation |
| then "Running in no-simulation mode." |
| else if needrebalance |
| then "Cluster needs rebalancing." |
| else "No need to rebalance cluster, no problems found." |
| |
| mapM_ (printGroupStats verbose machineread Initial) groupsstats |
| |
| printClusterStats verbose machineread Initial clusterstats needrebalance |
| |
| let exitOK = nosimulation || not needrebalance |
| simulate = not nosimulation && needrebalance |
| |
| rebalancedcluster <- maybeSimulateRebalance simulate opts splitcluster |
| |
| when (simulate || machineread) $ do |
| let newgroupstats = map (perGroupChecks gl) rebalancedcluster |
| newclusterstats = map sum . transpose . map snd $ newgroupstats |
| newneedrebalance = clusterNeedsRebalance clusterstats |
| |
| mapM_ (printGroupStats verbose machineread Rebalanced) newgroupstats |
| |
| printClusterStats verbose machineread Rebalanced newclusterstats |
| newneedrebalance |
| |
| printFinalHTC machineread |
| |
| unless exitOK . exitWith $ ExitFailure 1 |