blob: 1f8eaba47fd6af106b02f2542c64811242cbd9ce [file] [log] [blame]
{-| Cluster rebalancer.
-}
{-
Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
-}
module Ganeti.HTools.Program.Hbal
( main
, options
, arguments
, iterateDepth
) where
import Control.Exception (bracket)
import Control.Monad
import Data.List
import Data.Maybe (isJust, isNothing, fromJust)
import Data.IORef
import System.Exit
import System.IO
import System.Posix.Process
import System.Posix.Signals
import Text.Printf (printf)
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Cluster as Cluster
import qualified Ganeti.HTools.Group as Group
import qualified Ganeti.HTools.Node as Node
import qualified Ganeti.HTools.Instance as Instance
import Ganeti.BasicTypes
import Ganeti.Common
import Ganeti.HTools.CLI
import Ganeti.HTools.ExtLoader
import Ganeti.HTools.Types
import Ganeti.HTools.Loader
import Ganeti.OpCodes (wrapOpCode, setOpComment, setOpPriority,
OpCode, MetaOpCode)
import Ganeti.Jobs as Jobs
import Ganeti.Types
import Ganeti.Utils
import qualified Ganeti.Luxi as L
import Ganeti.Version (version)
-- | Options list and functions.
options :: IO [OptType]
options = do
luxi <- oLuxiSocket
return
[ oPrintNodes
, oPrintInsts
, oPrintCommands
, oDataFile
, oEvacMode
, oRapiMaster
, luxi
, oIAllocSrc
, oExecJobs
, oGroup
, oMaxSolLength
, oVerbose
, oQuiet
, oOfflineNode
, oMinScore
, oMaxCpu
, oMinDisk
, oMinGain
, oMinGainLim
, oDiskMoves
, oSelInst
, oInstMoves
, oDynuFile
, oExTags
, oExInst
, oSaveCluster
, oPriority
]
-- | The list of arguments supported by the program.
arguments :: [ArgCompletion]
arguments = []
-- | A simple type alias for clearer signature.
type Annotator = OpCode -> MetaOpCode
-- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment
-- about what generated the opcode.
annotateOpCode :: Annotator
annotateOpCode =
setOpComment ("rebalancing via hbal " ++ version) . wrapOpCode
{- | Start computing the solution at the given depth and recurse until
we find a valid solution or we exceed the maximum depth.
-}
iterateDepth :: Bool -- ^ Whether to print moves
-> Cluster.Table -- ^ The starting table
-> Int -- ^ Remaining length
-> Bool -- ^ Allow disk moves
-> Bool -- ^ Allow instance moves
-> Int -- ^ Max node name len
-> Int -- ^ Max instance name len
-> [MoveJob] -- ^ Current command list
-> Score -- ^ Score at which to stop
-> Score -- ^ Min gain limit
-> Score -- ^ Min score gain
-> Bool -- ^ Enable evacuation mode
-> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
-- and commands
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
cmd_strs min_score mg_limit min_gain evac_mode =
let Cluster.Table ini_nl ini_il _ _ = ini_tbl
allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
m_fin_tbl = if allowed_next
then Cluster.tryBalance ini_tbl disk_moves inst_moves
evac_mode mg_limit min_gain
else Nothing
in case m_fin_tbl of
Just fin_tbl ->
do
let (Cluster.Table _ _ _ fin_plc) = fin_tbl
cur_plc@(idx, _, _, move, _) <-
exitIfEmpty "Empty placement list returned for solution?!" fin_plc
let fin_plc_len = length fin_plc
(sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
nmlen imlen cur_plc fin_plc_len
afn = Cluster.involvedNodes ini_il cur_plc
upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
when printmove $ do
putStrLn sol_line
hFlush stdout
iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
nmlen imlen upd_cmd_strs min_score
mg_limit min_gain evac_mode
Nothing -> return (ini_tbl, cmd_strs)
-- | Displays the cluster stats.
printStats :: Node.List -> Node.List -> IO ()
printStats ini_nl fin_nl = do
let ini_cs = Cluster.totalResources ini_nl
fin_cs = Cluster.totalResources fin_nl
printf "Original: mem=%d disk=%d\n"
(Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
printf "Final: mem=%d disk=%d\n"
(Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
-- | Saves the rebalance commands to a text file.
saveBalanceCommands :: Options -> String -> IO ()
saveBalanceCommands opts cmd_data = do
let out_path = fromJust $ optShowCmds opts
putStrLn ""
if out_path == "-"
then printf "Commands to run to reach the above solution:\n%s"
(unlines . map (" " ++) .
filter (/= " check") .
lines $ cmd_data)
else do
writeFile out_path (shTemplate ++ cmd_data)
printf "The commands have been written to file '%s'\n" out_path
-- | Wrapper over execJobSet checking for early termination via an IORef.
execCancelWrapper :: Annotator -> String -> Node.List
-> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
execCancelWrapper _ _ _ _ _ [] = return $ Ok ()
execCancelWrapper anno master nl il cref alljss = do
cancel <- readIORef cref
if cancel > 0
then do
putStrLn $ "Exiting early due to user request, " ++
show (length alljss) ++ " jobset(s) remaining."
return $ Ok ()
else execJobSet anno master nl il cref alljss
-- | Execute an entire jobset.
execJobSet :: Annotator -> String -> Node.List
-> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
execJobSet _ _ _ _ _ [] = return $ Ok ()
execJobSet anno master nl il cref (js:jss) = do
-- map from jobset (htools list of positions) to [[opcodes]]
let jobs = map (\(_, idx, move, _) ->
map anno $ Cluster.iMoveToJob nl il idx move) js
descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
logfn =
putStrLn . ("Got job IDs " ++) . commaJoin . map (show . fromJobId)
putStrLn $ "Executing jobset for instances " ++ commaJoin descr
jrs <- bracket (L.getClient master) L.closeClient $
Jobs.execJobsWait jobs logfn
case jrs of
Bad x -> return $ Bad x
Ok x -> if null failures
then execCancelWrapper anno master nl il cref jss
else return . Bad . unlines $ [
"Not all jobs completed successfully: " ++ show failures,
"Aborting."]
where
failures = filter ((/= JOB_STATUS_SUCCESS) . snd) x
-- | Executes the jobs, if possible and desired.
maybeExecJobs :: Options
-> [a]
-> Node.List
-> Instance.List
-> [JobSet]
-> IO (Result ())
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
if optExecJobs opts && not (null ord_plc)
then (case optLuxi opts of
Nothing ->
return $ Bad "Execution of commands possible only on LUXI"
Just master ->
let annotator = maybe id setOpPriority (optPriority opts) .
annotateOpCode
in execWithCancel annotator master fin_nl il cmd_jobs)
else return $ Ok ()
-- | Signal handler for graceful termination.
handleSigInt :: IORef Int -> IO ()
handleSigInt cref = do
writeIORef cref 1
putStrLn ("Cancel request registered, will exit at" ++
" the end of the current job set...")
-- | Signal handler for immediate termination.
handleSigTerm :: IORef Int -> IO ()
handleSigTerm cref = do
-- update the cref to 2, just for consistency
writeIORef cref 2
putStrLn "Double cancel request, exiting now..."
exitImmediately $ ExitFailure 2
-- | Prepares to run a set of jobsets with handling of signals and early
-- termination.
execWithCancel :: Annotator -> String -> Node.List -> Instance.List -> [JobSet]
-> IO (Result ())
execWithCancel anno master fin_nl il cmd_jobs = do
cref <- newIORef 0
mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
[(handleSigTerm, softwareTermination), (handleSigInt, keyboardSignal)]
execCancelWrapper anno master fin_nl il cref cmd_jobs
-- | Select the target node group.
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
-> IO (String, (Node.List, Instance.List))
selectGroup opts gl nlf ilf = do
let ngroups = Cluster.splitCluster nlf ilf
when (length ngroups > 1 && isNothing (optGroup opts)) $ do
hPutStrLn stderr "Found multiple node groups:"
mapM_ (hPutStrLn stderr . (" " ++) . Group.name .
flip Container.find gl . fst) ngroups
exitErr "Aborting."
case optGroup opts of
Nothing -> do
(gidx, cdata) <- exitIfEmpty "No groups found by splitCluster?!" ngroups
let grp = Container.find gidx gl
return (Group.name grp, cdata)
Just g -> case Container.findByName gl g of
Nothing -> do
hPutStrLn stderr $ "Node group " ++ g ++
" not found. Node group list is:"
mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl)
exitErr "Aborting."
Just grp ->
case lookup (Group.idx grp) ngroups of
Nothing ->
-- This will only happen if there are no nodes assigned
-- to this group
return (Group.name grp, (Container.empty, Container.empty))
Just cdata -> return (Group.name grp, cdata)
-- | Do a few checks on the cluster data.
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
checkCluster verbose nl il = do
-- nothing to do on an empty cluster
when (Container.null il) $ do
printf "Cluster is empty, exiting.\n"::IO ()
exitSuccess
-- hbal doesn't currently handle split clusters
let split_insts = Cluster.findSplitInstances nl il
unless (null split_insts || verbose <= 1) $ do
hPutStrLn stderr "Found instances belonging to multiple node groups:"
mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts
hPutStrLn stderr "These instances will not be moved."
printf "Loaded %d nodes, %d instances\n"
(Container.size nl)
(Container.size il)::IO ()
let csf = commonSuffix nl il
when (not (null csf) && verbose > 1) $
printf "Note: Stripping common suffix of '%s' from names\n" csf
-- | Do a few checks on the selected group data.
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
checkGroup verbose gname nl il = do
printf "Group size %d nodes, %d instances\n"
(Container.size nl)
(Container.size il)::IO ()
putStrLn $ "Selected node group: " ++ gname
let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
unless (verbose < 1) $ printf
"Initial check done: %d bad nodes, %d bad instances.\n"
(length bad_nodes) (length bad_instances)
unless (null bad_nodes) $
putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
\that the cluster will end N+1 happy."
-- | Check that we actually need to rebalance.
checkNeedRebalance :: Options -> Score -> IO ()
checkNeedRebalance opts ini_cv = do
let min_cv = optMinScore opts
when (ini_cv < min_cv) $ do
printf "Cluster is already well balanced (initial score %.6g,\n\
\minimum score %.6g).\nNothing to do, exiting\n"
ini_cv min_cv:: IO ()
exitSuccess
-- | Main function.
main :: Options -> [String] -> IO ()
main opts args = do
unless (null args) $ exitErr "This program doesn't take any arguments."
let verbose = optVerbose opts
shownodes = optShowNodes opts
showinsts = optShowInsts opts
ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
when (verbose > 1) $ do
putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
nlf <- setNodeStatus opts fixed_nl
checkCluster verbose nlf ilf
maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
(gname, (nl, il)) <- selectGroup opts gl nlf ilf
checkGroup verbose gname nl il
maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
let ini_cv = Cluster.compCV nl
ini_tbl = Cluster.Table nl il ini_cv []
min_cv = optMinScore opts
checkNeedRebalance opts ini_cv
if verbose > 2
then printf "Initial coefficients: overall %.8f\n%s"
ini_cv (Cluster.printStats " " nl)::IO ()
else printf "Initial score: %.8f\n" ini_cv
putStrLn "Trying to minimize the CV..."
let imlen = maximum . map (length . Instance.alias) $ Container.elems il
nmlen = maximum . map (length . Node.alias) $ Container.elems nl
(fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
(optDiskMoves opts)
(optInstMoves opts)
nmlen imlen [] min_cv
(optMinGainLim opts) (optMinGain opts)
(optEvacMode opts)
let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
ord_plc = reverse fin_plc
sol_msg = case () of
_ | null fin_plc -> printf "No solution found\n"
| verbose > 2 ->
printf "Final coefficients: overall %.8f\n%s"
fin_cv (Cluster.printStats " " fin_nl)
| otherwise ->
printf "Cluster score improved from %.8f to %.8f\n"
ini_cv fin_cv ::String
putStr sol_msg
unless (verbose < 1) $
printf "Solution length=%d\n" (length ord_plc)
let cmd_jobs = Cluster.splitJobs cmd_strs
when (isJust $ optShowCmds opts) .
saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
when (verbose > 3) $ printStats nl fin_nl
exitIfBad "hbal" =<< maybeExecJobs opts ord_plc fin_nl il cmd_jobs