| {-| Implementation of cluster-wide logic. |
| |
| This module holds all pure cluster-logic; I\/O related functionality |
| goes into the /Main/ module for the individual binaries. |
| |
| -} |
| |
| {- |
| |
| Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
| 02110-1301, USA. |
| |
| -} |
| |
| module Ganeti.HTools.Cluster |
| ( |
| -- * Types |
| AllocSolution(..) |
| , EvacSolution(..) |
| , Table(..) |
| , CStats(..) |
| , AllocNodes |
| , AllocResult |
| , AllocMethod |
| , AllocSolutionList |
| -- * Generic functions |
| , totalResources |
| , computeAllocationDelta |
| -- * First phase functions |
| , computeBadItems |
| -- * Second phase functions |
| , printSolutionLine |
| , formatCmds |
| , involvedNodes |
| , splitJobs |
| -- * Display functions |
| , printNodes |
| , printInsts |
| -- * Balancing functions |
| , checkMove |
| , doNextBalance |
| , tryBalance |
| , compCV |
| , compCVNodes |
| , compDetailedCV |
| , printStats |
| , iMoveToJob |
| -- * IAllocator functions |
| , genAllocNodes |
| , tryAlloc |
| , tryMGAlloc |
| , tryNodeEvac |
| , tryChangeGroup |
| , collapseFailures |
| , allocList |
| -- * Allocation functions |
| , iterateAlloc |
| , tieredAlloc |
| -- * Node group functions |
| , instanceGroup |
| , findSplitInstances |
| , splitCluster |
| ) where |
| |
| import Control.Applicative (liftA2) |
| import Control.Arrow ((&&&)) |
| import qualified Data.IntSet as IntSet |
| import Data.List |
| import Data.Maybe (fromJust, fromMaybe, isJust, isNothing) |
| import Data.Ord (comparing) |
| import Text.Printf (printf) |
| |
| import Ganeti.BasicTypes |
| import qualified Ganeti.HTools.Container as Container |
| import qualified Ganeti.HTools.Instance as Instance |
| import qualified Ganeti.HTools.Nic as Nic |
| import qualified Ganeti.HTools.Node as Node |
| import qualified Ganeti.HTools.Group as Group |
| import Ganeti.HTools.Types |
| import Ganeti.Compat |
| import qualified Ganeti.OpCodes as OpCodes |
| import Ganeti.Utils |
| import Ganeti.Types (mkNonEmpty) |
| |
| -- * Types |
| |
| -- | Allocation\/relocation solution. |
| data AllocSolution = AllocSolution |
| { asFailures :: [FailMode] -- ^ Failure counts |
| , asAllocs :: Int -- ^ Good allocation count |
| , asSolution :: Maybe Node.AllocElement -- ^ The actual allocation result |
| , asLog :: [String] -- ^ Informational messages |
| } |
| |
| -- | Node evacuation/group change iallocator result type. This result |
| -- type consists of actual opcodes (a restricted subset) that are |
| -- transmitted back to Ganeti. |
| data EvacSolution = EvacSolution |
| { esMoved :: [(Idx, Gdx, [Ndx])] -- ^ Instances moved successfully |
| , esFailed :: [(Idx, String)] -- ^ Instances which were not |
| -- relocated |
| , esOpCodes :: [[OpCodes.OpCode]] -- ^ List of jobs |
| } deriving (Show) |
| |
| -- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'. |
| type AllocResult = (FailStats, Node.List, Instance.List, |
| [Instance.Instance], [CStats]) |
| |
| -- | Type alias for easier handling. |
| type AllocSolutionList = [(Instance.Instance, AllocSolution)] |
| |
| -- | A type denoting the valid allocation mode/pairs. |
| -- |
| -- For a one-node allocation, this will be a @Left ['Ndx']@, whereas |
| -- for a two-node allocation, this will be a @Right [('Ndx', |
| -- ['Ndx'])]@. In the latter case, the list is basically an |
| -- association list, grouped by primary node and holding the potential |
| -- secondary nodes in the sub-list. |
| type AllocNodes = Either [Ndx] [(Ndx, [Ndx])] |
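| |
| -- For illustration (hypothetical node indices): a one-node request |
| -- over three candidate nodes could yield @Left [1, 2, 3]@, while the |
| -- corresponding two-node request could yield |
| -- @Right [(1, [2, 3]), (2, [1, 3]), (3, [1, 2])]@. |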
| |
| -- | The empty solution we start with when computing allocations. |
| emptyAllocSolution :: AllocSolution |
| emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0 |
| , asSolution = Nothing, asLog = [] } |
| |
| -- | The empty evac solution. |
| emptyEvacSolution :: EvacSolution |
| emptyEvacSolution = EvacSolution { esMoved = [] |
| , esFailed = [] |
| , esOpCodes = [] |
| } |
| |
| -- | The complete state for the balancing solution. |
| data Table = Table Node.List Instance.List Score [Placement] |
| deriving (Show) |
| |
| -- | Cluster statistics data type. |
| data CStats = CStats |
| { csFmem :: Integer -- ^ Cluster free mem |
| , csFdsk :: Integer -- ^ Cluster free disk |
| , csFspn :: Integer -- ^ Cluster free spindles |
| , csAmem :: Integer -- ^ Cluster allocatable mem |
| , csAdsk :: Integer -- ^ Cluster allocatable disk |
| , csAcpu :: Integer -- ^ Cluster allocatable cpus |
| , csMmem :: Integer -- ^ Max node allocatable mem |
| , csMdsk :: Integer -- ^ Max node allocatable disk |
| , csMcpu :: Integer -- ^ Max node allocatable cpu |
| , csImem :: Integer -- ^ Instance used mem |
| , csIdsk :: Integer -- ^ Instance used disk |
| , csIspn :: Integer -- ^ Instance used spindles |
| , csIcpu :: Integer -- ^ Instance used cpu |
| , csTmem :: Double -- ^ Cluster total mem |
| , csTdsk :: Double -- ^ Cluster total disk |
| , csTspn :: Double -- ^ Cluster total spindles |
| , csTcpu :: Double -- ^ Cluster total cpus |
| , csVcpu :: Integer -- ^ Cluster total virtual cpus |
| , csNcpu :: Double -- ^ Equivalent to 'csIcpu' but in terms of |
| -- physical CPUs, i.e. normalised used phys CPUs |
| , csXmem :: Integer -- ^ Unaccounted-for mem |
| , csNmem :: Integer -- ^ Node own memory |
| , csScore :: Score -- ^ The cluster score |
| , csNinst :: Int -- ^ The total number of instances |
| } deriving (Show) |
| |
| -- | A simple type for allocation functions. |
| type AllocMethod = Node.List -- ^ Node list |
| -> Instance.List -- ^ Instance list |
| -> Maybe Int -- ^ Optional allocation limit |
| -> Instance.Instance -- ^ Instance spec for allocation |
| -> AllocNodes -- ^ Which nodes we should allocate on |
| -> [Instance.Instance] -- ^ Allocated instances |
| -> [CStats] -- ^ Running cluster stats |
| -> Result AllocResult -- ^ Allocation result |
| |
| -- | A simple type for the running solution of evacuations. |
| type EvacInnerState = |
| Either String (Node.List, Instance.Instance, Score, Ndx) |
| |
| -- * Utility functions |
| |
| -- | Verifies the N+1 status and returns the affected nodes. |
| verifyN1 :: [Node.Node] -> [Node.Node] |
| verifyN1 = filter Node.failN1 |
| |
| {-| Computes the pair of bad nodes and instances. |
| |
| The bad node list is computed via a simple 'verifyN1' check, and the |
| bad instance list is the list of primary and secondary instances of |
| those nodes. |
| |
| -} |
| computeBadItems :: Node.List -> Instance.List -> |
| ([Node.Node], [Instance.Instance]) |
| computeBadItems nl il = |
| let bad_nodes = verifyN1 $ getOnline nl |
| bad_instances = map (`Container.find` il) . |
| sort . nub $ |
| concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes |
| in |
| (bad_nodes, bad_instances) |
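| |
| -- A minimal usage sketch (@nl@ and @il@ are assumed to be loaded |
| -- elsewhere): |
| -- |
| -- > let (bad_nodes, bad_insts) = computeBadItems nl il |
| -- > putStrLn $ show (length bad_insts) |
| -- >          ++ " instances live on N+1-failing nodes" |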
| |
| -- | Extracts the node pairs for an instance. This can fail if the |
| -- instance is single-homed. FIXME: this needs to be improved, |
| -- together with the general enhancement for handling non-DRBD moves. |
| instanceNodes :: Node.List -> Instance.Instance -> |
| (Ndx, Ndx, Node.Node, Node.Node) |
| instanceNodes nl inst = |
| let old_pdx = Instance.pNode inst |
| old_sdx = Instance.sNode inst |
| old_p = Container.find old_pdx nl |
| old_s = Container.find old_sdx nl |
| in (old_pdx, old_sdx, old_p, old_s) |
| |
| -- | Zero-initializer for the CStats type. |
| emptyCStats :: CStats |
| emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 |
| |
| -- | Update stats with data from a new node. |
| updateCStats :: CStats -> Node.Node -> CStats |
| updateCStats cs node = |
| let CStats { csFmem = x_fmem, csFdsk = x_fdsk, |
| csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk, |
| csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu, |
| csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu, |
| csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu, |
| csVcpu = x_vcpu, csNcpu = x_ncpu, |
| csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst, |
| csFspn = x_fspn, csIspn = x_ispn, csTspn = x_tspn |
| } |
| = cs |
| inc_amem = Node.fMem node - Node.rMem node |
| inc_amem' = if inc_amem > 0 then inc_amem else 0 |
| inc_adsk = Node.availDisk node |
| inc_imem = truncate (Node.tMem node) - Node.nMem node |
| - Node.xMem node - Node.fMem node |
| inc_icpu = Node.uCpu node |
| inc_idsk = truncate (Node.tDsk node) - Node.fDsk node |
| inc_ispn = Node.tSpindles node - Node.fSpindles node |
| inc_vcpu = Node.hiCpu node |
| inc_acpu = Node.availCpu node |
| inc_ncpu = fromIntegral (Node.uCpu node) / |
| iPolicyVcpuRatio (Node.iPolicy node) |
| in cs { csFmem = x_fmem + fromIntegral (Node.fMem node) |
| , csFdsk = x_fdsk + fromIntegral (Node.fDsk node) |
| , csFspn = x_fspn + fromIntegral (Node.fSpindles node) |
| , csAmem = x_amem + fromIntegral inc_amem' |
| , csAdsk = x_adsk + fromIntegral inc_adsk |
| , csAcpu = x_acpu + fromIntegral inc_acpu |
| , csMmem = max x_mmem (fromIntegral inc_amem') |
| , csMdsk = max x_mdsk (fromIntegral inc_adsk) |
| , csMcpu = max x_mcpu (fromIntegral inc_acpu) |
| , csImem = x_imem + fromIntegral inc_imem |
| , csIdsk = x_idsk + fromIntegral inc_idsk |
| , csIspn = x_ispn + fromIntegral inc_ispn |
| , csIcpu = x_icpu + fromIntegral inc_icpu |
| , csTmem = x_tmem + Node.tMem node |
| , csTdsk = x_tdsk + Node.tDsk node |
| , csTspn = x_tspn + fromIntegral (Node.tSpindles node) |
| , csTcpu = x_tcpu + Node.tCpu node |
| , csVcpu = x_vcpu + fromIntegral inc_vcpu |
| , csNcpu = x_ncpu + inc_ncpu |
| , csXmem = x_xmem + fromIntegral (Node.xMem node) |
| , csNmem = x_nmem + fromIntegral (Node.nMem node) |
| , csNinst = x_ninst + length (Node.pList node) |
| } |
| |
| -- | Compute the total free disk and memory in the cluster. |
| totalResources :: Node.List -> CStats |
| totalResources nl = |
| let cs = foldl' updateCStats emptyCStats . Container.elems $ nl |
| in cs { csScore = compCV nl } |
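| |
| -- A usage sketch, reporting cluster-wide free memory and the score |
| -- (@nl@ is a 'Node.List' obtained elsewhere): |
| -- |
| -- > let cs = totalResources nl |
| -- > printf "free mem: %d MiB, score: %.8f\n" (csFmem cs) (csScore cs) |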
| |
| -- | Compute the delta between two cluster states. |
| -- |
| -- This is used when doing allocations, to understand better the |
| -- available cluster resources. The return value is a triple of the |
| -- current used values, the delta that was still allocated, and what |
| -- was left unallocated. |
| computeAllocationDelta :: CStats -> CStats -> AllocStats |
| computeAllocationDelta cini cfin = |
| let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu, |
| csNcpu = i_ncpu, csIspn = i_ispn } = cini |
| CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu, |
| csTmem = t_mem, csTdsk = t_dsk, csVcpu = f_vcpu, |
| csNcpu = f_ncpu, csTcpu = f_tcpu, |
| csIspn = f_ispn, csTspn = t_spn } = cfin |
| rini = AllocInfo { allocInfoVCpus = fromIntegral i_icpu |
| , allocInfoNCpus = i_ncpu |
| , allocInfoMem = fromIntegral i_imem |
| , allocInfoDisk = fromIntegral i_idsk |
| , allocInfoSpn = fromIntegral i_ispn |
| } |
| rfin = AllocInfo { allocInfoVCpus = fromIntegral (f_icpu - i_icpu) |
| , allocInfoNCpus = f_ncpu - i_ncpu |
| , allocInfoMem = fromIntegral (f_imem - i_imem) |
| , allocInfoDisk = fromIntegral (f_idsk - i_idsk) |
| , allocInfoSpn = fromIntegral (f_ispn - i_ispn) |
| } |
| runa = AllocInfo { allocInfoVCpus = fromIntegral (f_vcpu - f_icpu) |
| , allocInfoNCpus = f_tcpu - f_ncpu |
| , allocInfoMem = truncate t_mem - fromIntegral f_imem |
| , allocInfoDisk = truncate t_dsk - fromIntegral f_idsk |
| , allocInfoSpn = truncate t_spn - fromIntegral f_ispn |
| } |
| in (rini, rfin, runa) |
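| |
| -- A sketch of typical usage: take the statistics before and after an |
| -- allocation run (hypothetical @ini_nl@\/@fin_nl@ node lists) and |
| -- compute the used\/delta\/unallocated triple: |
| -- |
| -- > let ini_cs = totalResources ini_nl |
| -- >     fin_cs = totalResources fin_nl |
| -- >     (used, delta, unalloc) = computeAllocationDelta ini_cs fin_cs |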
| |
| -- | The names and weights of the individual elements in the CV list. |
| detailedCVInfo :: [(Double, String)] |
| detailedCVInfo = [ (1, "free_mem_cv") |
| , (1, "free_disk_cv") |
| , (1, "n1_cnt") |
| , (1, "reserved_mem_cv") |
| , (4, "offline_all_cnt") |
| , (16, "offline_pri_cnt") |
| , (1, "vcpu_ratio_cv") |
| , (1, "cpu_load_cv") |
| , (1, "mem_load_cv") |
| , (1, "disk_load_cv") |
| , (1, "net_load_cv") |
| , (2, "pri_tags_score") |
| , (1, "spindles_cv") |
| ] |
| |
| -- | Holds the weights used by 'compCVNodes' for each metric. |
| detailedCVWeights :: [Double] |
| detailedCVWeights = map fst detailedCVInfo |
| |
| -- | Compute the list of detailed cluster metrics, in the order (and |
| -- with the weights) described by 'detailedCVInfo'. |
| compDetailedCV :: [Node.Node] -> [Double] |
| compDetailedCV all_nodes = |
| let (offline, nodes) = partition Node.offline all_nodes |
| mem_l = map Node.pMem nodes |
| dsk_l = map Node.pDsk nodes |
| -- metric: memory covariance |
| mem_cv = stdDev mem_l |
| -- metric: disk covariance |
| dsk_cv = stdDev dsk_l |
| -- metric: count of instances living on N1 failing nodes |
| n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) + |
| length (Node.pList n)) . |
| filter Node.failN1 $ nodes :: Double |
| res_l = map Node.pRem nodes |
| -- metric: reserved memory covariance |
| res_cv = stdDev res_l |
| -- offline instances metrics |
| offline_ipri = sum . map (length . Node.pList) $ offline |
| offline_isec = sum . map (length . Node.sList) $ offline |
| -- metric: count of instances on offline nodes |
| off_score = fromIntegral (offline_ipri + offline_isec)::Double |
| -- metric: count of primary instances on offline nodes (this |
| -- helps with evacuation/failover of primary instances on |
| -- 2-node clusters with one node offline) |
| off_pri_score = fromIntegral offline_ipri::Double |
| cpu_l = map Node.pCpu nodes |
| -- metric: covariance of vcpu/pcpu ratio |
| cpu_cv = stdDev cpu_l |
| -- metrics: covariance of cpu, memory, disk and network load |
| (c_load, m_load, d_load, n_load) = |
| unzip4 $ map (\n -> |
| let DynUtil c1 m1 d1 n1 = Node.utilLoad n |
| DynUtil c2 m2 d2 n2 = Node.utilPool n |
| in (c1/c2, m1/m2, d1/d2, n1/n2)) nodes |
| -- metric: conflicting instance count |
| pri_tags_inst = sum $ map Node.conflictingPrimaries nodes |
| pri_tags_score = fromIntegral pri_tags_inst::Double |
| -- metric: spindles % |
| spindles_cv = map (\n -> Node.instSpindles n / Node.hiSpindles n) nodes |
| in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv |
| , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load |
| , pri_tags_score, stdDev spindles_cv ] |
| |
| -- | Compute the /total/ cluster score, as the weighted sum of the |
| -- detailed metrics. |
| compCVNodes :: [Node.Node] -> Double |
| compCVNodes = sum . zipWith (*) detailedCVWeights . compDetailedCV |
| |
| -- | Wrapper over 'compCVNodes' for callers that have a 'Node.List'. |
| compCV :: Node.List -> Double |
| compCV = compCVNodes . Container.elems |
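| |
| -- Lower scores denote better-balanced clusters, so a candidate change |
| -- producing node list @nl'@ is an improvement over @nl@ exactly when |
| -- (sketch): |
| -- |
| -- > compCV nl' < compCV nl |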
| |
| -- | Compute online nodes from a 'Node.List'. |
| getOnline :: Node.List -> [Node.Node] |
| getOnline = filter (not . Node.offline) . Container.elems |
| |
| -- * Balancing functions |
| |
| -- | Compute best table. Note that the ordering of the arguments is important. |
| compareTables :: Table -> Table -> Table |
| compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) = |
| if a_cv > b_cv then b else a |
| |
| -- | Applies an instance move to a given node list and instance. |
| applyMove :: Node.List -> Instance.Instance |
| -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx) |
| -- Failover (f) |
| applyMove nl inst Failover = |
| let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst |
| int_p = Node.removePri old_p inst |
| int_s = Node.removeSec old_s inst |
| new_nl = do -- Maybe monad |
| new_p <- Node.addPriEx (Node.offline old_p) int_s inst |
| new_s <- Node.addSec int_p inst old_sdx |
| let new_inst = Instance.setBoth inst old_sdx old_pdx |
| return (Container.addTwo old_pdx new_s old_sdx new_p nl, |
| new_inst, old_sdx, old_pdx) |
| in new_nl |
| |
| -- Failover to any (fa) |
| applyMove nl inst (FailoverToAny new_pdx) = do |
| let (old_pdx, old_sdx, old_pnode, _) = instanceNodes nl inst |
| new_pnode = Container.find new_pdx nl |
| force_failover = Node.offline old_pnode |
| new_pnode' <- Node.addPriEx force_failover new_pnode inst |
| let old_pnode' = Node.removePri old_pnode inst |
| inst' = Instance.setPri inst new_pdx |
| nl' = Container.addTwo old_pdx old_pnode' new_pdx new_pnode' nl |
| return (nl', inst', new_pdx, old_sdx) |
| |
| -- Replace the primary (f:, r:np, f) |
| applyMove nl inst (ReplacePrimary new_pdx) = |
| let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst |
| tgt_n = Container.find new_pdx nl |
| int_p = Node.removePri old_p inst |
| int_s = Node.removeSec old_s inst |
| force_p = Node.offline old_p |
| new_nl = do -- Maybe monad |
| -- check that the current secondary can host the instance |
| -- during the migration |
| tmp_s <- Node.addPriEx force_p int_s inst |
| let tmp_s' = Node.removePri tmp_s inst |
| new_p <- Node.addPriEx force_p tgt_n inst |
| new_s <- Node.addSecEx force_p tmp_s' inst new_pdx |
| let new_inst = Instance.setPri inst new_pdx |
| return (Container.add new_pdx new_p $ |
| Container.addTwo old_pdx int_p old_sdx new_s nl, |
| new_inst, new_pdx, old_sdx) |
| in new_nl |
| |
| -- Replace the secondary (r:ns) |
| applyMove nl inst (ReplaceSecondary new_sdx) = |
| let old_pdx = Instance.pNode inst |
| old_sdx = Instance.sNode inst |
| old_s = Container.find old_sdx nl |
| tgt_n = Container.find new_sdx nl |
| int_s = Node.removeSec old_s inst |
| force_s = Node.offline old_s |
| new_inst = Instance.setSec inst new_sdx |
| new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>= |
| \new_s -> return (Container.addTwo new_sdx |
| new_s old_sdx int_s nl, |
| new_inst, old_pdx, new_sdx) |
| in new_nl |
| |
| -- Replace the secondary and failover (r:np, f) |
| applyMove nl inst (ReplaceAndFailover new_pdx) = |
| let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst |
| tgt_n = Container.find new_pdx nl |
| int_p = Node.removePri old_p inst |
| int_s = Node.removeSec old_s inst |
| force_s = Node.offline old_s |
| new_nl = do -- Maybe monad |
| new_p <- Node.addPri tgt_n inst |
| new_s <- Node.addSecEx force_s int_p inst new_pdx |
| let new_inst = Instance.setBoth inst new_pdx old_pdx |
| return (Container.add new_pdx new_p $ |
| Container.addTwo old_pdx new_s old_sdx int_s nl, |
| new_inst, new_pdx, old_pdx) |
| in new_nl |
| |
| -- Failover and replace the secondary (f, r:ns) |
| applyMove nl inst (FailoverAndReplace new_sdx) = |
| let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst |
| tgt_n = Container.find new_sdx nl |
| int_p = Node.removePri old_p inst |
| int_s = Node.removeSec old_s inst |
| force_p = Node.offline old_p |
| new_nl = do -- Maybe monad |
| new_p <- Node.addPriEx force_p int_s inst |
| new_s <- Node.addSecEx force_p tgt_n inst old_sdx |
| let new_inst = Instance.setBoth inst old_sdx new_sdx |
| return (Container.add new_sdx new_s $ |
| Container.addTwo old_sdx new_p old_pdx int_p nl, |
| new_inst, old_sdx, new_sdx) |
| in new_nl |
| |
| -- | Tries to allocate an instance on one given node. |
| allocateOnSingle :: Node.List -> Instance.Instance -> Ndx |
| -> OpResult Node.AllocElement |
| allocateOnSingle nl inst new_pdx = |
| let p = Container.find new_pdx nl |
| new_inst = Instance.setBoth inst new_pdx Node.noSecondary |
| in do |
| Instance.instMatchesPolicy inst (Node.iPolicy p) (Node.exclStorage p) |
| new_p <- Node.addPri p inst |
| let new_nl = Container.add new_pdx new_p nl |
| new_score = compCV new_nl |
| return (new_nl, new_inst, [new_p], new_score) |
| |
| -- | Tries to allocate an instance on a given pair of nodes. |
| allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx |
| -> OpResult Node.AllocElement |
| allocateOnPair nl inst new_pdx new_sdx = |
| let tgt_p = Container.find new_pdx nl |
| tgt_s = Container.find new_sdx nl |
| in do |
| Instance.instMatchesPolicy inst (Node.iPolicy tgt_p) |
| (Node.exclStorage tgt_p) |
| new_p <- Node.addPri tgt_p inst |
| new_s <- Node.addSec tgt_s inst new_pdx |
| let new_inst = Instance.setBoth inst new_pdx new_sdx |
| new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl |
| return (new_nl, new_inst, [new_p, new_s], compCV new_nl) |
| |
| -- | Tries to perform an instance move and returns the best table |
| -- between the original one and the new one. |
| checkSingleStep :: Table -- ^ The original table |
| -> Instance.Instance -- ^ The instance to move |
| -> Table -- ^ The current best table |
| -> IMove -- ^ The move to apply |
| -> Table -- ^ The final best table |
| checkSingleStep ini_tbl target cur_tbl move = |
| let Table ini_nl ini_il _ ini_plc = ini_tbl |
| tmp_resu = applyMove ini_nl target move |
| in case tmp_resu of |
| Bad _ -> cur_tbl |
| Ok (upd_nl, new_inst, pri_idx, sec_idx) -> |
| let tgt_idx = Instance.idx target |
| upd_cvar = compCV upd_nl |
| upd_il = Container.add tgt_idx new_inst ini_il |
| upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc |
| upd_tbl = Table upd_nl upd_il upd_cvar upd_plc |
| in compareTables cur_tbl upd_tbl |
| |
| -- | Given the status of the current secondary as a valid new node and |
| -- the current candidate target node, generate the possible moves for |
| -- an instance. |
| possibleMoves :: MirrorType -- ^ The mirroring type of the instance |
| -> Bool -- ^ Whether the secondary node is a valid new node |
| -> Bool -- ^ Whether we can change the primary node |
| -> Ndx -- ^ Target node candidate |
| -> [IMove] -- ^ List of valid result moves |
| |
| possibleMoves MirrorNone _ _ _ = [] |
| |
| possibleMoves MirrorExternal _ False _ = [] |
| |
| possibleMoves MirrorExternal _ True tdx = |
| [ FailoverToAny tdx ] |
| |
| possibleMoves MirrorInternal _ False tdx = |
| [ ReplaceSecondary tdx ] |
| |
| possibleMoves MirrorInternal True True tdx = |
| [ ReplaceSecondary tdx |
| , ReplaceAndFailover tdx |
| , ReplacePrimary tdx |
| , FailoverAndReplace tdx |
| ] |
| |
| possibleMoves MirrorInternal False True tdx = |
| [ ReplaceSecondary tdx |
| , ReplaceAndFailover tdx |
| ] |
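| |
| -- For example, for a DRBD ('MirrorInternal') instance whose current |
| -- secondary is a valid new node and whose primary may change, a |
| -- (hypothetical) target node 5 expands to: |
| -- |
| -- > possibleMoves MirrorInternal True True 5 |
| -- >   == [ ReplaceSecondary 5, ReplaceAndFailover 5 |
| -- >      , ReplacePrimary 5, FailoverAndReplace 5 ] |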
| |
| -- | Compute the best move for a given instance. |
| checkInstanceMove :: [Ndx] -- ^ Allowed target node indices |
| -> Bool -- ^ Whether disk moves are allowed |
| -> Bool -- ^ Whether instance moves are allowed |
| -> Table -- ^ Original table |
| -> Instance.Instance -- ^ Instance to move |
| -> Table -- ^ Best new table for this instance |
| checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target = |
| let opdx = Instance.pNode target |
| osdx = Instance.sNode target |
| bad_nodes = [opdx, osdx] |
| nodes = filter (`notElem` bad_nodes) nodes_idx |
| mir_type = Instance.mirrorType target |
| use_secondary = elem osdx nodes_idx && inst_moves |
| aft_failover = if mir_type == MirrorInternal && use_secondary |
| -- if drbd and allowed to failover |
| then checkSingleStep ini_tbl target ini_tbl Failover |
| else ini_tbl |
| all_moves = |
| if disk_moves |
| then concatMap (possibleMoves mir_type use_secondary inst_moves) |
| nodes |
| else [] |
| in |
| -- iterate over the possible nodes for this instance |
| foldl' (checkSingleStep ini_tbl target) aft_failover all_moves |
| |
| -- | Compute the best next move. |
| checkMove :: [Ndx] -- ^ Allowed target node indices |
| -> Bool -- ^ Whether disk moves are allowed |
| -> Bool -- ^ Whether instance moves are allowed |
| -> Table -- ^ The current solution |
| -> [Instance.Instance] -- ^ List of instances still to move |
| -> Table -- ^ The new solution |
| checkMove nodes_idx disk_moves inst_moves ini_tbl victims = |
| let Table _ _ _ ini_plc = ini_tbl |
| -- we're using rwhnf from the Control.Parallel.Strategies |
| -- package; we don't need to use rnf as that would force too |
| -- much evaluation in single-threaded cases, and in the |
| -- multi-threaded case the weak head normal form is enough to |
| -- spark the evaluation |
| tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves |
| inst_moves ini_tbl) |
| victims |
| -- iterate over all instances, computing the best move |
| best_tbl = foldl' compareTables ini_tbl tables |
| Table _ _ _ best_plc = best_tbl |
| in if length best_plc == length ini_plc |
| then ini_tbl -- no advancement |
| else best_tbl |
| |
| -- | Check if we are allowed to go deeper in the balancing. |
| doNextBalance :: Table -- ^ The starting table |
| -> Int -- ^ Maximum number of rounds (negative means no limit) |
| -> Score -- ^ Score at which to stop |
| -> Bool -- ^ Whether we are allowed to balance further |
| doNextBalance ini_tbl max_rounds min_score = |
| let Table _ _ ini_cv ini_plc = ini_tbl |
| ini_plc_len = length ini_plc |
| in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score |
| |
| -- | Run a balance move. |
| tryBalance :: Table -- ^ The starting table |
| -> Bool -- ^ Allow disk moves |
| -> Bool -- ^ Allow instance moves |
| -> Bool -- ^ Whether to only evacuate instances off offline nodes |
| -> Score -- ^ Score above which any gain is accepted |
| -> Score -- ^ Minimum gain required below that threshold |
| -> Maybe Table -- ^ The improved table, if any improvement was found |
| tryBalance ini_tbl disk_moves inst_moves evac_mode mg_limit min_gain = |
| let Table ini_nl ini_il ini_cv _ = ini_tbl |
| all_inst = Container.elems ini_il |
| all_nodes = Container.elems ini_nl |
| (offline_nodes, online_nodes) = partition Node.offline all_nodes |
| all_inst' = if evac_mode |
| then let bad_nodes = map Node.idx offline_nodes |
| in filter (any (`elem` bad_nodes) . |
| Instance.allNodes) all_inst |
| else all_inst |
| reloc_inst = filter (\i -> Instance.movable i && |
| Instance.autoBalance i) all_inst' |
| node_idx = map Node.idx online_nodes |
| fin_tbl = checkMove node_idx disk_moves inst_moves ini_tbl reloc_inst |
| (Table _ _ fin_cv _) = fin_tbl |
| in |
| if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain) |
| then Just fin_tbl -- this round was successful, return the new table |
| else Nothing |
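| |
| -- A simplified sketch of how a balancer binary might drive |
| -- 'doNextBalance' and 'tryBalance' together (the thresholds are |
| -- hypothetical): |
| -- |
| -- > balanceLoop tbl maxRounds minScore |
| -- >   | doNextBalance tbl maxRounds minScore = |
| -- >       case tryBalance tbl True True False 1e-2 1e-2 of |
| -- >         Just tbl' -> balanceLoop tbl' maxRounds minScore |
| -- >         Nothing   -> tbl |
| -- >   | otherwise = tbl |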
| |
| -- * Allocation functions |
| |
| -- | Build failure stats out of a list of failures. |
| collapseFailures :: [FailMode] -> FailStats |
| collapseFailures flst = |
| map (\k -> (k, foldl' (\a e -> if e == k then a + 1 else a) 0 flst)) |
| [minBound..maxBound] |
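| |
| -- For example (a hypothetical failure list, assuming the usual |
| -- 'FailMode' constructors such as 'FailMem' and 'FailDisk'): |
| -- |
| -- > collapseFailures [FailMem, FailDisk, FailMem] |
| -- |
| -- counts 2 for 'FailMem', 1 for 'FailDisk', and 0 for every other |
| -- constructor. |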
| |
| -- | Compares two 'Maybe' 'Node.AllocElement's and chooses the one with |
| -- the better (lower) score. |
| bestAllocElement :: Maybe Node.AllocElement |
| -> Maybe Node.AllocElement |
| -> Maybe Node.AllocElement |
| bestAllocElement a Nothing = a |
| bestAllocElement Nothing b = b |
| bestAllocElement a@(Just (_, _, _, ascore)) b@(Just (_, _, _, bscore)) = |
| if ascore < bscore then a else b |
| |
| -- | Updates the current allocation solution and failure stats with new |
| -- elements. |
| concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution |
| concatAllocs as (Bad reason) = as { asFailures = reason : asFailures as } |
| |
| concatAllocs as (Ok ns) = |
| let -- Choose the old or new solution, based on the cluster score |
| cntok = asAllocs as |
| osols = asSolution as |
| nsols = bestAllocElement osols (Just ns) |
| nsuc = cntok + 1 |
| -- Note: we force evaluation of nsols here in order to keep the |
| -- memory profile low - we know that we will need nsols for sure |
| -- in the next cycle, so we force evaluation of nsols, since the |
| -- foldl' in the caller will only evaluate the tuple, but not the |
| -- elements of the tuple |
| in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolution = nsols } |
| |
| -- | Sums two 'AllocSolution' structures. |
| sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution |
| sumAllocs (AllocSolution aFails aAllocs aSols aLog) |
| (AllocSolution bFails bAllocs bSols bLog) = |
| -- note: we add b first, since usually it will be smaller; when |
| -- fold'ing, a will grow and grow whereas b is the per-group |
| -- result, hence smaller |
| let nFails = bFails ++ aFails |
| nAllocs = aAllocs + bAllocs |
| nSols = bestAllocElement aSols bSols |
| nLog = bLog ++ aLog |
| in AllocSolution nFails nAllocs nSols nLog |
| |
| -- | Given a solution, generates a reasonable description for it. |
| describeSolution :: AllocSolution -> String |
| describeSolution as = |
| let fcnt = asFailures as |
| sols = asSolution as |
| freasons = |
| intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) . |
| filter ((> 0) . snd) . collapseFailures $ fcnt |
| in case sols of |
| Nothing -> "No valid allocation solutions, failure reasons: " ++ |
| (if null fcnt then "unknown reasons" else freasons) |
| Just (_, _, nodes, cv) -> |
| printf ("score: %.8f, successes %d, failures %d (%s)" ++ |
| " for node(s) %s") cv (asAllocs as) (length fcnt) freasons |
| (intercalate "/" . map Node.name $ nodes) |
| |
| -- | Annotates a solution with the appropriate string. |
| annotateSolution :: AllocSolution -> AllocSolution |
| annotateSolution as = as { asLog = describeSolution as : asLog as } |
| |
| -- | Reverses an evacuation solution. |
| -- |
| -- Rationale: we always concat the results to the top of the lists, so |
| -- for proper jobset execution, we should reverse all lists. |
| reverseEvacSolution :: EvacSolution -> EvacSolution |
| reverseEvacSolution (EvacSolution moved failed opcodes) = |
| EvacSolution (reverse moved) (reverse failed) (reverse opcodes) |
| |
| -- | Generate the valid node allocation singles or pairs for a new instance. |
| genAllocNodes :: Group.List -- ^ Group list |
| -> Node.List -- ^ The node map |
| -> Int -- ^ The number of nodes required |
| -> Bool -- ^ Whether to drop unallocable nodes |
| -> Result AllocNodes -- ^ The (monadic) result |
| genAllocNodes gl nl count drop_unalloc = |
| let filter_fn = if drop_unalloc |
| then filter (Group.isAllocable . |
| flip Container.find gl . Node.group) |
| else id |
| all_nodes = filter_fn $ getOnline nl |
| all_pairs = [(Node.idx p, |
| [Node.idx s | s <- all_nodes, |
| Node.idx p /= Node.idx s, |
| Node.group p == Node.group s]) | |
| p <- all_nodes] |
| in case count of |
| 1 -> Ok (Left (map Node.idx all_nodes)) |
| 2 -> Ok (Right (filter (not . null . snd) all_pairs)) |
| _ -> Bad "Unsupported number of nodes, only one or two supported" |
| |
| -- | Try to allocate an instance on the cluster. |
| tryAlloc :: (Monad m) => |
| Node.List -- ^ The node list |
| -> Instance.List -- ^ The instance list |
| -> Instance.Instance -- ^ The instance to allocate |
| -> AllocNodes -- ^ The allocation targets |
| -> m AllocSolution -- ^ Possible solution list |
| tryAlloc _ _ _ (Right []) = fail "Not enough online nodes" |
| tryAlloc nl _ inst (Right ok_pairs) = |
| let psols = parMap rwhnf (\(p, ss) -> |
| foldl' (\cstate -> |
| concatAllocs cstate . |
| allocateOnPair nl inst p) |
| emptyAllocSolution ss) ok_pairs |
| sols = foldl' sumAllocs emptyAllocSolution psols |
| in return $ annotateSolution sols |
| |
| tryAlloc _ _ _ (Left []) = fail "No online nodes" |
| tryAlloc nl _ inst (Left all_nodes) = |
| let sols = foldl' (\cstate -> |
| concatAllocs cstate . allocateOnSingle nl inst |
| ) emptyAllocSolution all_nodes |
| in return $ annotateSolution sols |
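| |
| -- 'genAllocNodes' and 'tryAlloc' compose naturally in the 'Result' |
| -- monad, as 'findBestAllocGroup' does below: |
| -- |
| -- > genAllocNodes gl nl 2 False >>= tryAlloc nl il inst |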
| |
| -- | Given a group/result, describe it as a nice (list of) messages. |
| solutionDescription :: (Group.Group, Result AllocSolution) |
| -> [String] |
| solutionDescription (grp, result) = |
| case result of |
| Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution) |
| Bad message -> [printf "Group %s: error %s" gname message] |
| where gname = Group.name grp |
| pol = allocPolicyToRaw (Group.allocPolicy grp) |
| |
| -- | From a list of possibly bad and possibly empty solutions, filter |
| -- only the groups with a valid result. Note that the result will be |
| -- reversed compared to the original list. |
| filterMGResults :: [(Group.Group, Result AllocSolution)] |
| -> [(Group.Group, AllocSolution)] |
| filterMGResults = foldl' fn [] |
| where unallocable = not . Group.isAllocable |
| fn accu (grp, rasol) = |
| case rasol of |
| Bad _ -> accu |
| Ok sol | isNothing (asSolution sol) -> accu |
| | unallocable grp -> accu |
| | otherwise -> (grp, sol):accu |
| |
| -- | Sort multigroup results based on policy and score. |
| sortMGResults :: [(Group.Group, AllocSolution)] |
| -> [(Group.Group, AllocSolution)] |
| sortMGResults sols = |
| let extractScore (_, _, _, x) = x |
| solScore (grp, sol) = (Group.allocPolicy grp, |
| (extractScore . fromJust . asSolution) sol) |
| in sortBy (comparing solScore) sols |
| |
| -- | Removes node groups which can't accommodate the instance. |
| filterValidGroups :: [(Group.Group, (Node.List, Instance.List))] |
| -> Instance.Instance |
| -> ([(Group.Group, (Node.List, Instance.List))], [String]) |
| filterValidGroups [] _ = ([], []) |
| filterValidGroups (ng:ngs) inst = |
| let (valid_ngs, msgs) = filterValidGroups ngs inst |
| hasNetwork nic = case Nic.network nic of |
| Just net -> net `elem` Group.networks (fst ng) |
| Nothing -> True |
| hasRequiredNetworks = all hasNetwork (Instance.nics inst) |
| in if hasRequiredNetworks |
| then (ng:valid_ngs, msgs) |
| else (valid_ngs, |
| ("group " ++ Group.name (fst ng) ++ |
| " is not connected to a network required by instance " ++ |
| Instance.name inst):msgs) |
| |
| -- | Finds the best group for an instance on a multi-group cluster. |
| -- |
| -- Only solutions in @preferred@ and @last_resort@ groups will be |
| -- accepted as valid, and additionally if the allowed groups parameter |
| -- is not null then allocation will only be run for those group |
| -- indices. |
| findBestAllocGroup :: Group.List -- ^ The group list |
| -> Node.List -- ^ The node list |
| -> Instance.List -- ^ The instance list |
| -> Maybe [Gdx] -- ^ The allowed groups |
| -> Instance.Instance -- ^ The instance to allocate |
| -> Int -- ^ Required number of nodes |
| -> Result (Group.Group, AllocSolution, [String]) |
| findBestAllocGroup mggl mgnl mgil allowed_gdxs inst cnt = |
| let groups_by_idx = splitCluster mgnl mgil |
| groups = map (\(gid, d) -> (Container.find gid mggl, d)) groups_by_idx |
| groups' = maybe groups |
| (\gs -> filter ((`elem` gs) . Group.idx . fst) groups) |
| allowed_gdxs |
| (groups'', filter_group_msgs) = filterValidGroups groups' inst |
| sols = map (\(gr, (nl, il)) -> |
| (gr, genAllocNodes mggl nl cnt False >>= |
| tryAlloc nl il inst)) |
| groups''::[(Group.Group, Result AllocSolution)] |
| all_msgs = filter_group_msgs ++ concatMap solutionDescription sols |
| goodSols = filterMGResults sols |
| sortedSols = sortMGResults goodSols |
| in case sortedSols of |
| [] -> Bad $ if null groups' |
| then "no groups for evacuation: allowed groups was" ++ |
| show allowed_gdxs ++ ", all groups: " ++ |
| show (map fst groups) |
| else intercalate ", " all_msgs |
| (final_group, final_sol):_ -> return (final_group, final_sol, all_msgs) |
| |
| -- | Try to allocate an instance on a multi-group cluster. |
| tryMGAlloc :: Group.List -- ^ The group list |
| -> Node.List -- ^ The node list |
| -> Instance.List -- ^ The instance list |
| -> Instance.Instance -- ^ The instance to allocate |
| -> Int -- ^ Required number of nodes |
| -> Result AllocSolution -- ^ Possible solution list |
| tryMGAlloc mggl mgnl mgil inst cnt = do |
| (best_group, solution, all_msgs) <- |
| findBestAllocGroup mggl mgnl mgil Nothing inst cnt |
| let group_name = Group.name best_group |
| selmsg = "Selected group: " ++ group_name |
| return $ solution { asLog = selmsg:all_msgs } |
| |
| -- | Calculate the new instance list after an allocation attempt. |
| updateIl :: Instance.List -- ^ The original instance list |
| -> Maybe Node.AllocElement -- ^ The result of the allocation attempt |
| -> Instance.List -- ^ The updated instance list |
| updateIl il Nothing = il |
| updateIl il (Just (_, xi, _, _)) = Container.add (Container.size il) xi il |
| |
| -- | Extract the new node list from the allocation solution. |
| extractNl :: Node.List -- ^ The original node list |
| -> Maybe Node.AllocElement -- ^ The result of the allocation attempt |
| -> Node.List -- ^ The new node list |
| extractNl nl Nothing = nl |
| extractNl _ (Just (xnl, _, _, _)) = xnl |
| |
| -- | Try to allocate a list of instances on a multi-group cluster. |
| allocList :: Group.List -- ^ The group list |
| -> Node.List -- ^ The node list |
| -> Instance.List -- ^ The instance list |
| -> [(Instance.Instance, Int)] -- ^ The instances to allocate |
| -> AllocSolutionList -- ^ Possible solution list |
| -> Result (Node.List, Instance.List, |
| AllocSolutionList) -- ^ The final solution list |
| allocList _ nl il [] result = Ok (nl, il, result) |
| allocList gl nl il ((xi, xicnt):xies) result = do |
| ares <- tryMGAlloc gl nl il xi xicnt |
| let sol = asSolution ares |
| nl' = extractNl nl sol |
| il' = updateIl il sol |
| allocList gl nl' il' xies ((xi, ares):result) |
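| |
| -- A usage sketch (hypothetical instances @inst1@, single-homed, and |
| -- @inst2@, two-node): |
| -- |
| -- > case allocList gl nl il [(inst1, 1), (inst2, 2)] [] of |
| -- >   Ok (_, _, sols) -> mapM_ (putStrLn . describeSolution . snd) sols |
| -- >   Bad msg -> putStrLn $ "allocation failed: " ++ msg |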
| |
| -- | Function which fails if the requested mode is change secondary. |
| -- |
| -- This is useful since, except for DRBD, no other disk template can |
| -- execute a secondary change; thus, we can just call this function |
| -- instead of always checking for secondary mode. After the call to |
| -- this function, whatever mode we have is just a primary change. |
| failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m () |
| failOnSecondaryChange ChangeSecondary dt = |
| fail $ "Instances with disk template '" ++ diskTemplateToRaw dt ++ |
| "' can't execute change secondary" |
| failOnSecondaryChange _ _ = return () |
| |
| -- | Run evacuation for a single instance. |
| -- |
| -- /Note:/ this function should correctly execute both intra-group |
| -- evacuations (in all modes) and inter-group evacuations (in the |
| -- 'ChangeAll' mode). Of course, this requires that the correct list |
| -- of target nodes is passed. |
| nodeEvacInstance :: Node.List -- ^ The node list (cluster-wide) |
| -> Instance.List -- ^ Instance list (cluster-wide) |
| -> EvacMode -- ^ The evacuation mode |
| -> Instance.Instance -- ^ The instance to be evacuated |
| -> Gdx -- ^ The group we're targeting |
| -> [Ndx] -- ^ The list of available nodes |
| -- for allocation |
| -> Result (Node.List, Instance.List, [OpCodes.OpCode]) |
| nodeEvacInstance nl il mode inst@(Instance.Instance |
| {Instance.diskTemplate = dt@DTDiskless}) |
| gdx avail_nodes = |
| failOnSecondaryChange mode dt >> |
| evacOneNodeOnly nl il inst gdx avail_nodes |
| |
| nodeEvacInstance _ _ _ (Instance.Instance |
| {Instance.diskTemplate = DTPlain}) _ _ = |
| fail "Instances of type plain cannot be relocated" |
| |
| nodeEvacInstance _ _ _ (Instance.Instance |
| {Instance.diskTemplate = DTFile}) _ _ = |
| fail "Instances of type file cannot be relocated" |
| |
| nodeEvacInstance nl il mode inst@(Instance.Instance |
| {Instance.diskTemplate = dt@DTSharedFile}) |
| gdx avail_nodes = |
| failOnSecondaryChange mode dt >> |
| evacOneNodeOnly nl il inst gdx avail_nodes |
| |
| nodeEvacInstance nl il mode inst@(Instance.Instance |
| {Instance.diskTemplate = dt@DTBlock}) |
| gdx avail_nodes = |
| failOnSecondaryChange mode dt >> |
| evacOneNodeOnly nl il inst gdx avail_nodes |
| |
| nodeEvacInstance nl il mode inst@(Instance.Instance |
| {Instance.diskTemplate = dt@DTRbd}) |
| gdx avail_nodes = |
| failOnSecondaryChange mode dt >> |
| evacOneNodeOnly nl il inst gdx avail_nodes |
| |
| nodeEvacInstance nl il mode inst@(Instance.Instance |
| {Instance.diskTemplate = dt@DTExt}) |
| gdx avail_nodes = |
| failOnSecondaryChange mode dt >> |
| evacOneNodeOnly nl il inst gdx avail_nodes |
| |
| nodeEvacInstance nl il ChangePrimary |
| inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8}) |
| _ _ = |
| do |
| (nl', inst', _, _) <- opToResult $ applyMove nl inst Failover |
| let idx = Instance.idx inst |
| il' = Container.add idx inst' il |
| ops = iMoveToJob nl' il' idx Failover |
| return (nl', il', ops) |
| |
| nodeEvacInstance nl il ChangeSecondary |
| inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8}) |
| gdx avail_nodes = |
| evacOneNodeOnly nl il inst gdx avail_nodes |
| |
| -- The algorithm for ChangeAll is as follows: |
| -- |
| -- * generate all (primary, secondary) node pairs for the target groups |
| -- * for each pair, execute the needed moves (r:s, f, r:s) and compute |
| -- the final node list state and group score |
| -- * select the best choice via a foldl that uses the same Either |
| -- String solution as the ChangeSecondary mode |
| nodeEvacInstance nl il ChangeAll |
| inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8}) |
| gdx avail_nodes = |
| do |
| let no_nodes = Left "no nodes available" |
| node_pairs = [(p,s) | p <- avail_nodes, s <- avail_nodes, p /= s] |
| (nl', il', ops, _) <- |
| annotateResult "Can't find any good nodes for relocation" . |
| eitherToResult $ |
| foldl' |
| (\accu nodes -> case evacDrbdAllInner nl il inst gdx nodes of |
| Bad msg -> |
| case accu of |
| Right _ -> accu |
| -- we don't need more details (which |
| -- nodes, etc.) as we only selected |
| -- this group if we can allocate on |
| -- it, hence failures will not |
| -- propagate out of this fold loop |
| Left _ -> Left $ "Allocation failed: " ++ msg |
| Ok result@(_, _, _, new_cv) -> |
| let new_accu = Right result in |
| case accu of |
| Left _ -> new_accu |
| Right (_, _, _, old_cv) -> |
| if old_cv < new_cv |
| then accu |
| else new_accu |
| ) no_nodes node_pairs |
| |
| return (nl', il', ops) |
| |
| -- | Generic function for changing one node of an instance. |
| -- |
| -- This is similar to 'nodeEvacInstance' but will be used in a few of |
| -- its sub-patterns. It folds the inner function 'evacOneNodeInner' |
| -- over the list of available nodes, which results in the best choice |
| -- for relocation. |
| evacOneNodeOnly :: Node.List -- ^ The node list (cluster-wide) |
| -> Instance.List -- ^ Instance list (cluster-wide) |
| -> Instance.Instance -- ^ The instance to be evacuated |
| -> Gdx -- ^ The group we're targeting |
| -> [Ndx] -- ^ The list of available nodes |
| -- for allocation |
| -> Result (Node.List, Instance.List, [OpCodes.OpCode]) |
| evacOneNodeOnly nl il inst gdx avail_nodes = do |
| op_fn <- case Instance.mirrorType inst of |
| MirrorNone -> Bad "Can't relocate/evacuate non-mirrored instances" |
| MirrorInternal -> Ok ReplaceSecondary |
| MirrorExternal -> Ok FailoverToAny |
| (nl', inst', _, ndx) <- annotateResult "Can't find any good node" . |
| eitherToResult $ |
| foldl' (evacOneNodeInner nl inst gdx op_fn) |
| (Left "no nodes available") avail_nodes |
| let idx = Instance.idx inst |
| il' = Container.add idx inst' il |
| ops = iMoveToJob nl' il' idx (op_fn ndx) |
| return (nl', il', ops) |
| |
| -- | Inner fold function for changing one node of an instance. |
| -- |
| -- Depending on the instance disk template, this will either change |
| -- the secondary (for DRBD) or the primary node (for shared |
| -- storage). However, the operation is generic otherwise. |
| -- |
| -- The running solution is either a @Left String@, which means we |
| -- don't have yet a working solution, or a @Right (...)@, which |
| -- represents a valid solution; it holds the modified node list, the |
| -- modified instance (after evacuation), the score of that solution, |
| -- and the new secondary node index. |
| evacOneNodeInner :: Node.List -- ^ Cluster node list |
| -> Instance.Instance -- ^ Instance being evacuated |
| -> Gdx -- ^ The group index of the instance |
| -> (Ndx -> IMove) -- ^ Operation constructor |
| -> EvacInnerState -- ^ Current best solution |
| -> Ndx -- ^ Node we're evaluating as target |
| -> EvacInnerState -- ^ New best solution |
| evacOneNodeInner nl inst gdx op_fn accu ndx = |
| case applyMove nl inst (op_fn ndx) of |
| Bad fm -> let fail_msg = "Node " ++ Container.nameOf nl ndx ++ |
| " failed: " ++ show fm |
| in either (const $ Left fail_msg) (const accu) accu |
| Ok (nl', inst', _, _) -> |
| let nodes = Container.elems nl' |
| -- The fromJust below is ugly (it can fail nastily), but |
| -- at this point we should not have any internal mismatches, |
| -- and adding a monad here would be quite involved |
| grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes) |
| new_cv = compCVNodes grpnodes |
| new_accu = Right (nl', inst', new_cv, ndx) |
| in case accu of |
| Left _ -> new_accu |
| Right (_, _, old_cv, _) -> |
| if old_cv < new_cv |
| then accu |
| else new_accu |
| |
| -- | Compute result of changing all nodes of a DRBD instance. |
| -- |
| -- Given the target primary and secondary node (which might be in a |
| -- different group or not), this function will 'execute' all the |
| -- required steps and, assuming all operations succeed, will return |
| -- the modified node and instance lists, the opcodes needed for this |
| -- and the new group score. |
| evacDrbdAllInner :: Node.List -- ^ Cluster node list |
| -> Instance.List -- ^ Cluster instance list |
| -> Instance.Instance -- ^ The instance to be moved |
| -> Gdx -- ^ The target group index |
| -- (which can differ from the |
| -- current group of the |
| -- instance) |
| -> (Ndx, Ndx) -- ^ Tuple of new |
| -- primary\/secondary nodes |
| -> Result (Node.List, Instance.List, [OpCodes.OpCode], Score) |
| evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) = do |
| let primary = Container.find (Instance.pNode inst) nl |
| idx = Instance.idx inst |
| -- if the primary is offline, then we first failover |
| (nl1, inst1, ops1) <- |
| if Node.offline primary |
| then do |
| (nl', inst', _, _) <- |
| annotateResult "Failing over to the secondary" . |
| opToResult $ applyMove nl inst Failover |
| return (nl', inst', [Failover]) |
| else return (nl, inst, []) |
| let (o1, o2, o3) = (ReplaceSecondary t_pdx, |
| Failover, |
| ReplaceSecondary t_sdx) |
| -- we now need to execute a replace secondary to the future |
| -- primary node |
| (nl2, inst2, _, _) <- |
| annotateResult "Changing secondary to new primary" . |
| opToResult $ |
| applyMove nl1 inst1 o1 |
| let ops2 = o1:ops1 |
| -- we now execute another failover, the primary stays fixed now |
| (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" . |
| opToResult $ applyMove nl2 inst2 o2 |
| let ops3 = o2:ops2 |
| -- and finally another replace secondary, to the final secondary |
| (nl4, inst4, _, _) <- |
| annotateResult "Changing secondary to final secondary" . |
| opToResult $ |
| applyMove nl3 inst3 o3 |
| let ops4 = o3:ops3 |
| il' = Container.add idx inst4 il |
| ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4 |
| let nodes = Container.elems nl4 |
| -- The fromJust below is ugly (it can fail nastily), but |
| -- at this point we should not have any internal mismatches, |
| -- and adding a monad here would be quite involved |
| grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes) |
| new_cv = compCVNodes grpnodes |
| return (nl4, il', ops, new_cv) |
| |
| -- | Computes the nodes in a given group which are available for |
| -- allocation. |
| availableGroupNodes :: [(Gdx, [Ndx])] -- ^ Group index/node index assoc list |
| -> IntSet.IntSet -- ^ Nodes that are excluded |
| -> Gdx -- ^ The group for which we |
| -- query the nodes |
| -> Result [Ndx] -- ^ List of available node indices |
| availableGroupNodes group_nodes excl_ndx gdx = do |
| local_nodes <- maybe (Bad $ "Can't find group with index " ++ show gdx) |
| Ok (lookup gdx group_nodes) |
| let avail_nodes = filter (not . flip IntSet.member excl_ndx) local_nodes |
| return avail_nodes |
| |
| -- | Updates the evac solution with the results of an instance |
| -- evacuation. |
| updateEvacSolution :: (Node.List, Instance.List, EvacSolution) |
| -> Idx |
| -> Result (Node.List, Instance.List, [OpCodes.OpCode]) |
| -> (Node.List, Instance.List, EvacSolution) |
| updateEvacSolution (nl, il, es) idx (Bad msg) = |
| (nl, il, es { esFailed = (idx, msg):esFailed es}) |
| updateEvacSolution (_, _, es) idx (Ok (nl, il, opcodes)) = |
| (nl, il, es { esMoved = new_elem:esMoved es |
| , esOpCodes = opcodes:esOpCodes es }) |
| where inst = Container.find idx il |
| new_elem = (idx, |
| instancePriGroup nl inst, |
| Instance.allNodes inst) |
| |
| -- | Node-evacuation IAllocator mode main function. |
| tryNodeEvac :: Group.List -- ^ The cluster groups |
| -> Node.List -- ^ The node list (cluster-wide, not per group) |
| -> Instance.List -- ^ Instance list (cluster-wide) |
| -> EvacMode -- ^ The evacuation mode |
| -> [Idx] -- ^ List of instance (indices) to be evacuated |
| -> Result (Node.List, Instance.List, EvacSolution) |
| tryNodeEvac _ ini_nl ini_il mode idxs = |
| let evac_ndx = nodesToEvacuate ini_il mode idxs |
| offline = map Node.idx . filter Node.offline $ Container.elems ini_nl |
| excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline |
| group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx |
| (Container.elems nl))) $ |
| splitCluster ini_nl ini_il |
| (fin_nl, fin_il, esol) = |
| foldl' (\state@(nl, il, _) inst -> |
| let gdx = instancePriGroup nl inst |
| pdx = Instance.pNode inst in |
| updateEvacSolution state (Instance.idx inst) $ |
| availableGroupNodes group_ndx |
| (IntSet.insert pdx excl_ndx) gdx >>= |
| nodeEvacInstance nl il mode inst gdx |
| ) |
| (ini_nl, ini_il, emptyEvacSolution) |
| (map (`Container.find` ini_il) idxs) |
| in return (fin_nl, fin_il, reverseEvacSolution esol) |
| |
| -- | Change-group IAllocator mode main function. |
| -- |
| -- This is very similar to 'tryNodeEvac', the only difference is that |
| -- we don't choose as target group the current instance group, but |
| -- instead: |
| -- |
| -- 1. at the start of the function, we compute which are the target |
| -- groups; either no groups were passed in, in which case we choose |
| -- all groups out of which we don't evacuate instances, or there were |
| -- some groups passed, in which case we use those |
| -- |
| -- 2. for each instance, we use 'findBestAllocGroup' to choose the |
| -- best group to hold the instance, and then we do what |
| -- 'tryNodeEvac' does, except for this group instead of the current |
| -- instance group. |
| -- |
| -- Note that the correct behaviour of this function relies on the |
| -- function 'nodeEvacInstance' to be able to do correctly both |
| -- intra-group and inter-group moves when passed the 'ChangeAll' mode. |
| tryChangeGroup :: Group.List -- ^ The cluster groups |
| -> Node.List -- ^ The node list (cluster-wide) |
| -> Instance.List -- ^ Instance list (cluster-wide) |
| -> [Gdx] -- ^ Target groups; if empty, any |
| -- groups not being evacuated |
| -> [Idx] -- ^ List of instance (indices) to be evacuated |
| -> Result (Node.List, Instance.List, EvacSolution) |
| tryChangeGroup gl ini_nl ini_il gdxs idxs = |
| let evac_gdxs = nub $ map (instancePriGroup ini_nl . |
| flip Container.find ini_il) idxs |
| target_gdxs = (if null gdxs |
| then Container.keys gl |
| else gdxs) \\ evac_gdxs |
| offline = map Node.idx . filter Node.offline $ Container.elems ini_nl |
| excl_ndx = foldl' (flip IntSet.insert) IntSet.empty offline |
| group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx |
| (Container.elems nl))) $ |
| splitCluster ini_nl ini_il |
| (fin_nl, fin_il, esol) = |
| foldl' (\state@(nl, il, _) inst -> |
| let solution = do |
| let ncnt = Instance.requiredNodes $ |
| Instance.diskTemplate inst |
| (grp, _, _) <- findBestAllocGroup gl nl il |
| (Just target_gdxs) inst ncnt |
| let gdx = Group.idx grp |
| av_nodes <- availableGroupNodes group_ndx |
| excl_ndx gdx |
| nodeEvacInstance nl il ChangeAll inst gdx av_nodes |
| in updateEvacSolution state (Instance.idx inst) solution |
| ) |
| (ini_nl, ini_il, emptyEvacSolution) |
| (map (`Container.find` ini_il) idxs) |
| in return (fin_nl, fin_il, reverseEvacSolution esol) |
| |
| -- | Standard-sized allocation method. |
| -- |
| -- This places instances of the same size on the cluster until we're |
| -- out of space. The result will be a list of identically-sized |
| -- instances. |
| iterateAlloc :: AllocMethod |
| iterateAlloc nl il limit newinst allocnodes ixes cstats = |
| let depth = length ixes |
| newname = printf "new-%d" depth::String |
| newidx = Container.size il |
| newi2 = Instance.setIdx (Instance.setName newinst newname) newidx |
| newlimit = fmap (flip (-) 1) limit |
| in case tryAlloc nl il newi2 allocnodes of |
| Bad s -> Bad s |
| Ok (AllocSolution { asFailures = errs, asSolution = sols3 }) -> |
| let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in |
| case sols3 of |
| Nothing -> newsol |
| Just (xnl, xi, _, _) -> |
| if limit == Just 0 |
| then newsol |
| else iterateAlloc xnl (Container.add newidx xi il) |
| newlimit newinst allocnodes (xi:ixes) |
| (totalResources xnl:cstats) |
| |
| -- | Predicate whether shrinking a single resource can lead to a valid |
| -- allocation. |
| sufficesShrinking :: (Instance.Instance -> AllocSolution) -> Instance.Instance |
| -> FailMode -> Maybe Instance.Instance |
| sufficesShrinking allocFn inst fm = |
| case dropWhile (isNothing . asSolution . fst) |
| . takeWhile (liftA2 (||) (elem fm . asFailures . fst) |
| (isJust . asSolution . fst)) |
| . map (allocFn &&& id) $ |
| iterateOk (`Instance.shrinkByType` fm) inst |
| of x:_ -> Just . snd $ x |
| _ -> Nothing |
| |
| -- | Tiered allocation method. |
| -- |
| -- This places instances on the cluster, and decreases the spec until |
| -- we can allocate again. The result will be a list of decreasing |
| -- instance specs. |
| tieredAlloc :: AllocMethod |
| tieredAlloc nl il limit newinst allocnodes ixes cstats = |
| case iterateAlloc nl il limit newinst allocnodes ixes cstats of |
| Bad s -> Bad s |
| Ok (errs, nl', il', ixes', cstats') -> |
| let newsol = Ok (errs, nl', il', ixes', cstats') |
| ixes_cnt = length ixes' |
| (stop, newlimit) = case limit of |
| Nothing -> (False, Nothing) |
| Just n -> (n <= ixes_cnt, |
| Just (n - ixes_cnt)) |
| sortedErrs = map fst $ sortBy (comparing snd) errs |
| suffShrink = sufficesShrinking (fromMaybe emptyAllocSolution |
| . flip (tryAlloc nl' il') allocnodes) |
| newinst |
| bigSteps = filter isJust . map suffShrink . reverse $ sortedErrs |
| in if stop then newsol else |
| case bigSteps of |
| Just newinst':_ -> tieredAlloc nl' il' newlimit |
| newinst' allocnodes ixes' cstats' |
| _ -> case Instance.shrinkByType newinst . last $ sortedErrs of |
| Bad _ -> newsol |
| Ok newinst' -> tieredAlloc nl' il' newlimit |
| newinst' allocnodes ixes' cstats' |
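| |
| -- A sketch of invoking tiered allocation from a maximal hypothetical |
| -- spec @maxInst@, with no allocation limit and empty accumulators: |
| -- |
| -- > tieredAlloc nl il Nothing maxInst allocnodes [] [] |
| -- |
| -- On success this yields the failure counts, the final cluster state, |
| -- the (decreasingly-sized) allocated instances and the running |
| -- 'CStats' after each allocation. |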
| |
| -- * Formatting functions |
| |
| -- | Given the original and final nodes, computes the relocation description. |
| computeMoves :: Instance.Instance -- ^ The instance to be moved |
| -> String -- ^ The instance name |
| -> IMove -- ^ The move being performed |
| -> String -- ^ New primary |
| -> String -- ^ New secondary |
| -> (String, [String]) |
| -- ^ Tuple of moves and commands list; the moves string contains |
| -- either @/f/@ for failover or @/r:name/@ for replace |
| -- secondary, while the command list holds gnt-instance |
| -- commands (without that prefix), e.g. \"@failover instance1@\" |
| computeMoves i inam mv c d = |
| case mv of |
| Failover -> ("f", [mig]) |
| FailoverToAny _ -> (printf "fa:%s" c, [mig_any]) |
| FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d]) |
| ReplaceSecondary _ -> (printf "r:%s" d, [rep d]) |
| ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig]) |
| ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig]) |
| where morf = if Instance.isRunning i then "migrate" else "failover" |
| mig = printf "%s -f %s" morf inam::String |
| mig_any = printf "%s -f -n %s %s" morf c inam::String |
| rep n = printf "replace-disks -n %s %s" n inam::String |
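| |
| -- For example, a secondary replacement for a hypothetical instance |
| -- named @inst1@, with new secondary @nodeB@: |
| -- |
| -- > computeMoves i "inst1" (ReplaceSecondary 0) "nodeA" "nodeB" |
| -- >   == ("r:nodeB", ["replace-disks -n nodeB inst1"]) |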
| |
| -- | Converts a placement to string format. |
| printSolutionLine :: Node.List -- ^ The node list |
| -> Instance.List -- ^ The instance list |
| -> Int -- ^ Maximum node name length |
| -> Int -- ^ Maximum instance name length |
| -> Placement -- ^ The current placement |
| -> Int -- ^ The index of the placement in |
| -- the solution |
| -> (String, [String]) |
| printSolutionLine nl il nmlen imlen plc pos = |
| let pmlen = (2*nmlen + 1) |
| (i, p, s, mv, c) = plc |
| old_sec = Instance.sNode inst |
| inst = Container.find i il |
| inam = Instance.alias inst |
| npri = Node.alias $ Container.find p nl |
| nsec = Node.alias $ Container.find s nl |
| opri = Node.alias $ Container.find (Instance.pNode inst) nl |
| osec = Node.alias $ Container.find old_sec nl |
| (moves, cmds) = computeMoves inst inam mv npri nsec |
| -- FIXME: this should check instead/also the disk template |
| ostr = if old_sec == Node.noSecondary |
| then printf "%s" opri::String |
| else printf "%s:%s" opri osec::String |
| nstr = if s == Node.noSecondary |
| then printf "%s" npri::String |
| else printf "%s:%s" npri nsec::String |
| in (printf " %3d. %-*s %-*s => %-*s %12.8f a=%s" |
| pos imlen inam pmlen ostr pmlen nstr c moves, |
| cmds) |
| |
| -- | Return the instance and involved nodes in an instance move. |
| -- |
| -- Note that the output list length can vary, and is neither required |
| -- nor guaranteed to be of any specific length. |
| involvedNodes :: Instance.List -- ^ Instance list, used for retrieving |
| -- the instance from its index; note |
| -- that this /must/ be the original |
| -- instance list, so that we can |
| -- retrieve the old nodes |
| -> Placement -- ^ The placement we're investigating, |
| -- containing the new nodes and |
| -- instance index |
| -> [Ndx] -- ^ Resulting list of node indices |
| involvedNodes il plc = |
| let (i, np, ns, _, _) = plc |
| inst = Container.find i il |
| in nub $ [np, ns] ++ Instance.allNodes inst |
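| |
| -- For example (sketch; index @0@ names a hypothetical DRBD instance |
| -- currently placed on nodes 1 and 2): |
| -- |
| -- > involvedNodes il (0, 3, 4, ReplacePrimary 3, 0.0) |
| -- |
| -- yields @[3, 4, 1, 2]@: the new primary and secondary from the |
| -- placement, plus the instance's current nodes, without duplicates. |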
| |
| -- | Inner function for 'splitJobs': it either appends the next job |
| -- to the current jobset, or starts a new jobset. |
| mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx]) |
| mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx) |
| mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _) |
| | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf) |
| | otherwise = ([n]:cjs, ndx) |
| |
| -- | Break a list of moves into independent groups. Note that this |
| -- will reverse the order of jobs. |
| splitJobs :: [MoveJob] -> [JobSet] |
| splitJobs = fst . foldl mergeJobs ([], []) |
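| |
| -- For example (sketch; @j0@, @j1@, @j2@ are 'MoveJob's whose node |
| -- lists are @[1,2]@, @[3,4]@ and @[2,5]@ respectively): |
| -- |
| -- > splitJobs [j0, j1, j2] == [[j2], [j1, j0]] |
| -- |
| -- @j1@ shares no node with @j0@, so both land in one jobset and can |
| -- run in parallel; @j2@ reuses node 2 and therefore starts a new |
| -- jobset, which, per the note above, ends up first in the output. |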
| |
| -- | Given a single job's commands, prefix them with @gnt-instance@ |
| -- and also beautify the display a little. |
| formatJob :: Int -> Int -> (Int, MoveJob) -> [String] |
| formatJob jsn jsl (sn, (_, _, _, cmds)) = |
| let out = |
| printf " echo job %d/%d" jsn sn: |
| printf " check": |
| map (" gnt-instance " ++) cmds |
| in if sn == 1 |
| then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out |
| else out |
| |
| -- | Given a list of jobsets, formats them nicely for display, |
| -- numbering each jobset and the jobs within it. |
| formatCmds :: [JobSet] -> String |
| formatCmds = |
| unlines . |
| concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js)) |
| (zip [1..] js)) . |
| zip [1..] |
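| |
| -- A sketch of the output for a single jobset holding two jobs (the |
| -- instance and node names are hypothetical): |
| -- |
| -- > echo jobset 1, 2 jobs |
| -- >   echo job 1/1 |
| -- >   check |
| -- >   gnt-instance migrate -f instance1 |
| -- >   echo job 1/2 |
| -- >   check |
| -- >   gnt-instance replace-disks -n node3 instance2 |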
| |
| -- | Print the node list. |
| printNodes :: Node.List -> [String] -> String |
| printNodes nl fs = |
| let fields = case fs of |
| [] -> Node.defaultFields |
| "+":rest -> Node.defaultFields ++ rest |
| _ -> fs |
| snl = sortBy (comparing Node.idx) (Container.elems nl) |
| (header, isnum) = unzip $ map Node.showHeader fields |
| in printTable "" header (map (Node.list fields) snl) isnum |
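| |
| -- The field argument is interpreted as follows (the field names |
| -- below are hypothetical examples): |
| -- |
| -- > printNodes nl []                -- the default field set |
| -- > printNodes nl ["+", "pcpu"]     -- default fields plus pcpu |
| -- > printNodes nl ["name", "pcpu"]  -- exactly the listed fields |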
| |
| -- | Print the instance list. |
| printInsts :: Node.List -> Instance.List -> String |
| printInsts nl il = |
| let sil = sortBy (comparing Instance.idx) (Container.elems il) |
| helper inst = [ if Instance.isRunning inst then "R" else " " |
| , Instance.name inst |
| , Container.nameOf nl (Instance.pNode inst) |
| , let sdx = Instance.sNode inst |
| in if sdx == Node.noSecondary |
| then "" |
| else Container.nameOf nl sdx |
| , if Instance.autoBalance inst then "Y" else "N" |
| , printf "%3d" $ Instance.vcpus inst |
| , printf "%5d" $ Instance.mem inst |
| , printf "%5d" $ Instance.dsk inst `div` 1024 |
| , printf "%5.3f" lC |
| , printf "%5.3f" lM |
| , printf "%5.3f" lD |
| , printf "%5.3f" lN |
| ] |
| where DynUtil lC lM lD lN = Instance.util inst |
| header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal" |
| , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ] |
| isnum = False:False:False:False:False:repeat True |
| in printTable "" header (map helper sil) isnum |
| |
| -- | Shows statistics for a given node list. |
| printStats :: String -> Node.List -> String |
| printStats lp nl = |
| let dcvs = compDetailedCV $ Container.elems nl |
| (weights, names) = unzip detailedCVInfo |
| hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs |
| header = [ "Field", "Value", "Weight" ] |
| formatted = map (\(w, h, val) -> |
| [ h |
| , printf "%.8f" val |
| , printf "x%.2f" w |
| ]) hd |
| in printTable lp header formatted $ False:repeat True |
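| |
| -- The resulting table looks roughly like this (metric name and |
| -- value hypothetical): |
| -- |
| -- > Field       Value      Weight |
| -- > free_mem_cv 0.00013286 x1.00 |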
| |
| -- | Convert a placement into a list of OpCodes (basically a job). |
| iMoveToJob :: Node.List -- ^ The node list; only used for node |
| -- names, so any version is good |
| -- (before or after the operation) |
| -> Instance.List -- ^ The instance list; also used for |
| -- names only |
| -> Idx -- ^ The index of the instance being |
| -- moved |
| -> IMove -- ^ The actual move to be described |
| -> [OpCodes.OpCode] -- ^ The list of opcodes equivalent to |
| -- the given move |
| iMoveToJob nl il idx move = |
| let inst = Container.find idx il |
| iname = Instance.name inst |
| lookNode n = case mkNonEmpty (Container.nameOf nl n) of |
| -- FIXME: convert htools codebase to non-empty strings |
| Bad msg -> error $ "Empty node name for idx " ++ |
| show n ++ ": " ++ msg ++ "??" |
| Ok ne -> Just ne |
| opF = OpCodes.OpInstanceMigrate |
| { OpCodes.opInstanceName = iname |
| , OpCodes.opInstanceUuid = Nothing |
| , OpCodes.opMigrationMode = Nothing -- default |
| , OpCodes.opOldLiveMode = Nothing -- default as well |
| , OpCodes.opTargetNode = Nothing -- this is drbd |
| , OpCodes.opTargetNodeUuid = Nothing |
| , OpCodes.opAllowRuntimeChanges = False |
| , OpCodes.opIgnoreIpolicy = False |
| , OpCodes.opMigrationCleanup = False |
| , OpCodes.opIallocator = Nothing |
| , OpCodes.opAllowFailover = True } |
| opFA n = opF { OpCodes.opTargetNode = lookNode n } -- not drbd |
| opR n = OpCodes.OpInstanceReplaceDisks |
| { OpCodes.opInstanceName = iname |
| , OpCodes.opInstanceUuid = Nothing |
| , OpCodes.opEarlyRelease = False |
| , OpCodes.opIgnoreIpolicy = False |
| , OpCodes.opReplaceDisksMode = OpCodes.ReplaceNewSecondary |
| , OpCodes.opReplaceDisksList = [] |
| , OpCodes.opRemoteNode = lookNode n |
| , OpCodes.opRemoteNodeUuid = Nothing |
| , OpCodes.opIallocator = Nothing |
| } |
| in case move of |
| Failover -> [ opF ] |
| FailoverToAny np -> [ opFA np ] |
| ReplacePrimary np -> [ opF, opR np, opF ] |
| ReplaceSecondary ns -> [ opR ns ] |
| ReplaceAndFailover np -> [ opR np, opF ] |
| FailoverAndReplace ns -> [ opF, opR ns ] |
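| |
| -- For example, a 'ReplacePrimary' move expands to three opcodes: |
| -- migrate onto the current secondary, rebuild the disks of the old |
| -- primary (now acting as secondary) onto the target node, and |
| -- migrate again so the target becomes the new primary: |
| -- |
| -- > iMoveToJob nl il 0 (ReplacePrimary 5) |
| -- |
| -- gives @[opF, opR 5, opF]@ in the notation above. |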
| |
| -- * Node group functions |
| |
| -- | Computes the group of an instance. |
| instanceGroup :: Node.List -> Instance.Instance -> Result Gdx |
| instanceGroup nl i = |
| let sidx = Instance.sNode i |
| pnode = Container.find (Instance.pNode i) nl |
| snode = if sidx == Node.noSecondary |
| then pnode |
| else Container.find sidx nl |
| pgroup = Node.group pnode |
| sgroup = Node.group snode |
| in if pgroup /= sgroup |
| then fail ("Instance placed across two node groups, primary " ++ |
| show pgroup ++ ", secondary " ++ show sgroup) |
| else return pgroup |
| |
| -- | Computes the group of an instance, based on its primary node. |
| instancePriGroup :: Node.List -> Instance.Instance -> Gdx |
| instancePriGroup nl i = |
| let pnode = Container.find (Instance.pNode i) nl |
| in Node.group pnode |
| |
| -- | Compute the list of badly allocated instances (split across node |
| -- groups). |
| findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance] |
| findSplitInstances nl = |
| filter (not . isOk . instanceGroup nl) . Container.elems |
| |
| -- | Splits a cluster into the component node groups. |
| splitCluster :: Node.List -> Instance.List -> |
| [(Gdx, (Node.List, Instance.List))] |
| splitCluster nl il = |
| let ngroups = Node.computeGroups (Container.elems nl) |
| in map (\(gdx, nodes) -> |
| let nidxs = map Node.idx nodes |
| nodes' = zip nidxs nodes |
| instances = Container.filter ((`elem` nidxs) . Instance.pNode) il |
| in (gdx, (Container.fromList nodes', instances))) ngroups |
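| |
| -- A schematic example for a cluster with two node groups: |
| -- |
| -- > splitCluster nl il |
| -- >   == [ (0, (group0Nodes, group0Insts)) |
| -- >      , (1, (group1Nodes, group1Insts)) ] |
| -- |
| -- Instances are assigned via their primary node only, so an instance |
| -- split across groups ends up in its primary node's component. |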
| |
| -- | Compute the list of nodes that are to be evacuated, given a list |
| -- of instances and an evacuation mode. |
| nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list |
| -> EvacMode -- ^ The evacuation mode we're using |
| -> [Idx] -- ^ List of instance indices being evacuated |
| -> IntSet.IntSet -- ^ Set of node indices |
| nodesToEvacuate il mode = |
| IntSet.delete Node.noSecondary . |
| foldl' (\ns idx -> |
| let i = Container.find idx il |
| pdx = Instance.pNode i |
| sdx = Instance.sNode i |
| dt = Instance.diskTemplate i |
| withSecondary = case dt of |
| DTDrbd8 -> IntSet.insert sdx ns |
| _ -> ns |
| in case mode of |
| ChangePrimary -> IntSet.insert pdx ns |
| ChangeSecondary -> withSecondary |
| ChangeAll -> IntSet.insert pdx withSecondary |
| ) IntSet.empty |
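| |
| -- For example (sketch), in 'ChangeSecondary' mode only the DRBD |
| -- secondaries of the given instances are collected: |
| -- |
| -- > nodesToEvacuate il ChangeSecondary [0, 1] |
| -- |
| -- is the set of secondary node indices of instances 0 and 1; |
| -- instances without a DRBD secondary contribute nothing, and |
| -- 'Node.noSecondary' is always removed from the result. |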