#!/usr/bin/python -u
#
# Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Script for doing QA on Ganeti.
"""
# pylint: disable=C0103
# due to invalid name
import copy
import datetime
import optparse
import sys

import colors
import qa_cluster
import qa_config
import qa_daemon
import qa_env
import qa_error
import qa_filters
import qa_group
import qa_instance
import qa_iptables
import qa_job
import qa_monitoring
import qa_network
import qa_node
import qa_os
import qa_performance
import qa_rapi
import qa_tags
import qa_utils

from ganeti import utils
from ganeti import rapi # pylint: disable=W0611
from ganeti import constants
from ganeti import netutils

import ganeti.rapi.client # pylint: disable=W0611
from ganeti.rapi.client import UsesRapiClient


def _FormatHeader(line, end=72, mark="-", color=None):
  """Fill a line up to the end column.

  """
line = (mark * 4) + " " + line + " "
line += "-" * (end - len(line))
line = line.rstrip()
line = colors.colorize(line, color=color)
return line


def _DescriptionOf(fn):
  """Computes the description of an item.

  """
if fn.__doc__:
desc = fn.__doc__.splitlines()[0].strip()
desc = desc.rstrip(".")
if fn.__name__:
desc = "[" + fn.__name__ + "] " + desc
else:
desc = "%r" % fn
return desc


def RunTest(fn, *args, **kwargs):
  """Runs a test after printing a header.

  """
tstart = datetime.datetime.now()
desc = _DescriptionOf(fn)
print
print _FormatHeader("%s start %s" % (tstart, desc),
color=colors.YELLOW, mark="<")
try:
retval = fn(*args, **kwargs)
print _FormatHeader("PASSED %s" % (desc, ), color=colors.GREEN)
return retval
except Exception, e:
print _FormatHeader("FAILED %s: %s" % (desc, e), color=colors.RED)
raise
finally:
tstop = datetime.datetime.now()
tdelta = tstop - tstart
print _FormatHeader("%s time=%s %s" % (tstop, tdelta, desc),
color=colors.MAGENTA, mark=">")


def ReportTestSkip(desc, testnames):
  """Reports that tests have been skipped.

  @type desc: string
  @param desc: description of the skipped tests
  @type testnames: string or list of string
  @param testnames: either a single test name in the configuration
                    file, or a list of testnames (which will be AND-ed
                    together)

  """
tstart = datetime.datetime.now()
  # TODO: Improve the formatting of test names when non-string names are
  # involved
print _FormatHeader("%s skipping %s, test(s) %s disabled" %
(tstart, desc, testnames),
color=colors.BLUE, mark="*")


def RunTestIf(testnames, fn, *args, **kwargs):
  """Runs a test conditionally.

  @param testnames: either a single test name in the configuration
                    file, or a list of testnames (which will be AND-ed
                    together)

  """
if qa_config.TestEnabled(testnames):
RunTest(fn, *args, **kwargs)
else:
desc = _DescriptionOf(fn)
ReportTestSkip(desc, testnames)


def RunTestBlock(fn, *args, **kwargs):
  """Runs a block of tests after printing a header.

  """
tstart = datetime.datetime.now()
desc = _DescriptionOf(fn)
print
print _FormatHeader("BLOCK %s start %s" % (tstart, desc),
color=[colors.YELLOW, colors.BOLD], mark="v")
try:
return fn(*args, **kwargs)
except Exception, e:
print _FormatHeader("BLOCK FAILED %s: %s" % (desc, e),
color=[colors.RED, colors.BOLD])
raise
finally:
tstop = datetime.datetime.now()
tdelta = tstop - tstart
print _FormatHeader("BLOCK %s time=%s %s" % (tstop, tdelta, desc),
color=[colors.MAGENTA, colors.BOLD], mark="^")


def RunEnvTests():
  """Run several environment tests.

  """
RunTestIf("env", qa_env.TestSshConnection)
RunTestIf("env", qa_env.TestIcmpPing)
RunTestIf("env", qa_env.TestGanetiCommands)


def SetupCluster():
  """Initializes the cluster.

  """
RunTestIf("create-cluster", qa_cluster.TestClusterInit)
if not qa_config.TestEnabled("create-cluster"):
# If the cluster is already in place, we assume that exclusive-storage is
# already set according to the configuration
qa_config.SetExclusiveStorage(qa_config.get("exclusive-storage", False))
qa_rapi.SetupRapi()
qa_group.ConfigureGroups()
# Test on empty cluster
RunTestIf("node-list", qa_node.TestNodeList)
RunTestIf("instance-list", qa_instance.TestInstanceList)
RunTestIf("job-list", qa_job.TestJobList)
RunTestIf("create-cluster", qa_node.TestNodeAddAll)
if not qa_config.TestEnabled("create-cluster"):
    # assume the nodes are already there
qa_node.MarkNodeAddedAll()
RunTestIf("test-jobqueue", qa_cluster.TestJobqueue)
RunTestIf("test-jobqueue", qa_job.TestJobCancellation)
# enable the watcher (unconditionally)
RunTest(qa_daemon.TestResumeWatcher)
RunTestIf("node-list", qa_node.TestNodeList)
# Test listing fields
RunTestIf("node-list", qa_node.TestNodeListFields)
RunTestIf("instance-list", qa_instance.TestInstanceListFields)
RunTestIf("job-list", qa_job.TestJobListFields)
RunTestIf("instance-export", qa_instance.TestBackupListFields)
RunTestIf("node-info", qa_node.TestNodeInfo)


def RunClusterTests():
  """Runs tests related to gnt-cluster.

  """
for test, fn in [
("create-cluster", qa_cluster.TestClusterInitDisk),
("cluster-renew-crypto", qa_cluster.TestClusterRenewCrypto)
]:
RunTestIf(test, fn)
for test, fn in [
("cluster-verify", qa_cluster.TestClusterVerify),
("cluster-reserved-lvs", qa_cluster.TestClusterReservedLvs),
# TODO: add more cluster modify tests
("cluster-modify", qa_cluster.TestClusterModifyEmpty),
("cluster-modify", qa_cluster.TestClusterModifyIPolicy),
("cluster-modify", qa_cluster.TestClusterModifyISpecs),
("cluster-modify", qa_cluster.TestClusterModifyBe),
("cluster-modify", qa_cluster.TestClusterModifyDisk),
("cluster-modify", qa_cluster.TestClusterModifyDiskTemplates),
("cluster-modify", qa_cluster.TestClusterModifyFileStorageDir),
("cluster-modify", qa_cluster.TestClusterModifySharedFileStorageDir),
("cluster-modify", qa_cluster.TestClusterModifyInstallImage),
("cluster-modify", qa_cluster.TestClusterModifyUserShutdown),
("cluster-rename", qa_cluster.TestClusterRename),
("cluster-info", qa_cluster.TestClusterVersion),
("cluster-info", qa_cluster.TestClusterInfo),
("cluster-info", qa_cluster.TestClusterGetmaster),
("cluster-redist-conf", qa_cluster.TestClusterRedistConf),
(["cluster-copyfile", qa_config.NoVirtualCluster],
qa_cluster.TestClusterCopyfile),
("cluster-command", qa_cluster.TestClusterCommand),
("cluster-burnin", qa_cluster.TestClusterBurnin),
("cluster-master-failover", qa_cluster.TestClusterMasterFailover),
("cluster-master-failover",
qa_cluster.TestClusterMasterFailoverWithDrainedQueue),
(["cluster-oob", qa_config.NoVirtualCluster],
qa_cluster.TestClusterOob),
("cluster-instance-communication", qa_cluster.TestInstanceCommunication),
(qa_rapi.Enabled, qa_rapi.TestVersion),
(qa_rapi.Enabled, qa_rapi.TestEmptyCluster),
(qa_rapi.Enabled, qa_rapi.TestRapiQuery),
]:
RunTestIf(test, fn)


def RunRepairDiskSizes():
  """Run the repair disk-sizes test.

  """
RunTestIf("cluster-repair-disk-sizes", qa_cluster.TestClusterRepairDiskSizes)


def RunOsTests():
  """Runs all tests related to gnt-os.

  """
os_enabled = ["os", qa_config.NoVirtualCluster]
if qa_config.TestEnabled(qa_rapi.Enabled):
rapi_getos = qa_rapi.GetOperatingSystems
else:
rapi_getos = None
for fn in [
qa_os.TestOsList,
qa_os.TestOsDiagnose,
]:
RunTestIf(os_enabled, fn)
for fn in [
qa_os.TestOsValid,
qa_os.TestOsInvalid,
qa_os.TestOsPartiallyValid,
]:
RunTestIf(os_enabled, fn, rapi_getos)
for fn in [
qa_os.TestOsModifyValid,
qa_os.TestOsModifyInvalid,
qa_os.TestOsStatesNonExisting,
]:
RunTestIf(os_enabled, fn)


def RunCommonInstanceTests(instance, inst_nodes):
  """Runs a few tests that are common to all disk types.

  """
RunTestIf("instance-shutdown", qa_instance.TestInstanceShutdown, instance)
RunTestIf(["instance-shutdown", "instance-console", qa_rapi.Enabled],
qa_rapi.TestRapiStoppedInstanceConsole, instance)
RunTestIf(["instance-shutdown", "instance-modify"],
qa_instance.TestInstanceStoppedModify, instance)
RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance)
# Test shutdown/start via RAPI
RunTestIf(["instance-shutdown", qa_rapi.Enabled],
qa_rapi.TestRapiInstanceShutdown, instance)
RunTestIf(["instance-shutdown", qa_rapi.Enabled],
qa_rapi.TestRapiInstanceStartup, instance)
RunTestIf("instance-list", qa_instance.TestInstanceList)
RunTestIf("instance-info", qa_instance.TestInstanceInfo, instance)
RunTestIf("instance-modify", qa_instance.TestInstanceModify, instance)
RunTestIf(["instance-modify", qa_rapi.Enabled],
qa_rapi.TestRapiInstanceModify, instance)
RunTestIf("instance-console", qa_instance.TestInstanceConsole, instance)
RunTestIf(["instance-console", qa_rapi.Enabled],
qa_rapi.TestRapiInstanceConsole, instance)
RunTestIf("instance-device-names", qa_instance.TestInstanceDeviceNames,
instance)
DOWN_TESTS = qa_config.Either([
"instance-reinstall",
"instance-rename",
"instance-grow-disk",
])
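  # qa_config.Either() ORs the test names: the instance is shut down if at
  # least one of the above "down" tests is enabled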
# shutdown instance for any 'down' tests
RunTestIf(DOWN_TESTS, qa_instance.TestInstanceShutdown, instance)
# now run the 'down' state tests
RunTestIf("instance-reinstall", qa_instance.TestInstanceReinstall, instance)
RunTestIf(["instance-reinstall", qa_rapi.Enabled],
qa_rapi.TestRapiInstanceReinstall, instance)
if qa_config.TestEnabled("instance-rename"):
tgt_instance = qa_config.AcquireInstance()
try:
rename_source = instance.name
rename_target = tgt_instance.name
# perform instance rename to the same name
RunTest(qa_instance.TestInstanceRenameAndBack,
rename_source, rename_source)
RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceRenameAndBack,
rename_source, rename_source)
if rename_target is not None:
# perform instance rename to a different name, if we have one configured
RunTest(qa_instance.TestInstanceRenameAndBack,
rename_source, rename_target)
RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceRenameAndBack,
rename_source, rename_target)
finally:
tgt_instance.Release()
RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)
# and now start the instance again
RunTestIf(DOWN_TESTS, qa_instance.TestInstanceStartup, instance)
RunTestIf("instance-reboot", qa_instance.TestInstanceReboot, instance)
RunTestIf("tags", qa_tags.TestInstanceTags, instance)
if instance.disk_template == constants.DT_DRBD8:
RunTestIf("cluster-verify",
qa_cluster.TestClusterVerifyDisksBrokenDRBD, instance, inst_nodes)
RunTestIf("cluster-verify", qa_cluster.TestClusterVerify)
RunTestIf(qa_rapi.Enabled, qa_rapi.TestInstance, instance)
# Lists instances, too
RunTestIf("node-list", qa_node.TestNodeList)
# Some jobs have been run, let's test listing them
RunTestIf("job-list", qa_job.TestJobList)


def RunCommonNodeTests():
  """Run a few common node tests.

  """
RunTestIf("node-volumes", qa_node.TestNodeVolumes)
RunTestIf("node-storage", qa_node.TestNodeStorage)
RunTestIf(["node-oob", qa_config.NoVirtualCluster], qa_node.TestOutOfBand)


def RunGroupListTests():
  """Run tests for listing node groups.

  """
RunTestIf("group-list", qa_group.TestGroupList)
RunTestIf("group-list", qa_group.TestGroupListFields)


def RunNetworkTests():
  """Run tests for network management.

  """
RunTestIf("network", qa_network.TestNetworkAddRemove)
RunTestIf("network", qa_network.TestNetworkConnect)
RunTestIf(["network", "tags"], qa_network.TestNetworkTags)


def RunFilterTests():
  """Run tests for job filter management.

  """
RunTestIf("filters", qa_filters.TestFilterList)
RunTestIf("filters", qa_filters.TestFilterListFields)
RunTestIf("filters", qa_filters.TestFilterAddRemove)
RunTestIf("filters", qa_filters.TestFilterReject)
RunTestIf("filters", qa_filters.TestFilterOpCode)
RunTestIf("filters", qa_filters.TestFilterReasonChain)
RunTestIf("filters", qa_filters.TestFilterContinue)
RunTestIf("filters", qa_filters.TestFilterAcceptPause)
RunTestIf("filters", qa_filters.TestFilterWatermark)
RunTestIf("filters", qa_filters.TestFilterRateLimit)
RunTestIf("filters", qa_filters.TestAdHocReasonRateLimit)


def RunGroupRwTests():
  """Run tests for adding/removing/renaming groups.

  """
RunTestIf("group-rwops", qa_group.TestGroupAddRemoveRename)
RunTestIf("group-rwops", qa_group.TestGroupAddWithOptions)
RunTestIf("group-rwops", qa_group.TestGroupModify)
RunTestIf(["group-rwops", qa_rapi.Enabled], qa_rapi.TestRapiNodeGroups)
RunTestIf(["group-rwops", "tags"], qa_tags.TestGroupTags,
qa_group.GetDefaultGroup())


def RunExportImportTests(instance, inodes):
  """Tries to export and import the instance.

  @type inodes: list of nodes
  @param inodes: current nodes of the instance

  """
  # FIXME: export explicitly bails out on file-based storage. Other non-LVM
  # storage types are untested, though. Also note that import could still
  # work, but is deeply embedded into the "export" case.
if qa_config.TestEnabled("instance-export"):
RunTest(qa_instance.TestInstanceExportNoTarget, instance)
pnode = inodes[0]
expnode = qa_config.AcquireNode(exclude=pnode)
try:
name = RunTest(qa_instance.TestInstanceExport, instance, expnode)
RunTest(qa_instance.TestBackupList, expnode)
if qa_config.TestEnabled("instance-import"):
newinst = qa_config.AcquireInstance()
try:
RunTest(qa_instance.TestInstanceImport, newinst, pnode,
expnode, name)
# Check if starting the instance works
RunTest(qa_instance.TestInstanceStartup, newinst)
RunTest(qa_instance.TestInstanceRemove, newinst)
finally:
newinst.Release()
finally:
expnode.Release()
# FIXME: inter-cluster-instance-move crashes on file based instances :/
# See Issue 414.
if (qa_config.TestEnabled([qa_rapi.Enabled, "inter-cluster-instance-move"])):
newinst = qa_config.AcquireInstance()
try:
tnode = qa_config.AcquireNode(exclude=inodes)
try:
RunTest(qa_rapi.TestInterClusterInstanceMove, instance, newinst,
inodes, tnode)
finally:
tnode.Release()
finally:
newinst.Release()


def RunDaemonTests(instance):
  """Test the ganeti-watcher script.

  """
RunTest(qa_daemon.TestPauseWatcher)
RunTestIf("instance-automatic-restart",
qa_daemon.TestInstanceAutomaticRestart, instance)
RunTestIf("instance-consecutive-failures",
qa_daemon.TestInstanceConsecutiveFailures, instance)
RunTest(qa_daemon.TestResumeWatcher)


def RunHardwareFailureTests(instance, inodes):
  """Test cluster internal hardware failure recovery.

  """
RunTestIf("instance-failover", qa_instance.TestInstanceFailover, instance)
RunTestIf(["instance-failover", qa_rapi.Enabled],
qa_rapi.TestRapiInstanceFailover, instance)
RunTestIf("instance-migrate", qa_instance.TestInstanceMigrate, instance)
RunTestIf(["instance-migrate", qa_rapi.Enabled],
qa_rapi.TestRapiInstanceMigrate, instance)
if qa_config.TestEnabled("instance-replace-disks"):
# We just need alternative secondary nodes, hence "- 1"
othernodes = qa_config.AcquireManyNodes(len(inodes) - 1, exclude=inodes)
try:
RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceReplaceDisks, instance)
RunTest(qa_instance.TestReplaceDisks,
instance, inodes, othernodes)
finally:
qa_config.ReleaseManyNodes(othernodes)
del othernodes
if qa_config.TestEnabled("instance-recreate-disks"):
try:
acquirednodes = qa_config.AcquireManyNodes(len(inodes), exclude=inodes)
othernodes = acquirednodes
except qa_error.OutOfNodesError:
if len(inodes) > 1:
# If the cluster is not big enough, let's reuse some of the nodes, but
# with different roles. In this way, we can test a DRBD instance even on
# a 3-node cluster.
acquirednodes = [qa_config.AcquireNode(exclude=inodes)]
othernodes = acquirednodes + inodes[:-1]
else:
raise
try:
RunTest(qa_instance.TestRecreateDisks,
instance, inodes, othernodes)
finally:
qa_config.ReleaseManyNodes(acquirednodes)
if len(inodes) >= 2:
RunTestIf("node-evacuate", qa_node.TestNodeEvacuate, inodes[0], inodes[1])
RunTestIf("node-failover", qa_node.TestNodeFailover, inodes[0], inodes[1])
RunTestIf("node-migrate", qa_node.TestNodeMigrate, inodes[0], inodes[1])


def RunExclusiveStorageTests():
"""Test exclusive storage."""
if not qa_config.TestEnabled("cluster-exclusive-storage"):
return
node = qa_config.AcquireNode()
try:
old_es = qa_cluster.TestSetExclStorCluster(False)
qa_node.TestExclStorSingleNode(node)
qa_cluster.TestSetExclStorCluster(True)
qa_cluster.TestExclStorSharedPv(node)
if qa_config.TestEnabled("instance-add-plain-disk"):
# Make sure that the cluster doesn't have any pre-existing problem
qa_cluster.AssertClusterVerify()
# Create and allocate instances
instance1 = qa_instance.TestInstanceAddWithPlainDisk([node])
try:
instance2 = qa_instance.TestInstanceAddWithPlainDisk([node])
try:
# cluster-verify checks that disks are allocated correctly
qa_cluster.AssertClusterVerify()
# Remove instances
qa_instance.TestInstanceRemove(instance2)
qa_instance.TestInstanceRemove(instance1)
finally:
instance2.Release()
finally:
instance1.Release()
if qa_config.TestEnabled("instance-add-drbd-disk"):
snode = qa_config.AcquireNode()
try:
qa_cluster.TestSetExclStorCluster(False)
instance = qa_instance.TestInstanceAddWithDrbdDisk([node, snode])
try:
qa_cluster.TestSetExclStorCluster(True)
exp_err = [constants.CV_EINSTANCEUNSUITABLENODE]
qa_cluster.AssertClusterVerify(fail=True, errors=exp_err)
qa_instance.TestInstanceRemove(instance)
finally:
instance.Release()
finally:
snode.Release()
qa_cluster.TestSetExclStorCluster(old_es)
finally:
node.Release()


def RunCustomSshPortTests():
  """Test accessing nodes with custom SSH ports.

  This requires removing nodes, adding them to a new group, and then undoing
  the change.
  """
if not qa_config.TestEnabled("group-custom-ssh-port"):
return
std_port = netutils.GetDaemonPort(constants.SSH)
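  # An arbitrary non-standard SSH port for the test group (assumed to be
  # otherwise unused on the QA nodes)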
port = 211
master = qa_config.GetMasterNode()
with qa_config.AcquireManyNodesCtx(1, exclude=master) as nodes:
    # Check whether the nodes could be contacted through IPv6;
    # if so, better skip the whole test
for node in nodes:
if qa_utils.UsesIPv6Connection(node.primary, std_port):
print ("Node %s is likely to be reached using IPv6,"
"skipping the test" % (node.primary, ))
return
for node in nodes:
qa_node.NodeRemove(node)
with qa_iptables.RulesContext() as r:
with qa_group.NewGroupCtx() as group:
qa_group.ModifyGroupSshPort(r, group, nodes, port)
for node in nodes:
qa_node.NodeAdd(node, group=group)
# Make sure that the cluster doesn't have any pre-existing problem
qa_cluster.AssertClusterVerify()
# Create and allocate instances
instance1 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
try:
instance2 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
try:
# cluster-verify checks that disks are allocated correctly
qa_cluster.AssertClusterVerify()
# Remove instances
qa_instance.TestInstanceRemove(instance2)
qa_instance.TestInstanceRemove(instance1)
finally:
instance2.Release()
finally:
instance1.Release()
for node in nodes:
qa_node.NodeRemove(node)
for node in nodes:
qa_node.NodeAdd(node)
qa_cluster.AssertClusterVerify()


def _BuildSpecDict(par, mn, st, mx):
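  # Illustrative example (hypothetical values):
  #   _BuildSpecDict("memory-size", 128, 256, 512) =>
  #     {ISPECS_MINMAX: [{ISPECS_MIN: {"memory-size": 128},
  #                       ISPECS_MAX: {"memory-size": 512}}],
  #      ISPECS_STD: {"memory-size": 256}}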
return {
constants.ISPECS_MINMAX: [{
constants.ISPECS_MIN: {par: mn},
constants.ISPECS_MAX: {par: mx},
}],
constants.ISPECS_STD: {par: st},
}


def _BuildDoubleSpecDict(index, par, mn, st, mx):
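  # Illustrative shape of the result (ISPECS_STD is omitted when st is None):
  #   {ISPECS_MINMAX: [<set 0>, <set 1>], ISPECS_STD: {par: st}}
  # where only the set at position 'index' gets the {par: mn}/{par: mx} bounds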
new_spec = {
constants.ISPECS_MINMAX: [{}, {}],
}
if st is not None:
new_spec[constants.ISPECS_STD] = {par: st}
new_spec[constants.ISPECS_MINMAX][index] = {
constants.ISPECS_MIN: {par: mn},
constants.ISPECS_MAX: {par: mx},
}
return new_spec


def TestIPolicyPlainInstance():
  """Test instance policy interaction with instances"""
params = ["memory-size", "cpu-count", "disk-count", "disk-size", "nic-count"]
if not qa_config.IsTemplateSupported(constants.DT_PLAIN):
print "Template %s not supported" % constants.DT_PLAIN
return
# This test assumes that the group policy is empty
(_, old_specs) = qa_cluster.TestClusterSetISpecs()
  # We also assume there is only one min/max bound
assert len(old_specs[constants.ISPECS_MINMAX]) == 1
node = qa_config.AcquireNode()
try:
# Log of policy changes, list of tuples:
# (full_change, incremental_change, policy_violated)
history = []
instance = qa_instance.TestInstanceAddWithPlainDisk([node])
try:
policyerror = [constants.CV_EINSTANCEPOLICY]
for par in params:
(iminval, imaxval) = qa_instance.GetInstanceSpec(instance.name, par)
        # Some specs must be multiples of 4
new_spec = _BuildSpecDict(par, imaxval + 4, imaxval + 4, imaxval + 4)
history.append((None, new_spec, True))
if iminval > 0:
          # Some specs must be multiples of 4
if iminval >= 4:
upper = iminval - 4
else:
upper = iminval - 1
new_spec = _BuildSpecDict(par, 0, upper, upper)
history.append((None, new_spec, True))
history.append((old_specs, None, False))
# Test with two instance specs
double_specs = copy.deepcopy(old_specs)
double_specs[constants.ISPECS_MINMAX] = \
double_specs[constants.ISPECS_MINMAX] * 2
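      # The policy now contains two identical min/max sets; an instance
      # violates the policy only if it fits neither set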
(par1, par2) = params[0:2]
(_, imaxval1) = qa_instance.GetInstanceSpec(instance.name, par1)
(_, imaxval2) = qa_instance.GetInstanceSpec(instance.name, par2)
old_minmax = old_specs[constants.ISPECS_MINMAX][0]
history.extend([
(double_specs, None, False),
        # Violate only the first min/max set; the instance still fits the
        # second one, so no policy violation is expected
(None,
_BuildDoubleSpecDict(0, par1, imaxval1 + 4, imaxval1 + 4,
imaxval1 + 4),
False),
        # Now the second set is violated too; the instance fits neither set,
        # so a policy violation is expected
(None,
_BuildDoubleSpecDict(1, par2, imaxval2 + 4, None, imaxval2 + 4),
True),
        # Restore the first set; only the second set remains violated, so
        # the instance is compliant again
(None,
_BuildDoubleSpecDict(0, par1,
old_minmax[constants.ISPECS_MIN][par1],
old_specs[constants.ISPECS_STD][par1],
old_minmax[constants.ISPECS_MAX][par1]),
False),
(old_specs, None, False),
])
# Apply the changes, and check policy violations after each change
qa_cluster.AssertClusterVerify()
for (new_specs, diff_specs, failed) in history:
qa_cluster.TestClusterSetISpecs(new_specs=new_specs,
diff_specs=diff_specs)
if failed:
qa_cluster.AssertClusterVerify(warnings=policyerror)
else:
qa_cluster.AssertClusterVerify()
qa_instance.TestInstanceRemove(instance)
finally:
instance.Release()
# Now we replay the same policy changes, and we expect that the instance
# cannot be created for the cases where we had a policy violation above
for (new_specs, diff_specs, failed) in history:
qa_cluster.TestClusterSetISpecs(new_specs=new_specs,
diff_specs=diff_specs)
if failed:
qa_instance.TestInstanceAddWithPlainDisk([node], fail=True)
# Instance creation with no policy violation has been tested already
finally:
node.Release()


def IsExclusiveStorageInstanceTestEnabled():
test_name = "exclusive-storage-instance-tests"
if qa_config.TestEnabled(test_name):
vgname = qa_config.get("vg-name", constants.DEFAULT_VG)
vgscmd = utils.ShellQuoteArgs([
"vgs", "--noheadings", "-o", "pv_count", vgname,
])
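    # e.g. "vgs --noheadings -o pv_count xenvg", which prints the number of
    # PVs in the configured volume group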
nodes = qa_config.GetConfig()["nodes"]
for node in nodes:
try:
pvnum = int(qa_utils.GetCommandOutput(node.primary, vgscmd))
except Exception, e:
msg = ("Cannot get the number of PVs on %s, needed by '%s': %s" %
(node.primary, test_name, e))
raise qa_error.Error(msg)
if pvnum < 2:
raise qa_error.Error("Node %s has not enough PVs (%s) to run '%s'" %
(node.primary, pvnum, test_name))
res = True
else:
res = False
return res


def RunInstanceTests():
  """Create and exercise instances."""
requested_conversions = qa_config.get("convert-disk-templates", [])
supported_conversions = \
set(requested_conversions).difference(constants.DTS_NOT_CONVERTIBLE_TO)
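  # i.e. the requested templates minus those that cannot be converted to;
  # only these are exercised by the "instance-convert-disk" test below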
for (test_name, templ, create_fun, num_nodes) in \
qa_instance.available_instance_tests:
if (qa_config.TestEnabled(test_name) and
qa_config.IsTemplateSupported(templ)):
inodes = qa_config.AcquireManyNodes(num_nodes)
try:
instance = RunTest(create_fun, inodes)
try:
RunTestIf("instance-user-down", qa_instance.TestInstanceUserDown,
instance)
RunTestIf("instance-communication",
qa_instance.TestInstanceCommunication,
instance,
qa_config.GetMasterNode())
RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
RunDaemonTests(instance)
for node in inodes:
RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, node,
templ == constants.DT_DRBD8)
if len(inodes) > 1:
RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
constants.INITIAL_NODE_GROUP_NAME,
inodes[0].primary, inodes[1].primary)
# This test will run once but it will cover all the supported
# user-provided disk template conversions
if qa_config.TestEnabled("instance-convert-disk"):
if (len(supported_conversions) > 1 and
instance.disk_template in supported_conversions):
RunTest(qa_instance.TestInstanceShutdown, instance)
RunTest(qa_instance.TestInstanceConvertDiskTemplate, instance,
supported_conversions)
RunTest(qa_instance.TestInstanceStartup, instance)
              # At this point we clear the set because the requested
              # conversions have been tested
supported_conversions.clear()
else:
test_desc = "Converting instance of template %s" % templ
ReportTestSkip(test_desc, "conversion feature")
RunTestIf("instance-modify-disks",
qa_instance.TestInstanceModifyDisks, instance)
RunCommonInstanceTests(instance, inodes)
if qa_config.TestEnabled("instance-modify-primary"):
othernode = qa_config.AcquireNode()
RunTest(qa_instance.TestInstanceModifyPrimaryAndBack,
instance, inodes[0], othernode)
othernode.Release()
RunGroupListTests()
RunExportImportTests(instance, inodes)
RunHardwareFailureTests(instance, inodes)
RunRepairDiskSizes()
RunTestIf(["rapi", "instance-data-censorship"],
qa_rapi.TestInstanceDataCensorship, instance, inodes)
RunTest(qa_instance.TestInstanceRemove, instance)
finally:
instance.Release()
del instance
finally:
qa_config.ReleaseManyNodes(inodes)
qa_cluster.AssertClusterVerify()
else:
test_desc = "Creating instances of template %s" % templ
if not qa_config.TestEnabled(test_name):
ReportTestSkip(test_desc, test_name)
else:
ReportTestSkip(test_desc, "disk template %s" % templ)


def RunMonitoringTests():
RunTestIf("mon-collector", qa_monitoring.TestInstStatusCollector)


PARALLEL_TEST_DICT = {
"parallel-failover": qa_performance.TestParallelInstanceFailover,
"parallel-migration": qa_performance.TestParallelInstanceMigration,
"parallel-replace-disks": qa_performance.TestParallelInstanceReplaceDisks,
"parallel-reboot": qa_performance.TestParallelInstanceReboot,
"parallel-reinstall": qa_performance.TestParallelInstanceReinstall,
"parallel-rename": qa_performance.TestParallelInstanceRename,
}


def RunPerformanceTests():
if not qa_config.TestEnabled("performance"):
ReportTestSkip("performance related tests", "performance")
return
  # For reproducible performance, run performance tests with the watcher
  # paused.
qa_utils.AssertCommand(["gnt-cluster", "watcher", "pause", "4h"])
if qa_config.TestEnabled("jobqueue-performance"):
RunTest(qa_performance.TestParallelMaxInstanceCreationPerformance)
RunTest(qa_performance.TestParallelNodeCountInstanceCreationPerformance)
instances = qa_performance.CreateAllInstances()
RunTest(qa_performance.TestParallelModify, instances)
RunTest(qa_performance.TestParallelInstanceOSOperations, instances)
RunTest(qa_performance.TestParallelInstanceQueries, instances)
qa_performance.RemoveAllInstances(instances)
RunTest(qa_performance.TestJobQueueSubmissionPerformance)
if qa_config.TestEnabled("parallel-performance"):
if qa_config.IsTemplateSupported(constants.DT_DRBD8):
RunTest(qa_performance.TestParallelDRBDInstanceCreationPerformance)
if qa_config.IsTemplateSupported(constants.DT_PLAIN):
RunTest(qa_performance.TestParallelPlainInstanceCreationPerformance)
# Preparations need to be made only if some of these tests are enabled
if qa_config.IsTemplateSupported(constants.DT_DRBD8) and \
qa_config.TestEnabled(qa_config.Either(PARALLEL_TEST_DICT.keys())):
inodes = qa_config.AcquireManyNodes(2)
try:
instance = qa_instance.TestInstanceAddWithDrbdDisk(inodes)
try:
for (test_name, test_fn) in PARALLEL_TEST_DICT.items():
RunTestIf(test_name, test_fn, instance)
finally:
instance.Release()
qa_instance.TestInstanceRemove(instance)
finally:
qa_config.ReleaseManyNodes(inodes)
qa_utils.AssertCommand(["gnt-cluster", "watcher", "continue"])


def RunQa():
  """Main QA body.

  """
RunTestBlock(RunEnvTests)
SetupCluster()
RunTestBlock(RunClusterTests)
RunTestBlock(RunOsTests)
RunTestIf("tags", qa_tags.TestClusterTags)
RunTestBlock(RunCommonNodeTests)
RunTestBlock(RunGroupListTests)
RunTestBlock(RunGroupRwTests)
RunTestBlock(RunNetworkTests)
RunTestBlock(RunFilterTests)
# The master shouldn't be readded or put offline; "delay" needs a non-master
# node to test
pnode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
try:
RunTestIf("node-readd", qa_node.TestNodeReadd, pnode)
RunTestIf("node-modify", qa_node.TestNodeModify, pnode)
RunTestIf("delay", qa_cluster.TestDelay, pnode)
finally:
pnode.Release()
# Make sure the cluster is clean before running instance tests
qa_cluster.AssertClusterVerify()
pnode = qa_config.AcquireNode()
try:
RunTestIf("tags", qa_tags.TestNodeTags, pnode)
if qa_rapi.Enabled():
RunTest(qa_rapi.TestNode, pnode)
if (qa_config.TestEnabled("instance-add-plain-disk")
and qa_config.IsTemplateSupported(constants.DT_PLAIN)):
# Normal instance allocation via RAPI
for use_client in [True, False]:
rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode,
use_client)
try:
if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
RunCommonInstanceTests(rapi_instance, [pnode])
RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
finally:
rapi_instance.Release()
del rapi_instance
# Multi-instance allocation
rapi_instance_one, rapi_instance_two = \
RunTest(qa_rapi.TestRapiInstanceMultiAlloc, pnode)
try:
RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance_one, True)
RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance_two, True)
finally:
rapi_instance_one.Release()
rapi_instance_two.Release()
finally:
pnode.Release()
config_list = [
("default-instance-tests", lambda: None, lambda _: None),
(IsExclusiveStorageInstanceTestEnabled,
lambda: qa_cluster.TestSetExclStorCluster(True),
qa_cluster.TestSetExclStorCluster),
]
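  # Each triple is (test name, setup function, restore function): setup
  # returns the old configuration value, which is later passed back to
  # restore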
for (conf_name, setup_conf_f, restore_conf_f) in config_list:
if qa_config.TestEnabled(conf_name):
oldconf = setup_conf_f()
RunTestBlock(RunInstanceTests)
restore_conf_f(oldconf)
pnode = qa_config.AcquireNode()
try:
if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
for shutdown in [False, True]:
instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, [pnode])
try:
expnode = qa_config.AcquireNode(exclude=pnode)
try:
if shutdown:
# Stop instance before exporting and removing it
RunTest(qa_instance.TestInstanceShutdown, instance)
RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode)
RunTest(qa_instance.TestBackupList, expnode)
finally:
expnode.Release()
finally:
instance.Release()
del expnode
del instance
qa_cluster.AssertClusterVerify()
finally:
pnode.Release()
if qa_rapi.Enabled():
RunTestIf("filters", qa_rapi.TestFilters)
RunTestIf("cluster-upgrade", qa_cluster.TestUpgrade)
RunTestBlock(RunExclusiveStorageTests)
RunTestIf(["cluster-instance-policy", "instance-add-plain-disk"],
TestIPolicyPlainInstance)
RunTestBlock(RunCustomSshPortTests)
RunTestIf(
"instance-add-restricted-by-disktemplates",
qa_instance.TestInstanceCreationRestrictedByDiskTemplates)
RunTestIf("instance-add-osparams", qa_instance.TestInstanceAddOsParams)
RunTestIf("instance-add-osparams", qa_instance.TestSecretOsParams)
# Test removing instance with offline drbd secondary
if qa_config.TestEnabled(["instance-remove-drbd-offline",
"instance-add-drbd-disk"]):
# Make sure the master is not put offline
snode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
try:
pnode = qa_config.AcquireNode(exclude=snode)
try:
instance = qa_instance.TestInstanceAddWithDrbdDisk([pnode, snode])
set_offline = lambda node: qa_node.MakeNodeOffline(node, "yes")
set_online = lambda node: qa_node.MakeNodeOffline(node, "no")
RunTest(qa_instance.TestRemoveInstanceOfflineNode, instance, snode,
set_offline, set_online)
finally:
pnode.Release()
finally:
snode.Release()
qa_cluster.AssertClusterVerify()
RunTestBlock(RunMonitoringTests)
RunPerformanceTests()
RunTestIf("cluster-destroy", qa_node.TestNodeRemoveAll)
RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy)


@UsesRapiClient
def main():
  """Main program.

  """
colors.check_for_colors()
parser = optparse.OptionParser(usage="%prog [options] <config-file>")
parser.add_option("--yes-do-it", dest="yes_do_it",
action="store_true",
help="Really execute the tests")
(opts, args) = parser.parse_args()
if len(args) == 1:
(config_file, ) = args
else:
parser.error("Wrong number of arguments.")
if not opts.yes_do_it:
print ("Executing this script irreversibly destroys any Ganeti\n"
"configuration on all nodes involved. If you really want\n"
"to start testing, supply the --yes-do-it option.")
sys.exit(1)
qa_config.Load(config_file)
primary = qa_config.GetMasterNode().primary
qa_utils.StartMultiplexer(primary)
print ("SSH command for primary node: %s" %
utils.ShellQuoteArgs(qa_utils.GetSSHCommand(primary, "")))
print ("SSH command for other nodes: %s" %
utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", "")))
try:
RunQa()
finally:
qa_utils.CloseMultiplexers()
if __name__ == "__main__":
main()