Merge branch 'stable-2.17'
* stable-2.17
Introduce a non-strict flag for group verify disk
* stable-2.16
Improve error reporting in _VerifyClientCertificates
Simplify some inscrutable map/map/ifilter/zip code
Avoid overuse of operator in watcher *.py
Sprinkle some more list comprehensions
Replace map/partial with list comprehension
Replace uses of map/lambda with more Pythonic code
Replace map(operator.attrgetter, ...) uses
Fix typos in gnt-cluster man page
Hide errors for expected inotify failures in unittest
Add gnt-instance rename --force option
Improve documentation for gnt-instance failover
Allow master failover to ignore offline nodes
Fix LogicalVolume code to work with older /sbin/lvs
Shorten verifyMasterVote failure message
Adding a confirmation before gnt-node --offline no
Removed unnecessary dependency from rpc in cli
Refactor cli exception to its appropriate module
Clean-up of code and fix of pylint warnings
Use fork instead of spawnv in the watcher
Make 'make pep8' happy
Manually fix conflicts in lib/errors.py
Signed-off-by: Brian Foley <bpfoley@google.com>
Reviewed-by: Viktor Bachraty <vbachraty@google.com>
diff --git a/.gitignore b/.gitignore
index 0d19ea9..c84c0d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@
*.py[co]
*.swp
*~
+\#*\#
*.o
*.hpc_o
*.prof_o
@@ -23,6 +24,7 @@
.hpc/
# /
+/.buildbot
/.hsenv
/Makefile
/Makefile.ghc
diff --git a/Makefile.am b/Makefile.am
index cb81b6a..a52a332 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -99,6 +99,7 @@
masterddir = $(pkgpythondir)/masterd
confddir = $(pkgpythondir)/confd
rapidir = $(pkgpythondir)/rapi
+rapi_authdir = $(pkgpythondir)/rapi/auth
rpcdir = $(pkgpythondir)/rpc
rpc_stubdir = $(pkgpythondir)/rpc/stub
serverdir = $(pkgpythondir)/server
@@ -219,6 +220,7 @@
lib/impexpd \
lib/masterd \
lib/rapi \
+ lib/rapi/auth \
lib/rpc \
lib/rpc/stub \
lib/server \
@@ -546,7 +548,8 @@
jqueue_PYTHON = \
lib/jqueue/__init__.py \
- lib/jqueue/exec.py
+ lib/jqueue/exec.py \
+ lib/jqueue/post_hooks_exec.py
storage_PYTHON = \
lib/storage/__init__.py \
@@ -569,6 +572,12 @@
lib/rapi/rlib2.py \
lib/rapi/testutils.py
+rapi_auth_PYTHON = \
+ lib/rapi/auth/__init__.py \
+ lib/rapi/auth/basic_auth.py \
+ lib/rapi/auth/pam.py \
+ lib/rapi/auth/users_file.py
+
http_PYTHON = \
lib/http/__init__.py \
lib/http/auth.py \
@@ -683,6 +692,7 @@
doc/design-draft.rst \
doc/design-file-based-disks-ownership.rst \
doc/design-file-based-storage.rst \
+ doc/design-global-hooks.rst \
doc/design-glusterfs-ganeti-support.rst \
doc/design-hotplug.rst \
doc/design-hroller.rst \
@@ -722,6 +732,7 @@
doc/design-query-splitting.rst \
doc/design-query2.rst \
doc/design-query-splitting.rst \
+ doc/design-rapi-pam.rst \
doc/design-reason-trail.rst \
doc/design-repaird.rst \
doc/design-reservations.rst \
@@ -1355,12 +1366,14 @@
qa/qa_error.py \
qa/qa_filters.py \
qa/qa_group.py \
+ qa/qa_global_hooks.py \
qa/qa_instance.py \
qa/qa_instance_utils.py \
qa/qa_iptables.py \
qa/qa_job.py \
qa/qa_job_utils.py \
qa/qa_logging.py \
+ qa/qa_maintd.py \
qa/qa_monitoring.py \
qa/qa_network.py \
qa/qa_node.py \
@@ -1768,6 +1781,7 @@
test/data/htools/hail-reloc-drbd.json \
test/data/htools/hail-reloc-drbd-crowded.json \
test/data/htools/hbal-avoid-disk-moves.data \
+ test/data/htools/hbal-avoid-long-solutions.data \
test/data/htools/hbal-cpu-speed.data \
test/data/htools/hbal-desiredlocation-1.data \
test/data/htools/hbal-desiredlocation-2.data \
@@ -1877,6 +1891,7 @@
test/data/cluster_config_2.15.json \
test/data/cluster_config_2.16.json \
test/data/cluster_config_2.17.json \
+ test/data/cluster_config_2.18.json \
test/data/instance-minor-pairing.txt \
test/data/instance-disks.txt \
test/data/ip-addr-show-dummy0.txt \
@@ -2132,6 +2147,7 @@
$(jqueue_PYTHON) \
$(storage_PYTHON) \
$(rapi_PYTHON) \
+ $(rapi_auth_PYTHON) \
$(server_PYTHON) \
$(rpc_PYTHON) \
$(rpc_stub_PYTHON) \
@@ -3028,7 +3044,9 @@
commit-check: autotools-check distcheck lint apidoc
autotools-check:
- TESTDATA_DIR=./test/data shelltest $(SHELLTESTARGS) \
+ @test -n "$(SHELLTEST)" || \
+ { echo 'shelltest' not found during configure; exit 1; }
+ TESTDATA_DIR=./test/data $(SHELLTEST) $(SHELLTESTARGS) \
$(abs_top_srcdir)/test/autotools/*-*.test \
-- --hide-successes
diff --git a/NEWS b/NEWS
index 944fd0d..59a3914 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,12 @@
====
+Version 2.18.0 alpha1
+---------------------
+
+*(unreleased)*
+
+
Version 2.17.0 beta1
--------------------
diff --git a/cabal/ganeti.template.cabal b/cabal/ganeti.template.cabal
index a3932d0..bf8aeed 100644
--- a/cabal/ganeti.template.cabal
+++ b/cabal/ganeti.template.cabal
@@ -1,5 +1,5 @@
name: ganeti
-version: 2.16
+version: 2.18
homepage: http://www.ganeti.org
license: BSD2
license-file: COPYING
@@ -54,7 +54,7 @@
, transformers >= 0.3.0.0
, unix >= 2.5.1.0
- , attoparsec >= 0.10.1.1 && < 0.13
+ , attoparsec >= 0.10.1.1 && < 0.13.1
, base64-bytestring >= 1.0.0.1 && < 1.1
, case-insensitive >= 0.4.0.1 && < 1.3
, Crypto >= 4.2.4 && < 4.3
@@ -62,7 +62,7 @@
, hinotify >= 0.3.2 && < 0.4
, hslogger >= 1.1.4 && < 1.3
, json >= 0.5 && < 1.0
- , lens >= 3.10 && <= 4.13.1
+ , lens >= 3.10 && < 4.13.2
, lifted-base >= 0.2.0.3 && < 0.3
, monad-control >= 0.3.1.3 && < 1.1
, MonadCatchIO-transformers >= 0.3.0.0 && < 0.4
@@ -71,8 +71,8 @@
, regex-pcre >= 0.94.2 && < 0.95
, temporary >= 1.1.2.3 && < 1.3
, transformers-base >= 0.4.1 && < 0.5
- , utf8-string >= 0.3.7 && < 0.4
- , zlib >= 0.5.3.3 && < 0.6
+ , utf8-string >= 0.3.7 && < 1.0.2
+ , zlib >= 0.5.3.3 && < 0.6.2
-- Executables:
-- , happy
@@ -81,8 +81,8 @@
if flag(htest)
build-depends:
- HUnit >= 1.2.4.2 && < 1.3
- , QuickCheck >= 2.4.2 && < 2.8
+ HUnit >= 1.2.4.2 && < 1.3.2
+ , QuickCheck >= 2.4.2 && < 2.8.3
, test-framework >= 0.6 && < 0.9
, test-framework-hunit >= 0.2.7 && < 0.4
, test-framework-quickcheck2 >= 0.2.12.1 && < 0.4
diff --git a/configure.ac b/configure.ac
index e9be40a..c808614 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,8 +1,8 @@
# Configure script for Ganeti
m4_define([gnt_version_major], [2])
-m4_define([gnt_version_minor], [17])
+m4_define([gnt_version_minor], [18])
m4_define([gnt_version_revision], [0])
-m4_define([gnt_version_suffix], [~beta1])
+m4_define([gnt_version_suffix], [~alpha1])
m4_define([gnt_version_full],
m4_format([%d.%d.%d%s],
gnt_version_major, gnt_version_minor,
@@ -95,7 +95,7 @@
# --enable-haskell-tests
AC_ARG_ENABLE([haskell-tests],
[AS_HELP_STRING([--enable-haskell-tests],
- m4_normalize([enable additinal Haskell development test code
+ m4_normalize([enable additional Haskell development test code
(default: disabled)]))],
[[if test "$enableval" != yes; then
HTEST=no
@@ -761,6 +761,8 @@
AC_GHC_PKG_CHECK([test-framework-hunit], [], [HS_NODEV=1])
AC_GHC_PKG_CHECK([test-framework-quickcheck2], [], [HS_NODEV=1])
AC_GHC_PKG_CHECK([temporary], [], [HS_NODEV=1])
+AC_PATH_PROG(SHELLTEST, [shelltest], [])
+if test -z "$SHELLTEST"; then HS_NODEV=1; fi
if test -n "$HS_NODEV"; then
AC_MSG_WARN(m4_normalize([Required development modules were not found,
you won't be able to run Haskell unittests]))
@@ -798,7 +800,7 @@
AC_MSG_WARN([hlint not found, checking code will not be possible])
fi
-AM_CONDITIONAL([WANT_HSTESTS], [test "x$HS_NODEV" = x])
+AM_CONDITIONAL([WANT_HSTESTS], [test "x$HS_NODEV" = x && test x$HTEST = xyes ])
AM_CONDITIONAL([WANT_HSAPIDOC], [test "$HS_APIDOC" = yes])
AM_CONDITIONAL([HAS_HLINT], [test "$HLINT"])
diff --git a/devel/build_chroot b/devel/build_chroot
index d1160f6..d74b55f 100755
--- a/devel/build_chroot
+++ b/devel/build_chroot
@@ -22,6 +22,7 @@
# Additional Variables taken from the environmen
# DATA_DIR
# CHROOT_EXTRA_DEBIAN_PACKAGES
+# APT_CACHE
# make the appended variant name more readable
[ -n "$VARIANT" ] && VARIANT="-${VARIANT#-}"
@@ -41,14 +42,15 @@
ACTUAL_DATA_DIR=${ACTUAL_DATA_DIR:-$TEMP_DATA_DIR}
SHA1_LIST='
cabal-install-1.18.0.2.tar.gz 2d1f7a48d17b1e02a1e67584a889b2ff4176a773
+cabal-install-1.20.0.3.tar.gz 444448b0f704420e329e8fc1989b6743c1c8546d
cabal-install-1.22.4.0.tar.gz b98eea96d321cdeed83a201c192dac116e786ec2
cabal-install-1.22.6.0.tar.gz d474b0eef6944af1abef92419cea13cee50993f3
ghc-7.6.3-i386-unknown-linux.tar.bz2 f042b4171a2d4745137f2e425e6949c185f8ea14
ghc-7.6.3-x86_64-unknown-linux.tar.bz2 46ec3f3352ff57fba0dcbc8d9c20f7bcb6924b77
-ghc-7.8.4-i386-unknown-linux-deb7.tar.bz2 4f523f854c37a43b738359506a89a37a9fa9fc5f
-ghc-7.8.4-x86_64-unknown-linux-deb7.tar.bz2 3f68321b064e5c1ffcb05838b85bcc00aa2315b4
-ghc-7.10.2-i386-unknown-linux-deb7.tar.bz2 c759ab9af566f5c3c9b75b702615f1d0c2f999fd
-ghc-7.10.2-x86_64-unknown-linux-deb7.tar.bz2 f028e4a07995353a47286478fc8644f66defa227
+ghc-7.8.4-i386-unknown-linux-deb7.tar.xz a5d4f65b9b063eae476657cb9b93277609c42cf1
+ghc-7.8.4-x86_64-unknown-linux-deb7.tar.xz 11aec12d4bb27f6fa59dcc8535a7a3b3be8cb787
+ghc-7.10.3-i386-deb8-linux.tar.xz 8f4ca6505788deb7f68cde91f38acba68ab37d99
+ghc-7.10.3b-x86_64-deb8-linux.tar.xz c8f277a34bf62d5e8063ee17af427ca5ffe57b68
'
# export all variables needed in the schroot
@@ -142,7 +144,7 @@
cat $ACTUAL_DATA_DIR/final.schroot.conf.in | subst_variables > $FINAL_CHROOT_CONF
#Install the base system
-debootstrap --arch $ARCH $DIST_RELEASE $CHDIR
+debootstrap --arch $ARCH $DIST_RELEASE $CHDIR $APT_CACHE
APT_INSTALL="apt-get install -y --no-install-recommends"
@@ -190,17 +192,17 @@
local TDIR=$( schroot -c $CHNAME -d / -- mktemp -d )
[ -n "$TDIR" ]
if [ "$ARCH" == "amd64" ] ; then
- download "$TDIR"/ghc.tar.bz2 \
- http://www.haskell.org/ghc/dist/${GHC_VERSION}/ghc-${GHC_VERSION}-x86_64-unknown-linux${GHC_VARIANT}.tar.bz2
+ download "$TDIR"/ghc.tar.${GHC_COMP} \
+ http://www.haskell.org/ghc/dist/${GHC_VERSION}/ghc-${GHC_BUILD}-x86_64-${GHC_VARIANT}.tar.${GHC_COMP}
elif [ "$ARCH" == "i386" ] ; then
- download "$TDIR"/ghc.tar.bz2 \
- http://www.haskell.org/ghc/dist/${GHC_VERSION}/ghc-${GHC_VERSION}-i386-unknown-linux${GHC_VARIANT}.tar.bz2
+ download "$TDIR"/ghc.tar.${GHC_COMP} \
+ http://www.haskell.org/ghc/dist/${GHC_VERSION}/ghc-${GHC_BUILD}-i386-${GHC_VARIANT}.tar.${GHC_COMP}
else
echo "Don't know what GHC to download for architecture $ARCH" >&2
return 1
fi
schroot -c $CHNAME -d "$TDIR" -- \
- tar xjf ghc.tar.bz2
+ tar xf ghc.tar.${GHC_COMP}
schroot -c $CHNAME -d "$TDIR/ghc-${GHC_VERSION}" -- \
./configure --prefix=/usr/local
schroot -c $CHNAME -d "$TDIR/ghc-${GHC_VERSION}" -- \
@@ -228,10 +230,12 @@
squeeze)
GHC_VERSION="7.6.3"
- GHC_VARIANT=""
+ GHC_BUILD="7.6.3"
+ GHC_VARIANT="unknown-linux"
+ GHC_COMP="bz2"
CABAL_INSTALL_VERSION="1.18.0.2"
- CABAL_LIB_VERSION=">=1.18.0 && <1.19"
- export GHC_VERSION GHC_VARIANT CABAL_INSTALL_VERSION
+ CABAL_LIB_VERSION="==1.18.1"
+ export GHC_VERSION GHC_BUILD GHC_VARIANT GHC_COMP CABAL_INSTALL_VERSION
# do not install libghc6-network-dev, since it's too old, and just
# confuses the dependencies
@@ -279,10 +283,10 @@
in_chroot -- \
cabal update
- # sinec we're using Cabal >=1.16, we can use the parallel install option
+ # since we're using Cabal >=1.16, we can use the parallel install option
in_chroot -- \
cabal install --global -j --enable-library-profiling \
- attoparsec-0.11.1.0 \
+ attoparsec-0.11.3.4 \
base64-bytestring-1.0.0.1 \
blaze-builder-0.3.3.2 \
case-insensitive-1.1.0.3 \
@@ -295,7 +299,7 @@
hslogger-1.2.3 \
json-0.7 \
lifted-base-0.2.2.0 \
- lens-4.0.4 \
+ lens-4.2 \
MonadCatchIO-transformers-0.3.0.0 \
network-2.4.1.2 \
parallel-3.2.0.4 \
@@ -316,7 +320,6 @@
PSQueue-1.1 \
\
"Cabal $CABAL_LIB_VERSION" \
- cabal-file-th-0.2.3 \
shelltestrunner
#Install selected packages from backports
@@ -375,9 +378,11 @@
in_chroot -- \
cabal install --global \
'base64-bytestring>=1' \
- lens-3.10.2 \
+ lens-4.1.2.1 \
+ unordered-containers-0.2.5.1 \
'lifted-base>=0.1.2' \
- 'hlint>=1.9.12'
+ 'hlint>=1.9.12' \
+ 'haskell-src-exts-1.16.0.1'
;;
jessie)
@@ -395,11 +400,22 @@
libghc-zlib-dev libghc-psqueue-dev \
libghc-base64-bytestring-dev libghc-lens-dev libghc-lifted-base-dev \
libghc-cabal-dev \
+ ghc-prof libghc-network-prof \
+ libghc-test-framework{,-hunit,-quickcheck2}-prof \
+ libghc-json-prof libghc-curl-prof libghc-hinotify-prof \
+ libghc-parallel-prof libghc-utf8-string-prof \
+ libghc-hslogger-prof libghc-crypto-prof \
+ libghc-regex-pcre-prof libghc-attoparsec-prof \
+ libghc-vector-prof libghc-temporary-prof \
+ libghc-snap-server-prof libghc-zlib-prof libghc-psqueue-prof \
+ libghc-base64-bytestring-prof libghc-lens-prof libghc-lifted-base-prof \
+ libghc-cabal-prof \
cabal-install \
python-setuptools python-sphinx python-epydoc graphviz python-pyparsing \
python-simplejson python-pycurl python-pyinotify python-paramiko \
- python-bitarray python-ipaddr python-yaml qemu-utils python-coverage pep8 \
- shelltestrunner python-dev pylint openssh-client \
+ python-bitarray python-ipaddr python-yaml python-psutil \
+ qemu-utils python-coverage pep8 \
+ shelltestrunner python-dev pylint openssh-client ca-certificates \
vim git git-email exuberant-ctags
in_chroot -- \
@@ -412,13 +428,16 @@
jessie-ghc710)
- GHC_VERSION="7.10.2"
- GHC_VARIANT="-deb7"
+ GHC_VERSION="7.10.3"
+ GHC_BUILD="7.10.3"
+ [ "$ARCH" == "amd64" ] && GHC_BUILD="7.10.3b"
+ GHC_VARIANT="deb8-linux"
+ GHC_COMP="xz"
CABAL_INSTALL_VERSION="1.22.6.0"
# the version of the Cabal library below must match the version used by
# CABAL_INSTALL_VERSION, see the dependencies of cabal-install
CABAL_LIB_VERSION=">=1.22.2 && <1.23"
- export GHC_VERSION GHC_VARIANT CABAL_INSTALL_VERSION
+ export GHC_VERSION GHC_BUILD GHC_VARIANT GHC_COMP CABAL_INSTALL_VERSION
in_chroot -- \
$APT_INSTALL \
@@ -439,12 +458,13 @@
python-ipaddr \
python-yaml \
python-paramiko \
+ python-psutil \
git \
git-email \
vim
in_chroot -- \
- $APT_INSTALL python-setuptools python-dev build-essential
+ $APT_INSTALL python-setuptools python-dev build-essential ca-certificates
in_chroot -- \
easy_install \
@@ -468,7 +488,7 @@
cabal update
in_chroot -- \
- cabal install --global \
+ cabal install --global -j --enable-library-profiling \
HUnit-1.2.5.2 \
PSQueue-1.1 \
StateVar-1.1.0.0 \
@@ -543,12 +563,13 @@
jessie-ghc78)
GHC_VERSION="7.8.4"
- GHC_VARIANT="-deb7"
- CABAL_INSTALL_VERSION="1.22.4.0"
- # the version of the Cabal library below must match the version used by
- # CABAL_INSTALL_VERSION, see the dependencies of cabal-install
- CABAL_LIB_VERSION=">=1.22.2 && <1.23"
- export GHC_VERSION GHC_VARIANT CABAL_INSTALL_VERSION
+ GHC_BUILD="7.8.4"
+ GHC_VARIANT="unknown-linux-deb7"
+ GHC_COMP="xz"
+ # Version of cabal-install closest to the jessie install
+ CABAL_INSTALL_VERSION="1.20.0.3"
+ CABAL_LIB_VERSION="==1.20.0.0"
+ export GHC_VERSION GHC_BUILD GHC_VARIANT GHC_COMP CABAL_INSTALL_VERSION
in_chroot -- \
$APT_INSTALL \
@@ -568,12 +589,13 @@
python-ipaddr \
python-yaml \
python-paramiko \
+ python-psutil \
git \
git-email \
vim
in_chroot -- \
- $APT_INSTALL python-setuptools python-dev build-essential
+ $APT_INSTALL python-setuptools python-dev build-essential ca-certificates
in_chroot -- \
easy_install \
@@ -597,7 +619,9 @@
cabal update
# since we're using Cabal >=1.16, we can use the parallel install option
- in_chroot -- \
+ # Set LANG as a workaround for a bug in Cabal 1.20 which breaks
+ # tf-random-0.5 install. See https://github.com/haskell/cabal/issues/1883
+ in_chroot -- env LANG=C.UTF-8 \
cabal install --global -j --enable-library-profiling \
attoparsec==0.12.1.6 \
base64-bytestring==1.0.0.1 \
@@ -612,7 +636,7 @@
hslogger==1.2.8 \
json==0.9.1 \
lifted-base==0.2.3.6 \
- lens==4.9.1 \
+ lens==4.12.3 \
MonadCatchIO-transformers==0.3.1.3 \
network==2.6.0.2 \
parallel==3.2.0.6 \
@@ -621,10 +645,12 @@
temporary==1.2.0.3 \
vector==0.10.12.3 \
zlib==0.5.4.2 \
+ utf8-string-0.3.8 \
\
hlint==1.9.20 \
HUnit==1.2.5.2 \
- QuickCheck==2.8.1 \
+ PSQueue==1.1 \
+ QuickCheck==2.7.6 \
test-framework==0.8.1.1 \
test-framework-hunit==0.3.0.1 \
test-framework-quickcheck2==0.3.0.3 \
@@ -632,7 +658,6 @@
snap-server==0.9.5.1 \
\
"Cabal $CABAL_LIB_VERSION" \
- cabal-file-th==0.2.3 \
shelltestrunner==1.3.5
;;
@@ -656,6 +681,13 @@
libghc-attoparsec-dev \
libghc-vector-dev libghc-temporary-dev libghc-psqueue-dev \
libghc-cabal-dev \
+ ghc-prof \
+ libghc-curl-prof libghc-hinotify-prof \
+ libghc-parallel-prof libghc-utf8-string-prof \
+ libghc-crypto-prof \
+ libghc-attoparsec-prof \
+ libghc-vector-prof libghc-temporary-prof libghc-psqueue-prof \
+ libghc-cabal-prof \
cabal-install \
libpcre3 libpcre3-dev happy hscolour pandoc \
python-setuptools python-sphinx python-epydoc graphviz python-pyparsing \
@@ -678,25 +710,163 @@
# Precise has network-2.4.0.0, which breaks, see
# https://github.com/haskell/network/issues/60
in_chroot -- \
- cabal install --global \
+ cabal install --global --enable-library-profiling \
'base64-bytestring>=1' \
- hslogger-1.2.3 \
- 'hlint>=1.9.12' \
- json-0.7 \
- lens-3.10.2 \
+ 'hslogger-1.2.3' \
+ 'hlint-1.9.22' \
+ 'json-0.7' \
+ 'lens-4.1.2.1' \
'lifted-base>=0.1.2' \
'network>=2.4.0.1' \
'regex-pcre>=0.94.4' \
- parsec-3.1.3 \
+ 'parsec-3.1.3' \
shelltestrunner \
'snap-server>=0.8.1' \
- test-framework-0.8.0.3 \
- test-framework-hunit-0.3.0.1 \
- test-framework-quickcheck2-0.3.0.2 \
+ 'test-framework-0.8.0.3' \
+ 'test-framework-hunit-0.3.0.1' \
+ 'test-framework-quickcheck2-0.3.0.2' \
'transformers>=0.3.0.0' \
zlib-0.5.4.2
;;
+ trusty)
+ # ghc, git-email and other dependencies are hosted in the universe
+ # repository, which is not enabled by default.
+ echo "Adding universe repository..."
+ cat > $CHDIR/etc/apt/sources.list.d/universe.list <<EOF
+deb http://archive.ubuntu.com/ubuntu trusty universe
+EOF
+ in_chroot -- \
+ apt-get update
+
+ echo "Installing packages"
+ in_chroot -- \
+ $APT_INSTALL \
+ autoconf automake ghc ghc-haddock \
+ libghc-curl-dev libghc-hinotify-dev \
+ libghc-parallel-dev libghc-utf8-string-dev \
+ libghc-crypto-dev \
+ libghc-attoparsec-dev \
+ libghc-vector-dev libghc-temporary-dev libghc-psqueue-dev \
+ libghc-cabal-dev \
+ ghc-prof \
+ libghc-curl-prof libghc-hinotify-prof \
+ libghc-parallel-prof libghc-utf8-string-prof \
+ libghc-crypto-prof \
+ libghc-attoparsec-prof \
+ libghc-vector-prof libghc-temporary-prof libghc-psqueue-prof \
+ libghc-cabal-prof \
+ cabal-install \
+ libpcre3 libpcre3-dev happy hscolour pandoc \
+ python-setuptools python-sphinx python-epydoc graphviz python-pyparsing \
+ python-simplejson python-pyinotify python-pycurl python-paramiko \
+ python-bitarray python-ipaddr python-yaml qemu-utils python-coverage pep8 \
+ python-dev pylint openssh-client vim git git-email exuberant-ctags \
+ build-essential
+
+ in_chroot -- \
+ easy_install \
+ logilab-astng==0.24.1 \
+ logilab-common==0.58.3 \
+ mock==1.0.1 \
+ pylint==0.26.0 \
+ pep8==1.3.3
+
+ in_chroot -- \
+ cabal update
+
+ # since we're using Cabal >=1.16, we can use the parallel install option
+ in_chroot -- \
+ cabal install --global -j --enable-library-profiling \
+ 'base64-bytestring>=1' \
+ 'hslogger>=1.2.3' \
+ 'hlint>=1.9.12' \
+ 'json>=0.7' \
+ lens-4.1.2.1 \
+ 'lifted-base>=0.1.2' \
+ 'network>=2.4.0.1' \
+ 'regex-pcre>=0.94.4' \
+ 'parsec>=3.1.3' \
+ shelltestrunner \
+ 'snap-server>=0.8.1' \
+ 'test-framework>=0.8.0.3' \
+ 'test-framework-hunit>=0.3.0.1' \
+ 'test-framework-quickcheck2>=0.3.0.2' \
+ 'transformers>=0.3.0.0' \
+ 'zlib==0.5.4.2'
+ ;;
+
+ xenial)
+ # ghc, git-email and other dependencies are hosted in the universe
+ # repository, which is not enabled by default.
+ echo "Adding universe repository..."
+ cat > $CHDIR/etc/apt/sources.list.d/universe.list <<EOF
+deb http://archive.ubuntu.com/ubuntu xenial universe
+EOF
+ in_chroot -- \
+ apt-get update
+
+ echo "Installing packages"
+ in_chroot -- \
+ $APT_INSTALL \
+ autoconf automake \
+ zlib1g-dev \
+ ghc ghc-haddock \
+ libghc-curl-dev libghc-hinotify-dev \
+ libghc-parallel-dev libghc-utf8-string-dev \
+ libghc-crypto-dev \
+ libghc-attoparsec-dev \
+ libghc-vector-dev libghc-temporary-dev libghc-psqueue-dev \
+ libghc-cabal-dev \
+ ghc-prof \
+ libghc-curl-prof libghc-hinotify-prof \
+ libghc-parallel-prof libghc-utf8-string-prof \
+ libghc-crypto-prof \
+ libghc-attoparsec-prof \
+ libghc-vector-prof libghc-temporary-prof libghc-psqueue-prof \
+ libghc-cabal-prof \
+ cabal-install \
+ libpcre3 libpcre3-dev happy hscolour pandoc \
+ python-setuptools python-sphinx python-epydoc graphviz python-pyparsing \
+ python-simplejson python-pyinotify python-pycurl python-paramiko \
+ python-bitarray python-ipaddr python-yaml qemu-utils python-coverage pep8 \
+ python-dev pylint openssh-client ca-certificates \
+ vim git git-email exuberant-ctags build-essential
+
+ in_chroot -- \
+ easy_install \
+ logilab-astng==0.24.1 \
+ logilab-common==0.58.3 \
+ mock==1.0.1 \
+ pylint==0.26.0 \
+ pep8==1.3.3
+
+ in_chroot -- \
+ cabal update
+
+ # since we're using Cabal >=1.16, we can use the parallel install option
+ in_chroot -- \
+ cabal install --global -j --enable-library-profiling \
+ 'base64-bytestring>=1' \
+ 'hslogger>=1.2.3' \
+ 'hlint>=1.9.12' \
+ 'json>=0.7' \
+ 'QuickCheck==2.7.6' \
+ lens-4.12.3 \
+ 'lifted-base>=0.1.2' \
+ 'network>=2.4.0.1' \
+ 'regex-pcre>=0.94.4' \
+ 'parsec>=3.1.3' \
+ shelltestrunner \
+ 'snap-server>=0.8.1' \
+ 'test-framework>=0.8.0.3' \
+ 'test-framework-hunit>=0.3.0.1' \
+ 'test-framework-quickcheck2>=0.3.0.2' \
+ 'transformers>=0.3.0.0' \
+ 'zlib==0.5.4.2'
+ ;;
+
+
*)
in_chroot -- \
$APT_INSTALL \
@@ -714,7 +884,8 @@
libghc-base64-bytestring-dev \
python-setuptools python-sphinx python-epydoc graphviz python-pyparsing \
python-simplejson python-pyinotify python-pycurl python-paramiko \
- python-bitarray python-ipaddr python-yaml qemu-utils python-coverage pep8 \
+ python-bitarray python-ipaddr python-yaml python-psutil \
+ qemu-utils python-coverage pep8 \
shelltestrunner python-dev pylint openssh-client \
vim git git-email exuberant-ctags \
build-essential
@@ -754,8 +925,15 @@
ndisc6 python-openssl openssl \
python-mock fping qemu-utils
-in_chroot -- \
- easy_install psutil
+# Ganeti requires psutil APIs only available in 2.x and removed in 3.0.
+# Only jessie and stretch have new enough packages, so install a pinned
+# version elsewhere.
+case ${DIST_RELEASE} in
+ squeeze|wheezy|precise|trusty)
+ in_chroot -- \
+ easy_install 'psutil>=2.0.0,<3.0.0'
+ ;;
+esac
in_chroot -- \
easy_install jsonpointer \
diff --git a/doc/design-ceph-ganeti-support.rst b/doc/design-ceph-ganeti-support.rst
index 7ec865c..086ac7c 100644
--- a/doc/design-ceph-ganeti-support.rst
+++ b/doc/design-ceph-ganeti-support.rst
@@ -68,11 +68,20 @@
Updated commands
----------------
-::
- $ gnt-instance info
+
+The following are the affected commands::
+
+ $ gnt-instance info
``access:userspace/kernelspace`` will be added to Disks category. This
-output applies to KVM based instances only.
+output applies to KVM based instances only::
+
+ $ gnt-cluster modify -D rbd:user-id=foobar
+
+The user id for ceph authentication is an optional setting. If it is not
+provided, then no special option is passed to ceph. If it is provided,
+then all ceph commands are run with the ``--user`` option and the
+configured username.
Ceph configuration on Ganeti nodes
==================================
@@ -120,17 +129,17 @@
Updated Commands
----------------
-Following are the affected commands.::
+Following are the affected commands::
$ gnt-cluster init -S ceph:disk=/dev/sdb,option=value...
During cluster initialization, ceph specific options are provided which
-apply at cluster-level.::
+apply at cluster-level::
$ gnt-cluster modify -S ceph:option=value2...
For now, cluster modification will be allowed when there is no
-initialized storage cluster.::
+initialized storage cluster::
$ gnt-storage init-distributed-storage -s{--storage-type} ceph \
<node-group>
@@ -138,18 +147,18 @@
Ensure that no other node-group is configured as distributed storage
cluster and configure ceph on the specified node-group. If there is no
node in the node-group, it'll only be marked as distributed storage
-enabled and no action will be taken.::
+enabled and no action will be taken::
$ gnt-group assign-nodes <group> <node>
It ensures that the node is offline if the node-group specified is
distributed storage capable. Ceph configuration on the newly assigned
-node is not performed at this step.::
+node is not performed at this step::
$ gnt-node --offline
If the node is part of storage node-group, an offline call will stop/remove
-ceph daemons.::
+ceph daemons::
$ gnt-node add --readd
diff --git a/doc/design-chained-jobs.rst b/doc/design-chained-jobs.rst
index 8f06dc0..66867dd 100644
--- a/doc/design-chained-jobs.rst
+++ b/doc/design-chained-jobs.rst
@@ -84,24 +84,24 @@
Example data structures::
- # First job
+ // First job
{
"job_id": "6151",
"ops": [
- { "OP_ID": "OP_INSTANCE_REPLACE_DISKS", ..., },
- { "OP_ID": "OP_INSTANCE_FAILOVER", ..., },
+ { "OP_ID": "OP_INSTANCE_REPLACE_DISKS", /*...*/ },
+ { "OP_ID": "OP_INSTANCE_FAILOVER", /*...*/ },
],
}
- # Second job, runs in parallel with first job
+ // Second job, runs in parallel with first job
{
"job_id": "7687",
"ops": [
- { "OP_ID": "OP_INSTANCE_MIGRATE", ..., },
+ { "OP_ID": "OP_INSTANCE_MIGRATE", /*...*/ }
],
}
- # Third job, depending on success of previous jobs
+ // Third job, depending on success of previous jobs
{
"job_id": "9218",
"ops": [
diff --git a/doc/design-draft.rst b/doc/design-draft.rst
index e7c47a3..e4c9f85 100644
--- a/doc/design-draft.rst
+++ b/doc/design-draft.rst
@@ -25,6 +25,7 @@
design-configlock.rst
design-multi-storage-htools.rst
design-macvtap.rst
+ design-global-hooks.rst
design-scsi-kvm.rst
design-disks.rst
diff --git a/doc/design-global-hooks.rst b/doc/design-global-hooks.rst
new file mode 100644
index 0000000..dab46fd
--- /dev/null
+++ b/doc/design-global-hooks.rst
@@ -0,0 +1,124 @@
+==============================================
+Better Ganeti customization using global hooks
+==============================================
+
+This document describes an enhancement of Ganeti's customization by
+introducing global hooks that will be run before and after each Ganeti
+opcode, even if the job process has died.
+
+.. contents:: :depth: 4
+
+Current state and shortcomings
+==============================
+
+Currently, Ganeti allows customization of operations by running scripts
+in sub-directories of ``@SYSCONFDIR@/ganeti/hooks``. These
+sub-directories are named ``$hook-$phase.d``, where ``$phase`` is either
+``pre`` or ``post`` and ``$hook`` matches the directory name given for
+a hook (e.g. ``cluster-verify-post.d`` or ``node-add-pre.d``). Post
+hooks for opcodes don't run if the job process has died. The
+:doc:`hooks` design document describes the currently existing hooks in
+more detail.
+
+In some situations, e.g., reporting to upper level tools controlling
+Ganeti, it is desirable to run hooks before and after each opcode
+execution. Users currently work around this problem by creating symbolic
+links for each opcode directory. But even then, the problem of job
+process death remains: post-opcode hooks won't run because the opcode
+execution never finished.
+
+Another problem is that some opcodes don't support hooks at all. That
+makes it impossible for external tools to set up a hook for each opcode
+execution, which might be useful, e.g., for monitoring purposes.
+
+Proposed changes
+================
+
+We propose to introduce a new type of hooks, the *global* hooks, that
+run before and after each opcode execution, even if the job process
+dies. The organization of such hooks will be the same as for the
+:ref:`existing per-opcode hooks <hooks-organization>`.
+The same :ref:`common variables <common-variables>` will be available as
+for the usual hooks. In addition to common variables,
+:ref:`additional variables <additional-variables>` and
+:ref:`specialized variables <specialized-variables>`, corresponding to
+the opcode being executed, will also be provided. See the
+:ref:`per-opcode hooks parameters documentation <opcode-params>` for
+more details.
+
+For the opcodes that currently don't support hooks, and thus aren't
+present in the :ref:`opcodes list <opcode-params>`, only the
+:ref:`common variables <common-variables>` and
+:ref:`additional variables <additional-variables>` will be available
+inside the *global* hooks. The *OBJECT_TYPE* variable for such hooks
+will be initialized to the special ``NOT_APPLICABLE`` value. These
+hooks will be executed only on the master node, as their opcodes don't
+provide any lists of target nodes.
+
+For the *global* post hooks executed after a failure or death of
+the job process, only the :ref:`common variables <common-variables>`
+(except *OBJECT_TYPE*) and the
+:ref:`additional variables <additional-variables>` will be provided.
+
+.. _additional-variables:
+
+Additional variables
+~~~~~~~~~~~~~~~~~~~~
+
+The following additional variables are introduced for both pre and post
+hooks:
+
+GANETI_JOB_ID
+  ID of the job the current opcode belongs to.
+
+GANETI_IS_MASTER
+  Shows whether the current node is the master node. It might be
+  useful, e.g., if global hooks are used for logging purposes.
+
+  The ``master`` value means that the node is the master node.
+
+  The ``not_master`` value means that the node is not the master.
+
+Because global hooks are executed even after the job process has died,
+a new environment variable is introduced for the *global* post hooks:
+
+GANETI_POST_STATUS
+  String containing the status of the opcode execution: ``success``,
+  ``error`` or ``disappeared``.
+
+  The ``success`` status means that the logical unit corresponding to
+  the opcode and the non-global post hooks for the opcode have
+  successfully finished.
+
+ The ``error`` status means that the corresponding logical unit or
+ the non-global hooks caused an exception which has been logged.
+
+ The ``disappeared`` status means that the job process has died during
+ the logical unit or the non-global hooks execution.
+
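+As a purely illustrative sketch (the log file path is arbitrary and not
+part of this design), a *global* post hook consuming these variables
+might look like::
+
+    #!/usr/bin/env python
+    # Example global post hook: append one line per opcode to a log
+    # file, using the environment variables described above.
+    import os
+
+    status = os.environ.get("GANETI_POST_STATUS", "unknown")
+    job_id = os.environ.get("GANETI_JOB_ID", "?")
+    is_master = os.environ.get("GANETI_IS_MASTER", "") == "master"
+
+    with open("/var/log/ganeti/global-hooks.log", "a") as log:
+        log.write("job %s: opcode finished with status %s (master: %s)\n"
+                  % (job_id, status, is_master))
+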
+Behaviour details
+~~~~~~~~~~~~~~~~~
+
+*Global* pre hooks will always be executed just before the usual pre
+hooks, on the same node set and on the master node. The result of their
+execution will be ignored. For opcodes which don't support hooks,
+*global* pre hooks will also be executed, but only on the master node.
+
+With the post hooks the situation is more complicated. In case of
+successful job process execution, *global* hooks will be executed just
+after the usual post hooks have run and all the errors have been
+checked. In case of ``error`` status, *global* post hooks will be
+executed only on the master node from the exception handler. Just after
+the *global* post hooks execution, the exception will be raised again
+as usual. If the job process disappears, the scheduler will execute
+the *global* post hooks in a separate process as soon as the job
+process death is registered. The *global* post hooks will be executed
+only for opcodes with an initialized start_timestamp.
+
+.. vim: set textwidth=72 :
+.. Local Variables:
+.. mode: rst
+.. fill-column: 72
+.. End:
diff --git a/doc/design-migration-speed-hbal.rst b/doc/design-migration-speed-hbal.rst
index a0dcfe0..df053a6 100644
--- a/doc/design-migration-speed-hbal.rst
+++ b/doc/design-migration-speed-hbal.rst
@@ -26,3 +26,101 @@
by introducing ``--avoid-disk-moves *FACTOR*`` option which will admit disk
moves only if the gain in the cluster metrics is *FACTOR* times
higher than the gain achievable by non disk moves.
+
+Avoiding insignificant long-time solutions
+==========================================
+
+The next step is to estimate the amount of time required to perform a
+balancing step and to introduce a new term: the ``long-time`` solution.
+
+The ``--long-solution-threshold`` option will specify a duration in seconds.
+A solution exceeding that duration is, by definition, a ``long-time`` solution.
+
+With time estimations we will be able to filter Hbal's sequences and
+eliminate long-time solutions which don't lead to a sufficient cluster metric
+improvement. This can be done by the ``--avoid-long-solutions *FACTOR*``
+option, which will allow only those long-time solutions whose K/N ratio is
+more than *FACTOR*, where K is the factor by which the cluster metric has
+improved and N is the estimated time to perform this solution divided by the
+threshold.
+
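+For illustration (with made-up numbers): with ``--long-solution-threshold``
+set to 1000 seconds, a solution estimated to take 2000 seconds gives N = 2;
+if it improves the cluster metric by a factor of K = 1.5, it is accepted only
+when ``--avoid-long-solutions`` is set below K/N = 0.75.
+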
+The default values for the new options are:
+
+``--long-solution-threshold`` = 1000 seconds
+(all adequate solutions are not ``long-time``)
+
+``--avoid-long-solutions`` = 0.0
+(no filtering by time estimations, feature disabled)
+
+Network bandwidth cluster tags
+==============================
+
+The bandwidth between nodes, between node groups and within the whole cluster
+can be specified "by hand" with cluster tags.
+
+Every node carries its own set of bandwidth tags. Tags from a higher level are
+inherited by lower levels: nodegroups inherit cluster tags and nodes inherit
+nodegroup tags. Tags from a lower level (if present) override higher-level
+tags with the same bandwidth prefixes: node tags override nodegroup tags, just
+as nodegroup tags override cluster tags.
+
+Below are some examples of using bandwidth tags
+(the examples are provided in the Htools Text backend format):
+
+1) A single nodegroup with 5 nodes, all bandwidths symmetric.
+
+group-01|...|nic:100MBit|
+
+*no* node-level bandwidth tags (they are inherited from the group tags)
+
+htools:bandwidth:nic
+htools:bandwidth:nic:100MBit::nic:100MBit::100
+
+
+2) Three nodegroups, with symmetric bandwidths within each nodegroup and
+different bandwidths between nodegroups.
+
+group-01|...|nic:1000MBit (overrides cluster tags)|
+group-02|...|nic:100MBit (overrides cluster tags)|
+group-03|...|inherited by cluster tags (nic:10MBit)|
+
+*no* node-level bandwidth tags (they are inherited from the group tags)
+
+htools:bandwidth:nic
+nic:10MBit
+htools:bandwidth:nic:10MBit::nic:10MBit::10
+htools:bandwidth:nic:100MBit::nic:100MBit::100
+htools:bandwidth:nic:1000MBit::nic:1000MBit::1000
+htools:bandwidth:nic:10MBit::nic:100MBit::10
+htools:bandwidth:nic:10MBit::nic:1000MBit::10
+htools:bandwidth:nic:100MBit::nic:1000MBit::100
+
+
+Network bandwidth estimation
+============================
+
+Balancing time can be estimated by dividing the amount of data to be moved by
+the current network bandwidth between the affected nodes.
+
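+For example (with made-up numbers), moving 100 GB of disk data over a link
+measured at 1 Gbit/s (about 125 MB/s) would be estimated to take roughly
+100000 / 125 = 800 seconds.
+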
+We propose to add a new data collector that will gather information about
+network speed by sending a test file between nodes. By measuring the time
+taken, we can estimate the average network speed between nodegroups in a
+cluster.
+
+DataCollector implementation details
+====================================
+
+The new bandwidth data collector makes it possible to collect actual
+information about the network speed between nodegroups in a cluster. We assume
+that the network bandwidth between any two nodes within one nodegroup is
+almost the same, unlike the network speed between different nodegroups. Thus
+the proposed data collector will provide the data most needed for time
+estimations.
+
+The *scp* utility will be used to send the data. The default size of the file
+to send is 5 MB, in order to obtain adequate estimates of the network speed.
+
+During *dcUpdate* every data collector sends a file of known size to a node
+from another nodegroup (chosen randomly) and measures the time taken. When
+MonD receives the *dcReport* responses from the collectors, it fills in the
+nodegroup bandwidth map of the nodes. The collected information will be used
+to estimate the time of balancing steps. If network speed information for a
+node is available both from bandwidth tags and from the bandwidth data
+collector, the latter will be chosen.
diff --git a/doc/design-node-add.rst b/doc/design-node-add.rst
index e1d460d..d4103f3 100644
--- a/doc/design-node-add.rst
+++ b/doc/design-node-add.rst
@@ -155,7 +155,7 @@
"cluster_name": "cluster.example.com",
"master_ip": "192.168.2.1",
"master_netdev": "br0",
- # …
+ // ...
}
``start_node_daemon``
diff --git a/doc/design-optables.rst b/doc/design-optables.rst
index 6c0c1e0..4c6599f 100644
--- a/doc/design-optables.rst
+++ b/doc/design-optables.rst
@@ -115,6 +115,8 @@
Filter chains are processed in increasing order of priority (lowest number
means highest priority), then watermark, then UUID.
+.. _filter-predicates:
+
Predicates available for the filter rules
-----------------------------------------
diff --git a/doc/design-os.rst b/doc/design-os.rst
index 1ff09c0..d89d133 100644
--- a/doc/design-os.rst
+++ b/doc/design-os.rst
@@ -460,6 +460,11 @@
user-provided value of the parameter, and ``<visibility>`` is either ``public``,
``private`` or ``secret``.
+The OS parameters can also be accessed individually by issuing a GET request
+to::
+
+ http://169.254.169.254/ganeti/<version>/os/parameters/<parameter>
+
The installation scripts to be run inside the virtualized environment will be
available at::
diff --git a/doc/design-ovf-support.rst b/doc/design-ovf-support.rst
index 1b972ae..6524b09 100644
--- a/doc/design-ovf-support.rst
+++ b/doc/design-ovf-support.rst
@@ -122,7 +122,7 @@
<gnt:VersionId/>
<gnt:AutoBalance/>
<gnt:Tags></gnt:Tags>
- <gnt:DiskTemplate</gnt:DiskTemplate>
+ <gnt:DiskTemplate></gnt:DiskTemplate>
<gnt:OperatingSystem>
<gnt:Name/>
<gnt:Parameters></gnt:Parameters>
diff --git a/doc/design-rapi-pam.rst b/doc/design-rapi-pam.rst
new file mode 100644
index 0000000..5d2679e
--- /dev/null
+++ b/doc/design-rapi-pam.rst
@@ -0,0 +1,138 @@
+===============================================
+RAPI authentication and authorization using PAM
+===============================================
+
+.. contents:: :depth: 4
+
+This design document describes how :doc:`rapi` authentication
+and authorization can be refactored to use pluggable authentication
+modules (PAM).
+
+Current State
+=============
+
+Currently :doc:`rapi` supports authentication using *basic auth* over
+the https protocol. The users are stored in a file (usually
+``/var/lib/ganeti/rapi/users``) and have either read or write rights.
+Please read :ref:`rapi-users` for more details.
+
+.. _motivation:
+
+Motivation
+==========
+
+During GanetiCon 2015 the following features were requested by the
+community:
+
+- Support for different authentication methods;
+- Granular access to different RAPI command subsets;
+- Granular access to different target instances.
+
+The last two features may be desired when an administrator wants to
+grant users restricted cluster or instance management rights.
+
+Proposed Implementation
+=======================
+
+Ganeti RAPI will use PAM for *authentication* and *account*
+(authorization) purposes. A ``ganeti-basic`` PAM module, performing
+*authentication* and *account* based on the contents of the
+``ganeti/rapi/users`` file, will be distributed with Ganeti. Ganeti RAPI
+will interact with PAM using the ``ganeti-rapi`` service name. The
+default configuration for the ``ganeti-rapi`` PAM service will just use
+the ``ganeti-basic`` module.
+
+Good documentation on the client-server PAM model is available
+at http://www.linux-pam.org/pre/doc/current-draft.txt.
+
+Authentication Specific Details
+-------------------------------
+
+In case of *basic auth* over http, the username and password will
+be extracted as they are presented in the
+:ref:`standard form <basic-protocol>`. Note that, independently of the
+authentication method, all interactions will be performed via the https
+protocol.
+
+In case of another authentication method, the user's additional
+credentials (e.g. a request signature) should be provided in the
+``Ganeti-RAPI-Credential`` field. The field should be encoded using the
+base64 algorithm, as for *basic auth* over http.
+
+Ganeti will copy the username to the ``PAM_USER`` field of a
+``pam_handler`` and the contents of the ``Ganeti-RAPI-Credential`` http
+header field to the ``PAM_AUTHTOK`` field of the ``pam_handler``.
+
+The user's password will be sent as a reply to each request made by the
+*conversation function* with the ``PAM_PROMPT_ECHO_OFF`` message
+constant. Other requests will just be ignored.
+
+Authorization Specific Details
+------------------------------
+
+Ganeti will pass several parameters that might be useful for the
+*authorization* phase to the modules via private PAM environment
+variables (using ``pam_setenv``):
+
+GANETI_RAPI_URI
+ The requested URI.
+GANETI_REQUEST_BODY
+ The body of a request if any or an empty string otherwise.
+GANETI_REQUEST_METHOD
+ The method of an http request (GET, PUT, POST or DELETE).
+GANETI_RESOURCE_ACCESS
+  The comma-separated access handlers of a resource, if provided in
+  rlib2, or an empty string otherwise.
+
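+As an illustrative sketch only (the function name and the policy below
+are invented for this document; a real module would read these values
+through the PAM environment rather than from a plain dictionary), an
+authorization decision based on these variables could look like::
+
+    # Toy policy: users on the read-only list may only issue GET
+    # requests; everybody else is allowed through unconditionally.
+    def check_access(pam_env, read_only_users, user):
+        method = pam_env.get("GANETI_REQUEST_METHOD", "GET")
+        if user in read_only_users:
+            return method == "GET"
+        return True
+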
+One More Time About the Goals
+=============================
+
+Support for Different Authentication Methods
+--------------------------------------------
+
+The proposed solution allows signatures of any kind to be used instead
+of, or in addition to, the user password. It allows an administrator to
+support more complex and secure authentication schemes than just basic
+authentication over http.
+
+Granular Access to Different Command Subsets
+--------------------------------------------
+
+This functionality can be implemented just by writing a more complex
+authorization module that permits or denies execution of a command
+based on the environment variables passed and some additional config
+file.
+
+Granular Access to Different Target Instances
+---------------------------------------------
+
+For this kind of authorization, a PAM module may be implemented as
+well. The main difference is that, in order to maintain complex access
+rights, the module will have to store user rights and lists of owned
+objects in some kind of dynamic database instead of a simple static
+config file.
+
+Switching Between the Old and the New Implementations
+-----------------------------------------------------
+
+To keep the changes backwards compatible, a new ganeti-rapi daemon
+run-time option, ``--enable_pam_rapi``, will be introduced.
+
+Other Changes
+=============
+
+Although writing a PAM module is a universal solution to the
+authorization problem, sometimes such flexibility is not necessary, or
+it is not available because PAM is disabled. In that case it is still
+possible to provide granular access to the RAPI.
+
+For that purpose, ``RAPI-Auth:username`` will be added to the reason
+trail just before sending a job for further processing. That will
+allow configuring a filter that rejects jobs initiated by a specific
+user, i.e. adding that user to a blacklist. See
+:doc:`design-optables` for more information about job filters.
+
+Additionally, we propose to introduce a new
+:ref:`filter predicate <filter-predicates>`, ``username``, that will
+contain the authenticated user's login and thus make it possible to
+define an allowed user set for each operation.
diff --git a/doc/devnotes.rst b/doc/devnotes.rst
index df89328..ea691ba 100644
--- a/doc/devnotes.rst
+++ b/doc/devnotes.rst
@@ -25,7 +25,7 @@
- `pep8 <https://github.com/jcrocholl/pep8/>`_
- `PyYAML <http://pyyaml.org/>`_
-For older developement (Ganeti < 2.4) ``docbook`` was used instead of
+For older development (Ganeti < 2.4) ``docbook`` was used instead of
``pandoc``.
Note that for pylint, at the current moment the following versions
@@ -187,7 +187,8 @@
For Python tests::
$ export PYTHONPATH=$PWD
- $ python ./test/py/ganeti.%mytest%
+ $ python ./test/py/ganeti.%mytest% # to run a complete test file
+ $ python ./test/py/ganeti.%mytest% Class.testMethod # to run one method
For Haskell tests::
diff --git a/doc/hooks.rst b/doc/hooks.rst
index 667906b..6d2b071 100644
--- a/doc/hooks.rst
+++ b/doc/hooks.rst
@@ -22,6 +22,8 @@
If you want to use hooks scripts, create it on all nodes. This applies
also to all sub directories such as ``node-add-pre.d``.
+.. _hooks-organization:
+
Organisation
------------
@@ -122,6 +124,8 @@
Operation list
--------------
+.. _opcode-params:
+
Node operations
~~~~~~~~~~~~~~~
@@ -580,6 +584,8 @@
scripts receive another set of variables, prefixed with *GANETI_POST_*,
representing the status after the opcode executed.
+.. _common-variables:
+
Common variables
~~~~~~~~~~~~~~~~
@@ -609,6 +615,7 @@
The path to the Ganeti configuration directory (to read, for
example, the *ssconf* files).
+.. _specialized-variables:
Specialised variables
~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/index.rst b/doc/index.rst
index a8b3fba..e5535eb 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -140,6 +140,7 @@
design-plain-redundancy.rst
design-query2.rst
design-query-splitting.rst
+ design-rapi-pam.rst
design-reason-trail.rst
design-repaird.rst
design-restricted-commands.rst
diff --git a/doc/rapi.rst b/doc/rapi.rst
index d6cab78..dfa436d 100644
--- a/doc/rapi.rst
+++ b/doc/rapi.rst
@@ -107,6 +107,7 @@
In the current version ``ganeti-rapi``'s realm, ``Ganeti Remote
API``, can only be changed by modifying the source code.
+.. _basic-protocol:
Protocol
--------
@@ -416,7 +417,7 @@
"memory": 128
}
},
- …
+ // ...
}
@@ -680,11 +681,11 @@
[
{
"name": "group1",
- "uri": "\/2\/groups\/group1"
+ "uri": "/2/groups/group1"
},
{
"name": "group2",
- "uri": "\/2\/groups\/group2"
+ "uri": "/2/groups/group2"
}
]
@@ -705,7 +706,7 @@
"node2.example.com"
],
"uuid": "0d7d407c-262e-49af-881a-6a430034bf43",
- …
+ // ...
},
{
"name": "group2",
@@ -714,9 +715,9 @@
"node3.example.com"
],
"uuid": "f5a277e7-68f9-44d3-a378-4b25ecb5df5c",
- …
+ // ...
},
- …
+ // ...
]
@@ -895,6 +896,7 @@
``DELETE``
~~~~~~~~~~
+.. highlight:: none
Delete a tag.
@@ -904,6 +906,7 @@
/tags?tag=[tag]&tag=[tag]
It supports the ``dry-run`` argument.
+.. highlight:: javascript
.. _rapi-res-networks:
@@ -928,11 +931,11 @@
[
{
"name": "network1",
- "uri": "\/2\/networks\/network1"
+ "uri": "/2/networks/network1"
},
{
"name": "network2",
- "uri": "\/2\/networks\/network2"
+ "uri": "/2/networks/network2"
}
]
@@ -949,19 +952,19 @@
'external_reservations': '10.0.0.0, 10.0.0.1, 10.0.0.15',
'free_count': 13,
'gateway': '10.0.0.1',
- 'gateway6': None,
+ 'gateway6': null,
'group_list': ['default(bridged, prv0)'],
'inst_list': [],
- 'mac_prefix': None,
+ 'mac_prefix': null,
'map': 'XX.............X',
'name': 'nat',
'network': '10.0.0.0/28',
- 'network6': None,
+ 'network6': null,
'reserved_count': 3,
'tags': ['nfdhcpd'],
- …
+ // ...
},
- …
+ // ...
]
@@ -1138,6 +1141,7 @@
``DELETE``
~~~~~~~~~~
+.. highlight:: none
Delete a tag.
@@ -1148,6 +1152,7 @@
It supports the ``dry-run`` argument.
+..highlight:: javascript
.. _rapi-res-instances-multi-alloc:
@@ -1195,11 +1200,11 @@
[
{
"name": "web.example.com",
- "uri": "\/instances\/web.example.com"
+ "uri": "/instances/web.example.com"
},
{
"name": "mail.example.com",
- "uri": "\/instances\/mail.example.com"
+ "uri": "/instances/mail.example.com"
}
]
@@ -1234,9 +1239,9 @@
"admin_state": true,
"os": "debian-etch",
"oper_state": true,
- …
+ // ...
},
- …
+ // ...
]
@@ -2038,11 +2043,11 @@
[
{
"id": "node1.example.com",
- "uri": "\/nodes\/node1.example.com"
+ "uri": "/nodes/node1.example.com"
},
{
"id": "node2.example.com",
- "uri": "\/nodes\/node2.example.com"
+ "uri": "/nodes/node2.example.com"
}
]
@@ -2066,9 +2071,9 @@
"sinst_cnt": 2,
"dfree": 5171712,
"offline": false,
- …
+ // ...
},
- …
+ // ...
]
diff --git a/lib/backend.py b/lib/backend.py
index 58c8b3a..3a71530 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -333,8 +333,9 @@
"""
def decorator(fn):
def wrapper(*args, **kwargs):
- _, myself = ssconf.GetMasterAndMyself()
- nodes = ([myself], [myself]) # these hooks run locally
+      # Although the hooks run locally, we still have to pass a uuid,
+      # which will then be ignored in RunLocalHooks.
+ nodes = ([constants.DUMMY_UUID], [constants.DUMMY_UUID])
env_fn = compat.partial(env_builder_fn, *args, **kwargs)
@@ -2587,12 +2588,6 @@
if iname not in hyper.ListInstances(hvparams=instance.hvparams):
_Fail("Instance %s is not running", iname)
- for idx in range(len(instance.disks_info)):
- link_name = _GetBlockDevSymlinkPath(iname, idx)
- if not os.path.islink(link_name):
- logging.warning("Instance %s is missing symlink %s for disk %d",
- iname, link_name, idx)
-
def GetAllInstancesInfo(hypervisor_list, all_hvparams):
"""Gather data about all instances.
@@ -2785,19 +2780,27 @@
" log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
-def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None):
+def _GetBlockDevSymlinkPath(instance_name, idx=None, uuid=None, _dir=None):
"""Returns symlink path for block device.
"""
if _dir is None:
_dir = pathutils.DISK_LINKS_DIR
+ assert idx is not None or uuid is not None
+
+ # Using the idx is deprecated. Use the uuid instead if it is available.
+ if uuid:
+ ident = uuid
+ else:
+ ident = idx
+
return utils.PathJoin(_dir,
("%s%s%s" %
- (instance_name, constants.DISK_SEPARATOR, idx)))
+ (instance_name, constants.DISK_SEPARATOR, ident)))
-def _SymlinkBlockDev(instance_name, device_path, idx):
+def _SymlinkBlockDev(instance_name, device_path, idx=None, uuid=None):
"""Set up symlinks to a instance's block device.
This is an auxiliary function run when an instance is start (on the primary
@@ -2807,6 +2810,7 @@
@param instance_name: the name of the target instance
@param device_path: path of the physical block device, on the node
@param idx: the disk index
+ @param uuid: the disk uuid
@return: absolute path to the disk's symlink
"""
@@ -2814,7 +2818,8 @@
if not device_path:
return None
- link_name = _GetBlockDevSymlinkPath(instance_name, idx)
+ link_name = _GetBlockDevSymlinkPath(instance_name, idx, uuid)
+
try:
os.symlink(device_path, link_name)
except OSError, err:
@@ -2833,14 +2838,20 @@
"""Remove the block device symlinks belonging to the given instance.
"""
- for idx, _ in enumerate(disks):
- link_name = _GetBlockDevSymlinkPath(instance_name, idx)
+ def _remove_symlink(link_name):
if os.path.islink(link_name):
try:
os.remove(link_name)
except OSError:
logging.exception("Can't remove symlink '%s'", link_name)
+ for idx, disk in enumerate(disks):
+ link_name = _GetBlockDevSymlinkPath(instance_name, uuid=disk.uuid)
+ _remove_symlink(link_name)
+ # Remove also the deprecated symlink (if any)
+ link_name = _GetBlockDevSymlinkPath(instance_name, idx=idx)
+ _remove_symlink(link_name)
+
def _CalculateDeviceURI(instance, disk, device):
"""Get the URI for the device.
@@ -2884,7 +2895,11 @@
str(disk))
device.Open()
try:
- link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx)
+ # Create both index-based and uuid-based symlinks
+ # for backwards compatibility
+ _SymlinkBlockDev(instance.name, device.dev_path, idx=idx)
+ link_name = _SymlinkBlockDev(instance.name, device.dev_path,
+ uuid=disk.uuid)
except OSError, e:
raise errors.BlockDeviceError("Cannot create block device symlink: %s" %
e.strerror)
@@ -3685,7 +3700,10 @@
link_name = None
uri = None
if as_primary:
- link_name = _SymlinkBlockDev(instance.name, dev_path, idx)
+ # Create both index-based and uuid-based symlinks
+ # for backwards compatibility
+ _SymlinkBlockDev(instance.name, dev_path, idx=idx)
+ link_name = _SymlinkBlockDev(instance.name, dev_path, uuid=disk.uuid)
uri = _CalculateDeviceURI(instance, disk, result)
elif result:
return result, result
@@ -4902,7 +4920,11 @@
for idx, rd in enumerate(bdevs):
try:
rd.Open(exclusive=exclusive)
- _SymlinkBlockDev(instance_name, rd.dev_path, idx)
+ _SymlinkBlockDev(instance_name, rd.dev_path, uuid=disks[idx].uuid)
+ # Also create an old type of symlink so that instances
+ # can be migratable, since they may still have deprecated
+ # symlinks in their runtime files.
+ _SymlinkBlockDev(instance_name, rd.dev_path, idx=idx)
except errors.BlockDeviceError, err:
msg.append(str(err))
@@ -5961,8 +5983,7 @@
"""
assert len(node_list) == 1
node = node_list[0]
- _, myself = ssconf.GetMasterAndMyself()
- assert node == myself
+ assert node == constants.DUMMY_UUID
results = self.RunHooks(hpath, phase, env)
diff --git a/lib/bootstrap.py b/lib/bootstrap.py
index 8eb0b4c..50c60b7 100644
--- a/lib/bootstrap.py
+++ b/lib/bootstrap.py
@@ -354,8 +354,14 @@
constants.ST_FILE, constants.ST_SHARED_FILE, constants.ST_GLUSTER
))
+ file_storage_enabled = file_disk_template in enabled_disk_templates
+
if file_storage_dir is None:
- file_storage_dir = default_dir
+ if file_storage_enabled:
+ file_storage_dir = default_dir
+ else:
+ file_storage_dir = ""
+
if not acceptance_fn:
acceptance_fn = \
lambda path: filestorage.CheckFileStoragePathAcceptance(
@@ -364,7 +370,6 @@
_storage_path_acceptance_fn(logging.warning, file_storage_dir,
enabled_disk_templates)
- file_storage_enabled = file_disk_template in enabled_disk_templates
if file_storage_enabled:
try:
acceptance_fn(file_storage_dir)
diff --git a/lib/cli_opts.py b/lib/cli_opts.py
index c81355d..334c961 100644
--- a/lib/cli_opts.py
+++ b/lib/cli_opts.py
@@ -1189,8 +1189,7 @@
help="Specify the default directory (cluster-wide) for mounting Gluster"
" file systems [%s]" %
pathutils.DEFAULT_GLUSTER_STORAGE_DIR,
- metavar="GLUSTERDIR",
- default=pathutils.DEFAULT_GLUSTER_STORAGE_DIR)
+ metavar="GLUSTERDIR", default=None)
NOMODIFY_ETCHOSTS_OPT = cli_option("--no-etc-hosts", dest="modify_etc_hosts",
help="Don't modify %s" % pathutils.ETC_HOSTS,
diff --git a/lib/client/gnt_cluster.py b/lib/client/gnt_cluster.py
index 2cc8328..7c575c0 100644
--- a/lib/client/gnt_cluster.py
+++ b/lib/client/gnt_cluster.py
@@ -2523,8 +2523,7 @@
"verify": (
VerifyCluster, ARGS_NONE,
[VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT,
- DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT,
- VERIFY_CLUTTER_OPT],
+ PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT, VERIFY_CLUTTER_OPT],
"", "Does a check on the cluster configuration"),
"verify-disks": (
VerifyDisks, ARGS_NONE, [PRIORITY_OPT, NODEGROUP_OPT, STRICT_OPT],
diff --git a/lib/cmdlib/cluster/__init__.py b/lib/cmdlib/cluster/__init__.py
index 8182910..924dd8c 100644
--- a/lib/cmdlib/cluster/__init__.py
+++ b/lib/cmdlib/cluster/__init__.py
@@ -323,7 +323,7 @@
master_params = self.cfg.GetMasterNetworkParameters()
# Run post hooks on master node before it's removed
- RunPostHook(self, self.cfg.GetNodeName(master_params.uuid))
+ RunPostHook(self, master_params.uuid)
ems = self.cfg.GetUseExternalMipScript()
result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
@@ -1668,6 +1668,10 @@
self.cluster = self.cfg.GetClusterInfo()
ensure_kvmd = False
+ stop_kvmd_silently = not (
+ constants.HT_KVM in self.cluster.enabled_hypervisors or
+ (self.op.enabled_hypervisors is not None and
+ constants.HT_KVM in self.op.enabled_hypervisors))
active = constants.DATA_COLLECTOR_STATE_ACTIVE
if self.op.enabled_data_collectors is not None:
@@ -1836,7 +1840,7 @@
# this will update the cluster object and sync 'Ssconf', and kvmd
# uses 'Ssconf'.
if ensure_kvmd:
- EnsureKvmdOnNodes(self, feedback_fn)
+ EnsureKvmdOnNodes(self, feedback_fn, silent_stop=stop_kvmd_silently)
if self.op.compression_tools is not None:
self.cfg.SetCompressionTools(self.op.compression_tools)
diff --git a/lib/cmdlib/common.py b/lib/cmdlib/common.py
index 6ee86b9..95a1186 100644
--- a/lib/cmdlib/common.py
+++ b/lib/cmdlib/common.py
@@ -178,16 +178,19 @@
return (inst_uuids, [lu.cfg.GetInstanceName(uuid) for uuid in inst_uuids])
-def RunPostHook(lu, node_name):
+def RunPostHook(lu, node_uuid):
"""Runs the post-hook for an opcode on a single node.
"""
hm = lu.proc.BuildHooksManager(lu)
try:
- hm.RunPhase(constants.HOOKS_PHASE_POST, node_names=[node_name])
+ # Execute usual post hooks, then global post hooks.
+ hm.RunPhase(constants.HOOKS_PHASE_POST, node_uuids=[node_uuid])
+ hm.RunPhase(constants.HOOKS_PHASE_POST, [node_uuid], is_global=True,
+ post_status=constants.POST_HOOKS_STATUS_SUCCESS)
except Exception, err: # pylint: disable=W0703
lu.LogWarning("Errors occurred running hooks on %s: %s",
- node_name, err)
+ node_uuid, err)
def RedistributeAncillaryFiles(lu):
@@ -1537,7 +1540,7 @@
return math.ceil(byte_size / 1024. / 1024.)
-def EnsureKvmdOnNodes(lu, feedback_fn, nodes=None):
+def EnsureKvmdOnNodes(lu, feedback_fn, nodes=None, silent_stop=False):
"""Ensure KVM daemon is running on nodes with KVM instances.
If user shutdown is enabled in the cluster:
@@ -1560,6 +1563,9 @@
@param nodes: if supplied, it overrides the node uuids to start/stop;
this is used mainly for optimization
+ @type silent_stop: bool
+ @param silent_stop: if we should suppress warnings in case KVM daemon is
+ already stopped
"""
cluster = lu.cfg.GetClusterInfo()
@@ -1594,9 +1600,10 @@
# Stop KVM where necessary
if stop_nodes:
results = lu.rpc.call_node_ensure_daemon(stop_nodes, constants.KVMD, False)
- for node_uuid in stop_nodes:
- results[node_uuid].Warn("Failed to stop KVM daemon in node '%s'" %
- node_uuid, feedback_fn)
+ if not silent_stop:
+ for node_uuid in stop_nodes:
+ results[node_uuid].Warn("Failed to stop KVM daemon in node '%s'" %
+ node_uuid, feedback_fn)
def WarnAboutFailedSshUpdates(result, master_uuid, feedback_fn):
diff --git a/lib/cmdlib/node.py b/lib/cmdlib/node.py
index d1eae5e..fa20517 100644
--- a/lib/cmdlib/node.py
+++ b/lib/cmdlib/node.py
@@ -489,7 +489,9 @@
else:
self.cfg.RemoveNodeFromCandidateCerts(self.new_node.uuid, warn_fn=None)
- EnsureKvmdOnNodes(self, feedback_fn, nodes=[self.new_node.uuid])
+ # Ensure that kvmd is in the expected state on the added node.
+ EnsureKvmdOnNodes(self, feedback_fn, nodes=[self.new_node.uuid],
+ silent_stop=True)
# Update SSH setup of all nodes
if self.op.node_setup:
@@ -877,7 +879,11 @@
if self.old_role == self._ROLE_CANDIDATE:
RemoveNodeCertFromCandidateCerts(self.cfg, node.uuid)
- EnsureKvmdOnNodes(self, feedback_fn, nodes=[node.uuid])
+ # KVM configuration never changes here, so disable warnings if KVM is disabled.
+ silent_stop = constants.HT_KVM not in \
+ self.cfg.GetClusterInfo().enabled_hypervisors
+ EnsureKvmdOnNodes(self, feedback_fn, nodes=[node.uuid],
+ silent_stop=silent_stop)
# this will trigger job queue propagation or cleanup if the mc
# flag changed
@@ -1611,7 +1617,7 @@
self.cfg.RemoveNode(self.node.uuid)
# Run post hooks on the node before it's removed
- RunPostHook(self, self.node.name)
+ RunPostHook(self, self.node.uuid)
# we have to call this by name rather than by UUID, as the node is no longer
# in the config
diff --git a/lib/errors.py b/lib/errors.py
index 826e761..5fb885b 100644
--- a/lib/errors.py
+++ b/lib/errors.py
@@ -469,6 +469,12 @@
"""
+class PamRapiAuthError(GenericError):
+ """Error in PAM remote API authenticator initialization.
+
+ """
+
+
class JobSubmittedException(Exception):
"""Job was submitted, client should exit.
@@ -476,7 +482,6 @@
submitted. The handler should print this ID.
This is not an error, just a structured way to exit from clients.
-
"""
diff --git a/lib/hooksmaster.py b/lib/hooksmaster.py
index c23d857..51ee560 100644
--- a/lib/hooksmaster.py
+++ b/lib/hooksmaster.py
@@ -39,7 +39,7 @@
from ganeti import pathutils
-def _RpcResultsToHooksResults(rpc_results):
+def RpcResultsToHooksResults(rpc_results):
"""Function to convert RPC results to the format expected by HooksMaster.
@type rpc_results: dict(node: L{rpc.RpcResult})
@@ -56,7 +56,8 @@
class HooksMaster(object):
def __init__(self, opcode, hooks_path, nodes, hooks_execution_fn,
hooks_results_adapt_fn, build_env_fn, prepare_post_nodes_fn,
- log_fn, htype=None, cluster_name=None, master_name=None):
+ log_fn, htype=None, cluster_name=None, master_name=None,
+ master_uuid=None, job_id=None):
"""Base class for hooks masters.
This class invokes the execution of hooks according to the behaviour
@@ -93,6 +94,10 @@
@param cluster_name: name of the cluster
@type master_name: string
@param master_name: name of the master
+ @type master_uuid: string
+ @param master_uuid: uuid of the master
+ @type job_id: int
+ @param job_id: the id of the job process (used in global post hooks)
"""
self.opcode = opcode
@@ -105,6 +110,8 @@
self.htype = htype
self.cluster_name = cluster_name
self.master_name = master_name
+ self.master_uuid = master_uuid
+ self.job_id = job_id
self.pre_env = self._BuildEnv(constants.HOOKS_PHASE_PRE)
(self.pre_nodes, self.post_nodes) = nodes
@@ -151,7 +158,24 @@
return env
- def _RunWrapper(self, node_list, hpath, phase, phase_env):
+ def _CheckParamsAndExecHooks(self, node_list, hpath, phase, env):
+ """Check rpc parameters and call hooks_execution_fn (rpc).
+
+ """
+ if node_list is None or not node_list:
+ return {}
+
+ # Convert everything to strings
+ env = dict([(str(key), str(val)) for key, val in env.iteritems()])
+ assert compat.all(key == "PATH" or key.startswith("GANETI_")
+ for key in env)
+ for node in node_list:
+ assert utils.UUID_RE.match(node), "Invalid node uuid %s" % node
+
+ return self.hooks_execution_fn(node_list, hpath, phase, env)
+
+ def _RunWrapper(self, node_list, hpath, phase, phase_env, is_global=False,
+ post_status=None):
"""Simple wrapper over self.callfn.
This method fixes the environment before executing the hooks.
@@ -175,18 +199,34 @@
if self.master_name is not None:
env["GANETI_MASTER"] = self.master_name
+ if self.job_id and is_global:
+ env["GANETI_JOB_ID"] = self.job_id
+ if phase == constants.HOOKS_PHASE_POST and is_global:
+ assert post_status is not None
+ env["GANETI_POST_STATUS"] = post_status
+
if phase_env:
env = utils.algo.JoinDisjointDicts(env, phase_env)
- # Convert everything to strings
- env = dict([(str(key), str(val)) for key, val in env.iteritems()])
+ if not is_global:
+ return self._CheckParamsAndExecHooks(node_list, hpath, phase, env)
- assert compat.all(key == "PATH" or key.startswith("GANETI_")
- for key in env)
+ # For global hooks, we need to send different env values to master and
+ # to the other nodes
+ ret = dict()
+ env["GANETI_IS_MASTER"] = constants.GLOBAL_HOOKS_MASTER
+ master_set = frozenset([self.master_uuid])
+ ret.update(self._CheckParamsAndExecHooks(master_set, hpath, phase, env))
- return self.hooks_execution_fn(node_list, hpath, phase, env)
+ if node_list:
+ node_list = frozenset(set(node_list) - master_set)
+ env["GANETI_IS_MASTER"] = constants.GLOBAL_HOOKS_NOT_MASTER
+ ret.update(self._CheckParamsAndExecHooks(node_list, hpath, phase, env))
- def RunPhase(self, phase, node_names=None):
+ return ret
+
+ def RunPhase(self, phase, node_uuids=None, is_global=False,
+ post_status=None):
"""Run all the scripts for a phase.
This is the main function of the HookMaster.
@@ -196,33 +236,37 @@
@param phase: one of L{constants.HOOKS_PHASE_POST} or
L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
- @param node_names: overrides the predefined list of nodes for the given
+ @param node_uuids: overrides the predefined list of nodes for the given
phase
+ @param is_global: whether global or per-opcode hooks should be executed
+ @param post_status: the job execution status for the global post hooks
@return: the processed results of the hooks multi-node rpc call
@raise errors.HooksFailure: on communication failure to the nodes
@raise errors.HooksAbort: on failure of one of the hooks
"""
if phase == constants.HOOKS_PHASE_PRE:
- if node_names is None:
- node_names = self.pre_nodes
+ if node_uuids is None:
+ node_uuids = self.pre_nodes
env = self.pre_env
elif phase == constants.HOOKS_PHASE_POST:
- if node_names is None:
- node_names = self.post_nodes
- if node_names is not None and self.prepare_post_nodes_fn is not None:
- node_names = frozenset(self.prepare_post_nodes_fn(list(node_names)))
+ if node_uuids is None:
+ node_uuids = self.post_nodes
+ if node_uuids is not None and self.prepare_post_nodes_fn is not None:
+ node_uuids = frozenset(self.prepare_post_nodes_fn(list(node_uuids)))
env = self._BuildEnv(phase)
else:
raise AssertionError("Unknown phase '%s'" % phase)
- if not node_names:
+ if not node_uuids and not is_global:
# empty node list, we should not attempt to run this as either
# we're in the cluster init phase and the rpc client part can't
# even attempt to run, or this LU doesn't do hooks at all
return
- results = self._RunWrapper(node_names, self.hooks_path, phase, env)
+ hooks_path = constants.GLOBAL_HOOKS_DIR if is_global else self.hooks_path
+ results = self._RunWrapper(node_uuids, hooks_path, phase, env, is_global,
+ post_status)
if not results:
msg = "Communication Failure"
if phase == constants.HOOKS_PHASE_PRE:
@@ -268,11 +312,11 @@
"""
phase = constants.HOOKS_PHASE_POST
hpath = constants.HOOKS_NAME_CFGUPDATE
- nodes = [self.master_name]
+ nodes = [self.master_uuid]
self._RunWrapper(nodes, hpath, phase, self.pre_env)
@staticmethod
- def BuildFromLu(hooks_execution_fn, lu):
+ def BuildFromLu(hooks_execution_fn, lu, job_id=None):
if lu.HPATH is None:
nodes = (None, None)
else:
@@ -285,9 +329,25 @@
master_name = cluster_name = None
if lu.cfg:
master_name = lu.cfg.GetMasterNodeName()
+ master_uuid = lu.cfg.GetMasterNode()
cluster_name = lu.cfg.GetClusterName()
return HooksMaster(lu.op.OP_ID, lu.HPATH, nodes, hooks_execution_fn,
- _RpcResultsToHooksResults, lu.BuildHooksEnv,
+ RpcResultsToHooksResults, lu.BuildHooksEnv,
lu.PreparePostHookNodes, lu.LogWarning, lu.HTYPE,
- cluster_name, master_name)
+ cluster_name, master_name, master_uuid, job_id)
+
+
+def ExecGlobalPostHooks(opcode, master_name, rpc_runner, log_fn,
+ cluster_name, master_uuid, job_id, status):
+ """ Build hooks manager and execute global post hooks just on the master
+
+ """
+ hm = HooksMaster(opcode, hooks_path=None, nodes=([], [master_uuid]),
+ hooks_execution_fn=rpc_runner,
+ hooks_results_adapt_fn=RpcResultsToHooksResults,
+ build_env_fn=None, prepare_post_nodes_fn=None,
+ log_fn=log_fn, htype=None, cluster_name=cluster_name,
+ master_name=master_name, master_uuid=master_uuid,
+ job_id=job_id)
+ hm.RunPhase(constants.HOOKS_PHASE_POST, is_global=True, post_status=status)
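A minimal sketch of how ExecGlobalPostHooks is meant to be driven (it mirrors the job post-hooks executor and mcpu hunks later in this patch; the status constant name is taken from those hunks):

import logging
from ganeti import constants
from ganeti import hooksmaster

def _NotifyJobDisappeared(cfg, rpc_runner, op_id, job_id):
  # Run only the global post hooks, on the master node, with a
  # "disappeared" status for the given job.
  hooksmaster.ExecGlobalPostHooks(op_id, cfg.GetMasterNodeName(), rpc_runner,
                                  logging.warning, cfg.GetClusterName(),
                                  cfg.GetMasterNode(), job_id,
                                  constants.POST_HOOKS_STATUS_DISAPPEARED)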
diff --git a/lib/http/auth.py b/lib/http/auth.py
index 35b0b32..ce2e663 100644
--- a/lib/http/auth.py
+++ b/lib/http/auth.py
@@ -38,7 +38,6 @@
from ganeti import compat
from ganeti import http
-from ganeti import utils
from cStringIO import StringIO
@@ -137,8 +136,8 @@
if not realm:
raise AssertionError("No authentication realm")
- # Check "Authorization" header
- if self._CheckAuthorization(req):
+ # Check Authentication
+ if self.Authenticate(req):
# User successfully authenticated
return
@@ -156,24 +155,25 @@
raise http.HttpUnauthorized(headers=headers)
- def _CheckAuthorization(self, req):
- """Checks 'Authorization' header sent by client.
+ @staticmethod
+ def ExtractUserPassword(req):
+ """Extracts a user and a password from the http authorization header.
@type req: L{http.server._HttpServerRequest}
- @param req: HTTP request context
- @rtype: bool
- @return: Whether user is allowed to execute request
-
+ @param req: HTTP request
+ @rtype: (str, str)
+ @return: A tuple containing a user and a password. One or both values
+ might be None if they are not present
"""
credentials = req.request_headers.get(http.HTTP_AUTHORIZATION, None)
if not credentials:
- return False
+ return None, None
# Extract scheme
parts = credentials.strip().split(None, 2)
if len(parts) < 1:
# Missing scheme
- return False
+ return None, None
# RFC2617, section 1.2: "[...] It uses an extensible, case-insensitive
# token to identify the authentication scheme [...]"
@@ -184,7 +184,7 @@
if len(parts) < 2:
raise http.HttpBadRequest(message=("Basic authentication requires"
" credentials"))
- return self._CheckBasicAuthorization(req, parts[1])
+ return HttpServerRequestAuthentication._ExtractBasicUserPassword(parts[1])
elif scheme == HTTP_DIGEST_AUTH.lower():
# TODO: Implement digest authentication
@@ -194,49 +194,82 @@
pass
# Unsupported authentication scheme
- return False
+ return None, None
- def _CheckBasicAuthorization(self, req, in_data):
- """Checks credentials sent for basic authentication.
+ @staticmethod
+ def _ExtractBasicUserPassword(in_data):
+ """Extracts user and password from the contents of an authorization header.
- @type req: L{http.server._HttpServerRequest}
- @param req: HTTP request context
@type in_data: str
@param in_data: Username and password encoded as Base64
- @rtype: bool
- @return: Whether user is allowed to execute request
+ @rtype: (str, str)
+ @return: A tuple containing user and password. One or both values might be
+ None if they are not present
"""
try:
creds = base64.b64decode(in_data.encode("ascii")).decode("ascii")
except (TypeError, binascii.Error, UnicodeError):
logging.exception("Error when decoding Basic authentication credentials")
- return False
+ raise http.HttpBadRequest(message=("Invalid basic authorization header"))
if ":" not in creds:
- return False
+ # We have just a username without password
+ return creds, None
- (user, password) = creds.split(":", 1)
+ # return (user, password) tuple
+ return creds.split(":", 1)
- return self.Authenticate(req, user, password)
-
- def Authenticate(self, req, user, password):
- """Checks the password for a user.
+ def Authenticate(self, req):
+ """Checks the credentiales.
This function MUST be overridden by a subclass.
"""
raise NotImplementedError()
- def VerifyBasicAuthPassword(self, req, username, password, expected):
+ @staticmethod
+ def ExtractSchemePassword(expected_password):
+ """Extracts a scheme and a password from the expected_password.
+
+ @type expected_password: str
+ @param expected_password: Expected password with optional scheme prefix
+ @rtype: (str, str)
+ @return: A tuple containing a scheme and a password. Both values will be
+ None if the scheme or password is invalid
+
+ """
+ if expected_password is None:
+ return None, None
+ # Backwards compatibility for old-style passwords without a scheme
+ if not expected_password.startswith("{"):
+ expected_password = (HttpServerRequestAuthentication._CLEARTEXT_SCHEME +
+ expected_password)
+
+ # Check again, just to be sure
+ if not expected_password.startswith("{"):
+ raise AssertionError("Invalid scheme")
+
+ scheme_end_idx = expected_password.find("}", 1)
+
+ # Ensure scheme has a length of at least one character
+ if scheme_end_idx <= 1:
+ logging.warning("Invalid scheme in password")
+ return None, None
+
+ scheme = expected_password[:scheme_end_idx + 1].upper()
+ password = expected_password[scheme_end_idx + 1:]
+
+ return scheme, password
+
+ @staticmethod
+ def VerifyBasicAuthPassword(username, password, expected, realm):
"""Checks the password for basic authentication.
As long as they don't start with an opening brace ("E{lb}"), old passwords
are supported. A new scheme uses H(A1) from RFC2617, where H is MD5 and A1
consists of the username, the authentication realm and the actual password.
- @type req: L{http.server._HttpServerRequest}
- @param req: HTTP request context
@type username: string
@param username: Username from HTTP headers
@type password: string
@@ -244,33 +277,21 @@
@type expected: string
@param expected: Expected password with optional scheme prefix (e.g. from
users file)
+ @type realm: string
+ @param realm: Authentication realm
"""
- # Backwards compatibility for old-style passwords without a scheme
- if not expected.startswith("{"):
- expected = self._CLEARTEXT_SCHEME + expected
-
- # Check again, just to be sure
- if not expected.startswith("{"):
- raise AssertionError("Invalid scheme")
-
- scheme_end_idx = expected.find("}", 1)
-
- # Ensure scheme has a length of at least one character
- if scheme_end_idx <= 1:
- logging.warning("Invalid scheme in password for user '%s'", username)
+ scheme, expected_password = HttpServerRequestAuthentication \
+ .ExtractSchemePassword(expected)
+ if scheme is None or password is None:
return False
- scheme = expected[:scheme_end_idx + 1].upper()
- expected_password = expected[scheme_end_idx + 1:]
-
# Good old plain text password
- if scheme == self._CLEARTEXT_SCHEME:
+ if scheme == HttpServerRequestAuthentication._CLEARTEXT_SCHEME:
return password == expected_password
# H(A1) as described in RFC2617
- if scheme == self._HA1_SCHEME:
- realm = self.GetAuthRealm(req)
+ if scheme == HttpServerRequestAuthentication._HA1_SCHEME:
if not realm:
# There can not be a valid password for this case
raise AssertionError("No authentication realm")
@@ -284,56 +305,3 @@
scheme, username)
return False
-
-
-class PasswordFileUser(object):
- """Data structure for users from password file.
-
- """
- def __init__(self, name, password, options):
- self.name = name
- self.password = password
- self.options = options
-
-
-def ParsePasswordFile(contents):
- """Parses the contents of a password file.
-
- Lines in the password file are of the following format::
-
- <username> <password> [options]
-
- Fields are separated by whitespace. Username and password are mandatory,
- options are optional and separated by comma (','). Empty lines and comments
- ('#') are ignored.
-
- @type contents: str
- @param contents: Contents of password file
- @rtype: dict
- @return: Dictionary containing L{PasswordFileUser} instances
-
- """
- users = {}
-
- for line in utils.FilterEmptyLinesAndComments(contents):
- parts = line.split(None, 2)
- if len(parts) < 2:
- # Invalid line
- # TODO: Return line number from FilterEmptyLinesAndComments
- logging.warning("Ignoring non-comment line with less than two fields")
- continue
-
- name = parts[0]
- password = parts[1]
-
- # Extract options
- options = []
- if len(parts) >= 3:
- for part in parts[2].split(","):
- options.append(part.strip())
- else:
- logging.warning("Ignoring values for user '%s': %s", name, parts[3:])
-
- users[name] = PasswordFileUser(name, password, options)
-
- return users
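With the verification logic split into static helpers, a caller can combine them roughly as follows (sketch only; the basic-auth RAPI authenticator added later in this patch does essentially this):

from ganeti.http.auth import HttpServerRequestAuthentication as _Auth

def _CheckBasicCredentials(req, expected_password, realm):
  # expected_password comes from the users file and may carry a scheme
  # prefix such as "{cleartext}" or "{ha1}"; req is the HTTP request context.
  user, password = _Auth.ExtractUserPassword(req)
  if user is None or password is None:
    return None
  if _Auth.VerifyBasicAuthPassword(user, password, expected_password, realm):
    return user
  return None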
diff --git a/lib/hypervisor/hv_kvm/__init__.py b/lib/hypervisor/hv_kvm/__init__.py
index 580606b..f3c1d60 100644
--- a/lib/hypervisor/hv_kvm/__init__.py
+++ b/lib/hypervisor/hv_kvm/__init__.py
@@ -515,6 +515,7 @@
constants.HV_REBOOT_BEHAVIOR:
hv_base.ParamInSet(True, constants.REBOOT_BEHAVIORS),
constants.HV_CPU_MASK: hv_base.OPT_MULTI_CPU_MASK_CHECK,
+ constants.HV_WORKER_CPU_MASK: hv_base.OPT_MULTI_CPU_MASK_CHECK,
constants.HV_CPU_TYPE: hv_base.NO_CHECK,
constants.HV_CPU_CORES: hv_base.OPT_NONNEGATIVE_INT_CHECK,
constants.HV_CPU_THREADS: hv_base.OPT_NONNEGATIVE_INT_CHECK,
@@ -906,13 +907,19 @@
target_process.set_cpu_affinity(range(psutil.cpu_count()))
else:
target_process.set_cpu_affinity(cpus)
+ for p in target_process.get_children(recursive=True):
+ p.set_cpu_affinity(cpus)
@classmethod
- def _AssignCpuAffinity(cls, cpu_mask, process_id, thread_dict):
+ def _AssignCpuAffinity(cls, cpu_mask, worker_cpu_mask, process_id,
+ thread_dict):
"""Change CPU affinity for running VM according to given CPU mask.
@param cpu_mask: CPU mask as given by the user. e.g. "0-2,4:all:1,3"
@type cpu_mask: string
+ @param worker_cpu_mask: CPU mask as given by the user for the worker
+ threads. e.g. "0-2,4"
+ @type worker_cpu_mask: string
@param process_id: process ID of KVM process. Used to pin entire VM
to physical CPUs.
@type process_id: int
@@ -920,18 +927,18 @@
@type thread_dict: dict int:int
"""
- # Convert the string CPU mask to a list of list of int's
- cpu_list = utils.ParseMultiCpuMask(cpu_mask)
+ worker_cpu_list = utils.ParseCpuMask(worker_cpu_mask)
+ cls._SetProcessAffinity(process_id, worker_cpu_list)
+ # Convert the string CPU mask to a list of list of ints
+ cpu_list = utils.ParseMultiCpuMask(cpu_mask)
if len(cpu_list) == 1:
all_cpu_mapping = cpu_list[0]
- if all_cpu_mapping == constants.CPU_PINNING_OFF:
- # If CPU pinning has 1 entry that's "all", then do nothing
- pass
- else:
- # If CPU pinning has one non-all entry, map the entire VM to
- # one set of physical CPUs
- cls._SetProcessAffinity(process_id, all_cpu_mapping)
+ if all_cpu_mapping != constants.CPU_PINNING_OFF:
+ # The vcpus do not inherit the affinity of the parent process so they
+ # also must be pinned.
+ for vcpu in thread_dict:
+ cls._SetProcessAffinity(thread_dict[vcpu], all_cpu_mapping)
else:
# The number of vCPUs mapped should match the number of vCPUs
# reported by KVM. This was already verified earlier, so
@@ -962,7 +969,7 @@
return result
- def _ExecuteCpuAffinity(self, instance_name, cpu_mask):
+ def _ExecuteCpuAffinity(self, instance_name, cpu_mask, worker_cpu_mask):
"""Complete CPU pinning.
@type instance_name: string
@@ -976,7 +983,7 @@
# Get vCPU thread IDs, to be used if need to pin vCPUs separately
thread_dict = self._GetVcpuThreadIds(instance_name)
# Run CPU pinning, based on configured mask
- self._AssignCpuAffinity(cpu_mask, pid, thread_dict)
+ self._AssignCpuAffinity(cpu_mask, worker_cpu_mask, pid, thread_dict)
def ListInstances(self, hvparams=None):
"""Get the list of running instances.
@@ -1935,7 +1942,8 @@
# If requested, set CPU affinity and resume instance execution
if cpu_pinning:
- self._ExecuteCpuAffinity(instance.name, up_hvp[constants.HV_CPU_MASK])
+ self._ExecuteCpuAffinity(instance.name, up_hvp[constants.HV_CPU_MASK],
+ up_hvp[constants.HV_WORKER_CPU_MASK])
start_memory = self._InstanceStartupMemory(instance)
if start_memory < instance.beparams[constants.BE_MAXMEM]:
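Note the two mask formats in play: HV_CPU_MASK remains a multi-CPU mask parsed with utils.ParseMultiCpuMask, while the new HV_WORKER_CPU_MASK is a single mask parsed with utils.ParseCpuMask. An illustrative sketch (the exact return shapes are assumptions about the existing helpers, not something introduced here):

from ganeti import utils

# One entry per vCPU; "all" denotes no pinning for that vCPU.
vcpu_masks = utils.ParseMultiCpuMask("0-2,4:all:1,3")
# The worker threads of the KVM process get a single flat CPU list,
# e.g. [0, 1, 2, 4] (assumed shape).
worker_cpus = utils.ParseCpuMask("0-2,4")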
diff --git a/lib/jqueue/__init__.py b/lib/jqueue/__init__.py
index 9384f55..269c3c4 100644
--- a/lib/jqueue/__init__.py
+++ b/lib/jqueue/__init__.py
@@ -249,6 +249,8 @@
self.livelock = None
self.process_id = None
+ self.writable = None
+
self._InitInMemory(self, writable)
assert not self.archived, "New jobs can not be marked as archived"
@@ -1452,7 +1454,7 @@
return job
try:
- job = self._LoadJobFromDisk(job_id, False)
+ job = JobQueue._LoadJobFromDisk(self, job_id, False)
if job is None:
return job
except errors.JobFileCorrupted:
@@ -1473,7 +1475,8 @@
logging.debug("Added job %s to the cache", job_id)
return job
- def _LoadJobFromDisk(self, job_id, try_archived, writable=None):
+ @staticmethod
+ def _LoadJobFromDisk(queue, job_id, try_archived, writable=None):
"""Load the given job file from disk.
Given a job file, read, load and restore it in a _QueuedJob format.
@@ -1486,10 +1489,10 @@
@return: either None or the job object
"""
- path_functions = [(self._GetJobPath, False)]
+ path_functions = [(JobQueue._GetJobPath, False)]
if try_archived:
- path_functions.append((self._GetArchivedJobPath, True))
+ path_functions.append((JobQueue._GetArchivedJobPath, True))
raw_data = None
archived = None
@@ -1514,13 +1517,14 @@
try:
data = serializer.LoadJson(raw_data)
- job = _QueuedJob.Restore(self, data, writable, archived)
+ job = _QueuedJob.Restore(queue, data, writable, archived)
except Exception, err: # pylint: disable=W0703
raise errors.JobFileCorrupted(err)
return job
- def SafeLoadJobFromDisk(self, job_id, try_archived, writable=None):
+ @staticmethod
+ def SafeLoadJobFromDisk(queue, job_id, try_archived, writable=None):
"""Load the given job file from disk.
Given a job file, read, load and restore it in a _QueuedJob format.
@@ -1536,7 +1540,8 @@
"""
try:
- return self._LoadJobFromDisk(job_id, try_archived, writable=writable)
+ return JobQueue._LoadJobFromDisk(queue, job_id, try_archived,
+ writable=writable)
except (errors.JobFileCorrupted, EnvironmentError):
logging.exception("Can't load/parse job %s", job_id)
return None
@@ -1590,7 +1595,7 @@
# Not using in-memory cache as doing so would require an exclusive lock
# Try to load from disk
- job = self.SafeLoadJobFromDisk(job_id, True, writable=False)
+ job = JobQueue.SafeLoadJobFromDisk(self, job_id, True, writable=False)
if job:
assert not job.writable, "Got writable job" # pylint: disable=E1101
@@ -1635,7 +1640,7 @@
None if the job doesn't exist
"""
- job = self.SafeLoadJobFromDisk(job_id, True, writable=False)
+ job = JobQueue.SafeLoadJobFromDisk(self, job_id, True, writable=False)
if job is not None:
return job.CalcStatus() in constants.JOBS_FINALIZED
elif cluster.LUClusterDestroy.clusterHasBeenDestroyed:
diff --git a/lib/jqueue/exec.py b/lib/jqueue/exec.py
index 8e61805..896c002 100644
--- a/lib/jqueue/exec.py
+++ b/lib/jqueue/exec.py
@@ -49,7 +49,7 @@
from ganeti import pathutils
from ganeti.utils import livelock
-from ganeti.jqueue import _JobProcessor
+from ganeti.jqueue import _JobProcessor, JobQueue
def _GetMasterInfo():
@@ -132,7 +132,7 @@
prio_change[0] = True
signal.signal(signal.SIGUSR1, _User1Handler)
- job = context.jobqueue.SafeLoadJobFromDisk(job_id, False)
+ job = JobQueue.SafeLoadJobFromDisk(context.jobqueue, job_id, False)
job.SetPid(os.getpid())
@@ -154,7 +154,7 @@
if cancel[0]:
logging.debug("Got cancel request, cancelling job %d", job_id)
r = context.jobqueue.CancelJob(job_id)
- job = context.jobqueue.SafeLoadJobFromDisk(job_id, False)
+ job = JobQueue.SafeLoadJobFromDisk(context.jobqueue, job_id, False)
proc = _JobProcessor(context.jobqueue, execfun, job)
logging.debug("CancelJob result for job %d: %s", job_id, r)
cancel[0] = False
@@ -166,7 +166,7 @@
utils.RemoveFile(fname)
logging.debug("Changing priority of job %d to %d", job_id, new_prio)
r = context.jobqueue.ChangeJobPriority(job_id, new_prio)
- job = context.jobqueue.SafeLoadJobFromDisk(job_id, False)
+ job = JobQueue.SafeLoadJobFromDisk(context.jobqueue, job_id, False)
proc = _JobProcessor(context.jobqueue, execfun, job)
logging.debug("Result of changing priority of %d to %d: %s", job_id,
new_prio, r)
diff --git a/lib/jqueue/post_hooks_exec.py b/lib/jqueue/post_hooks_exec.py
new file mode 100644
index 0000000..a43b489
--- /dev/null
+++ b/lib/jqueue/post_hooks_exec.py
@@ -0,0 +1,121 @@
+#
+#
+
+# Copyright (C) 2015 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+"""Module implementing the execution of opcode post hooks in a separate process
+
+This process receives the job_id from the master process and runs global post
+hooks for the last opcode whose execution started before the job process
+disappeared.
+
+"""
+
+import contextlib
+import logging
+import os
+import sys
+
+from ganeti import constants
+from ganeti import hooksmaster
+from ganeti import pathutils
+from ganeti import utils
+from ganeti.jqueue import JobQueue
+from ganeti.rpc import transport
+from ganeti.server import masterd
+from ganeti.utils import livelock
+
+
+def _GetMasterInfo():
+ """Retrieve the job id from the master process
+
+ This also closes standard input/output
+
+ @rtype: int
+
+ """
+ logging.debug("Reading job id from the master process")
+ logging.debug("Opening transport over stdin/out")
+ with contextlib.closing(transport.FdTransport((0, 1))) as trans:
+ job_id = int(trans.Call(""))
+ logging.debug("Got job id %d", job_id)
+ return job_id
+
+
+def main():
+
+ debug = int(os.environ["GNT_DEBUG"])
+
+ logname = pathutils.GetLogFilename("jobs")
+ utils.SetupLogging(logname, "job-post-hooks-startup", debug=debug)
+ job_id = _GetMasterInfo()
+ utils.SetupLogging(logname, "job-%s-post-hooks" % (job_id,), debug=debug)
+
+ try:
+ job = JobQueue.SafeLoadJobFromDisk(None, job_id, try_archived=False,
+ writable=False)
+ assert job.id == job_id, "The job id received %d differs " % job_id + \
+ "from the serialized one %d" % job.id
+
+ target_op = None
+ for op in job.ops:
+ if op.start_timestamp is None:
+ break
+ target_op = op
+
+ # We should run post hooks only if opcode execution has been started.
+ # Note that currently the opcodes inside a job execute sequentially.
+ if target_op is None:
+ sys.exit(0)
+
+ livelock_name = livelock.LiveLockName("post-hooks-executor-%d" % job_id)
+ context = masterd.GanetiContext(livelock_name)
+ cfg_tmp = context.GetConfig(job_id)
+ # Get static snapshot of the config and release it in order to prevent
+ # further synchronizations.
+ cfg = cfg_tmp.GetDetachedConfig()
+ cfg_tmp.OutDate()
+
+ hooksmaster.ExecGlobalPostHooks(target_op.input.OP_ID,
+ cfg.GetMasterNodeName(),
+ context.GetRpc(cfg).call_hooks_runner,
+ logging.warning, cfg.GetClusterName(),
+ cfg.GetMasterNode(), job_id,
+ constants.POST_HOOKS_STATUS_DISAPPEARED)
+ except Exception: # pylint: disable=W0703
+ logging.exception("Exception when trying to run post hooks of job %d",
+ job_id)
+ finally:
+ logging.debug("Post hooks exec for disappeared job %d finalized", job_id)
+ logging.debug("Removing livelock file %s", livelock_name.GetPath())
+ os.remove(livelock_name.GetPath())
+
+ sys.exit(0)
+
+if __name__ == '__main__':
+ main()
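For reference, the extra environment a global post hook receives comes from the _RunWrapper changes earlier in this patch; only the variable names are defined by the code, the concrete values below are illustrative assumptions:

# Only set for global hooks (see _RunWrapper above); values are examples.
example_global_hook_env = {
  "GANETI_JOB_ID": "1234",
  "GANETI_POST_STATUS": "disappeared",  # success/error/disappeared statuses
  "GANETI_IS_MASTER": "master",         # GLOBAL_HOOKS_MASTER vs GLOBAL_HOOKS_NOT_MASTER
}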
diff --git a/lib/mcpu.py b/lib/mcpu.py
index 41021ef..85cee15 100644
--- a/lib/mcpu.py
+++ b/lib/mcpu.py
@@ -304,6 +304,7 @@
self.cfg = context.GetConfig(ec_id)
self.rpc = context.GetRpc(self.cfg)
self.hmclass = hooksmaster.HooksMaster
+ self._hm = None
self._enable_locks = enable_locks
self.wconfd = wconfd # Indirection to allow testing
self._wconfdcontext = context.GetWConfdContext(ec_id)
@@ -482,9 +483,11 @@
lu.cfg.OutDate()
lu.CheckPrereq()
- hm = self.BuildHooksManager(lu)
+ self._hm = self.BuildHooksManager(lu)
try:
- h_results = hm.RunPhase(constants.HOOKS_PHASE_PRE)
+ # Run hooks twice: first for the global hooks, then for the usual hooks.
+ self._hm.RunPhase(constants.HOOKS_PHASE_PRE, is_global=True)
+ h_results = self._hm.RunPhase(constants.HOOKS_PHASE_PRE)
except Exception, err: # pylint: disable=W0703
# This gives the LU a chance of cleaning up in case of an hooks failure.
# The type of exception is deliberately broad to be able to react to
@@ -512,19 +515,20 @@
lusExecuting[0] += 1
try:
result = _ProcessResult(submit_mj_fn, lu.op, lu.Exec(self.Log))
- h_results = hm.RunPhase(constants.HOOKS_PHASE_POST)
+ h_results = self._hm.RunPhase(constants.HOOKS_PHASE_POST)
result = lu.HooksCallBack(constants.HOOKS_PHASE_POST, h_results,
self.Log, result)
finally:
# FIXME: This needs locks if not lu_class.REQ_BGL
lusExecuting[0] -= 1
if write_count != self.cfg.write_count:
- hm.RunConfigUpdate()
+ self._hm.RunConfigUpdate()
return result
def BuildHooksManager(self, lu):
- return self.hmclass.BuildFromLu(lu.rpc.call_hooks_runner, lu)
+ return self.hmclass.BuildFromLu(lu.rpc.call_hooks_runner, lu,
+ self.GetECId())
def _LockAndExecLU(self, lu, level, calc_timeout, pending=None):
"""Execute a Logical Unit, with the needed locks.
@@ -665,33 +669,17 @@
raise errors.OpResultError("Opcode result does not match %s: %s" %
(resultcheck_fn, utils.Truncate(result, 80)))
- def ExecOpCode(self, op, cbs, timeout=None):
+ def _PrepareLockListsAndExecLU(self, op, lu_class, calc_timeout):
"""Execute an opcode.
- @type op: an OpCode instance
@param op: the opcode to be executed
- @type cbs: L{OpExecCbBase}
- @param cbs: Runtime callbacks
- @type timeout: float or None
- @param timeout: Maximum time to acquire all locks, None for no timeout
+ @param lu_class: the LU class implementing the current opcode
+ @param calc_timeout: The function calculating the time remaining
+ to acquire all locks, None for no timeout
@raise LockAcquireTimeout: In case locks couldn't be acquired in specified
amount of time
"""
- if not isinstance(op, opcodes.OpCode):
- raise errors.ProgrammerError("Non-opcode instance passed"
- " to ExecOpcode (%s)" % type(op))
-
- lu_class = self.DISPATCH_TABLE.get(op.__class__, None)
- if lu_class is None:
- raise errors.OpCodeUnknown("Unknown opcode")
-
- if timeout is None:
- calc_timeout = lambda: None
- else:
- calc_timeout = utils.RunningTimeout(timeout, False).Remaining
-
- self._cbs = cbs
try:
if self._enable_locks:
# Acquire the Big Ganeti Lock exclusively if this LU requires it,
@@ -721,8 +709,55 @@
self._wconfdcontext, locking.LEVEL_NAMES[locking.LEVEL_CLUSTER])
self._cbs = None
- self._CheckLUResult(op, result)
+ return result
+ def ExecOpCode(self, op, cbs, timeout=None):
+ """Execute an opcode.
+
+ @type op: an OpCode instance
+ @param op: the opcode to be executed
+ @type cbs: L{OpExecCbBase}
+ @param cbs: Runtime callbacks
+ @type timeout: float or None
+ @param timeout: Maximum time to acquire all locks, None for no timeout
+ @raise LockAcquireTimeout: In case locks couldn't be acquired in specified
+ amount of time
+
+ """
+ if not isinstance(op, opcodes.OpCode):
+ raise errors.ProgrammerError("Non-opcode instance passed"
+ " to ExecOpcode (%s)" % type(op))
+
+ lu_class = self.DISPATCH_TABLE.get(op.__class__, None)
+ if lu_class is None:
+ raise errors.OpCodeUnknown("Unknown opcode")
+
+ if timeout is None:
+ calc_timeout = lambda: None
+ else:
+ calc_timeout = utils.RunningTimeout(timeout, False).Remaining
+
+ self._cbs = cbs
+ try:
+ result = self._PrepareLockListsAndExecLU(op, lu_class, calc_timeout)
+
+ # The post hooks below are always executed with a SUCCESS status because
+ # all possible errors during pre hooks and LU execution raise an
+ # exception, in which case the statement below is skipped.
+ if self._hm is not None:
+ self._hm.RunPhase(constants.HOOKS_PHASE_POST, is_global=True,
+ post_status=constants.POST_HOOKS_STATUS_SUCCESS)
+ except:
+ # execute global post hooks with the failed status on any exception
+ hooksmaster.ExecGlobalPostHooks(op.OP_ID, self.cfg.GetMasterNodeName(),
+ self.rpc.call_hooks_runner,
+ logging.warning,
+ self.cfg.GetClusterName(),
+ self.cfg.GetMasterNode(), self.GetECId(),
+ constants.POST_HOOKS_STATUS_ERROR)
+ raise
+
+ self._CheckLUResult(op, result)
return result
def Log(self, *args):
diff --git a/lib/query.py b/lib/query.py
index 86c72b6..8db59f9 100644
--- a/lib/query.py
+++ b/lib/query.py
@@ -2008,6 +2008,12 @@
IQ_CONFIG, 0, lambda ctx, inst: [disk.name for disk in inst.disks]),
(_MakeField("disk.uuids", "Disk_UUIDs", QFT_OTHER, "List of disk UUIDs"),
IQ_CONFIG, 0, lambda ctx, inst: [disk.uuid for disk in inst.disks]),
+ (_MakeField("disk.storage_ids", "Disk_storage_ids", QFT_OTHER,
+ "List of disk storage ids"),
+ IQ_CONFIG, 0, lambda ctx, inst: [disk.storage_id for disk in inst.disks]),
+ (_MakeField("disk.providers", "Disk_providers", QFT_OTHER,
+ "List of disk ExtStorage providers"),
+ IQ_CONFIG, 0, lambda ctx, inst: [disk.provider for disk in inst.disks]),
]
# Disks by number
diff --git a/lib/rapi/auth/__init__.py b/lib/rapi/auth/__init__.py
new file mode 100644
index 0000000..9024fbe
--- /dev/null
+++ b/lib/rapi/auth/__init__.py
@@ -0,0 +1,55 @@
+#
+#
+
+# Copyright (C) 2015 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Module containing different authentificators that can be used by RAPI
+
+"""
+
+
+class RapiAuthenticator(object):
+ """Class providing authentication interface for RAPi requests.
+
+ """
+ def ValidateRequest(self, req, handler_access, realm):
+ """Checks whether it's permitted to execute an rapi request.
+
+ Must be implemented in derived classes.
+
+ @type req: L{http.server._HttpServerRequest}
+ @param req: HTTP request context
+ @type handler_access: set of strings
+ @param handler_access: access rights required by the requested resource
+ @type realm: string
+ @param realm: Authentication realm
+ @rtype: str or None
+ @return: the authenticated user name if request execution is permitted and
+ None otherwise
+
+ """
+ raise NotImplementedError()
diff --git a/lib/rapi/auth/basic_auth.py b/lib/rapi/auth/basic_auth.py
new file mode 100644
index 0000000..6fcfbef
--- /dev/null
+++ b/lib/rapi/auth/basic_auth.py
@@ -0,0 +1,150 @@
+#
+#
+
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2012, 2013, 2015 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+"""Module interacting with RAPI users config file
+
+"""
+
+import logging
+import os
+try:
+ from pyinotify import pyinotify # pylint: disable=E0611
+except ImportError:
+ import pyinotify
+
+from ganeti import asyncnotifier
+from ganeti import compat
+from ganeti import http
+from ganeti.http.auth import HttpServerRequestAuthentication
+from ganeti import pathutils
+from ganeti.rapi import auth
+from ganeti.rapi.auth import users_file
+
+
+class FileEventHandler(asyncnotifier.FileEventHandlerBase):
+ def __init__(self, wm, path, cb):
+ """Initializes this class.
+
+ @param wm: Inotify watch manager
+ @type path: string
+ @param path: File path
+ @type cb: callable
+ @param cb: Function called on file change
+
+ """
+ asyncnotifier.FileEventHandlerBase.__init__(self, wm)
+
+ self._cb = cb
+ self._filename = os.path.basename(path)
+
+ # Different Pyinotify versions have the flag constants at different places,
+ # hence not accessing them directly
+ mask = (pyinotify.EventsCodes.ALL_FLAGS["IN_CLOSE_WRITE"] |
+ pyinotify.EventsCodes.ALL_FLAGS["IN_DELETE"] |
+ pyinotify.EventsCodes.ALL_FLAGS["IN_MOVED_FROM"] |
+ pyinotify.EventsCodes.ALL_FLAGS["IN_MOVED_TO"])
+
+ self._handle = self.AddWatch(os.path.dirname(path), mask)
+
+ def process_default(self, event):
+ """Called upon inotify event.
+
+ """
+ if event.name == self._filename:
+ logging.debug("Received inotify event %s", event)
+ self._cb()
+
+
+def SetupFileWatcher(filename, cb):
+ """Configures an inotify watcher for a file.
+
+ @type filename: string
+ @param filename: File to watch
+ @type cb: callable
+ @param cb: Function called on file change
+
+ """
+ wm = pyinotify.WatchManager()
+ handler = FileEventHandler(wm, filename, cb)
+ asyncnotifier.AsyncNotifier(wm, default_proc_fun=handler)
+
+
+class BasicAuthenticator(auth.RapiAuthenticator):
+ """Class providing an Authenticate method based on basic http authentication.
+
+ """
+
+ def __init__(self, user_fn=None):
+ """Loads users file and initializes a watcher for it.
+
+ @param user_fn: A function that should be called to obtain user info
+ instead of the default users_file interface.
+
+ """
+ if user_fn:
+ self.user_fn = user_fn
+ return
+
+ self.users = users_file.RapiUsers()
+ self.user_fn = self.users.Get
+ # Setup file watcher (it'll be driven by asyncore)
+ SetupFileWatcher(pathutils.RAPI_USERS_FILE,
+ compat.partial(self.users.Load,
+ pathutils.RAPI_USERS_FILE))
+
+ self.users.Load(pathutils.RAPI_USERS_FILE)
+
+ def ValidateRequest(self, req, handler_access, realm):
+ """Checks whether a user can access a resource.
+
+ """
+ request_username, request_password = HttpServerRequestAuthentication \
+ .ExtractUserPassword(req)
+ if request_username is None:
+ raise http.HttpUnauthorized()
+ if request_password is None:
+ raise http.HttpBadRequest(message=("Basic authentication requires"
+ " password"))
+
+ user = self.user_fn(request_username)
+ if not (user and HttpServerRequestAuthentication
+ .VerifyBasicAuthPassword(request_username,
+ request_password,
+ user.password, realm)):
+ # Unknown user or password wrong
+ return None
+
+ if (not handler_access or
+ set(user.options).intersection(handler_access)):
+ # Allow access
+ return request_username
+
+ # Access forbidden
+ raise http.HttpForbidden()
diff --git a/lib/rapi/auth/pam.py b/lib/rapi/auth/pam.py
new file mode 100644
index 0000000..8b620f7
--- /dev/null
+++ b/lib/rapi/auth/pam.py
@@ -0,0 +1,379 @@
+#
+#
+
+# Copyright (C) 2015, 2016 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+"""Module interacting with PAM performing authorization and authentication
+
+This module authenticates and authorizes RAPI users based on their credentials.
+Both actions are performed by interaction with PAM as a 'ganeti-rapi' service.
+
+"""
+
+import logging
+try:
+ import ctypes as c # pylint: disable=F0401
+ import ctypes.util as util
+except ImportError:
+ c = None
+
+from ganeti import constants
+from ganeti.errors import PamRapiAuthError
+import ganeti.http as http
+from ganeti.http.auth import HttpServerRequestAuthentication
+from ganeti.rapi import auth
+
+
+__all__ = ['PamAuthenticator']
+
+DEFAULT_SERVICE_NAME = 'ganeti-rapi'
+MAX_STR_LENGTH = 100000
+MAX_MSG_COUNT = 100
+PAM_ENV_URI = 'GANETI_RAPI_URI'
+PAM_ENV_BODY = 'GANETI_REQUEST_BODY'
+PAM_ENV_METHOD = 'GANETI_REQUEST_METHOD'
+PAM_ENV_ACCESS = 'GANETI_RESOURCE_ACCESS'
+
+PAM_ABORT = 26
+PAM_BUF_ERR = 5
+PAM_CONV_ERR = 19
+PAM_SILENT = 32768
+PAM_SUCCESS = 0
+
+PAM_PROMPT_ECHO_OFF = 1
+
+PAM_AUTHTOK = 6
+PAM_USER = 2
+
+if c:
+ class PamHandleT(c.Structure):
+ """Wrapper for PamHandleT
+
+ """
+ _fields_ = [("hidden", c.c_void_p)]
+
+ def __init__(self):
+ c.Structure.__init__(self)
+ self.handle = 0
+
+ class PamMessage(c.Structure):
+ """Wrapper for PamMessage
+
+ """
+ _fields_ = [
+ ("msg_style", c.c_int),
+ ("msg", c.c_char_p),
+ ]
+
+ class PamResponse(c.Structure):
+ """Wrapper for PamResponse
+
+ """
+ _fields_ = [
+ ("resp", c.c_char_p),
+ ("resp_retcode", c.c_int),
+ ]
+
+ CONV_FUNC = c.CFUNCTYPE(c.c_int, c.c_int, c.POINTER(c.POINTER(PamMessage)),
+ c.POINTER(c.POINTER(PamResponse)), c.c_void_p)
+
+ class PamConv(c.Structure):
+ """Wrapper for PamConv
+
+ """
+ _fields_ = [
+ ("conv", CONV_FUNC),
+ ("appdata_ptr", c.c_void_p),
+ ]
+
+
+class CFunctions(object):
+ def __init__(self):
+ if not c:
+ raise PamRapiAuthError("ctypes Python package is not found;"
+ " remote API PAM authentication is not available")
+ self.libpam = c.CDLL(util.find_library("pam"))
+ if not self.libpam:
+ raise PamRapiAuthError("libpam C library is not found;"
+ " remote API PAM authentication is not available")
+ self.libc = c.CDLL(util.find_library("c"))
+ if not self.libc:
+ raise PamRapiAuthError("libc C library is not found;"
+ " remote API PAM authentication is not available")
+
+ self.pam_acct_mgmt = self.libpam.pam_acct_mgmt
+ self.pam_acct_mgmt.argtypes = [PamHandleT, c.c_int]
+ self.pam_acct_mgmt.restype = c.c_int
+
+ self.pam_authenticate = self.libpam.pam_authenticate
+ self.pam_authenticate.argtypes = [PamHandleT, c.c_int]
+ self.pam_authenticate.restype = c.c_int
+
+ self.pam_end = self.libpam.pam_end
+ self.pam_end.argtypes = [PamHandleT, c.c_int]
+ self.pam_end.restype = c.c_int
+
+ self.pam_get_item = self.libpam.pam_get_item
+ self.pam_get_item.argtypes = [PamHandleT, c.c_int, c.POINTER(c.c_void_p)]
+ self.pam_get_item.restype = c.c_int
+
+ self.pam_putenv = self.libpam.pam_putenv
+ self.pam_putenv.argtypes = [PamHandleT, c.c_char_p]
+ self.pam_putenv.restype = c.c_int
+
+ self.pam_set_item = self.libpam.pam_set_item
+ self.pam_set_item.argtypes = [PamHandleT, c.c_int, c.c_void_p]
+ self.pam_set_item.restype = c.c_int
+
+ self.pam_start = self.libpam.pam_start
+ self.pam_start.argtypes = [
+ c.c_char_p,
+ c.c_char_p,
+ c.POINTER(PamConv),
+ c.POINTER(PamHandleT),
+ ]
+ self.pam_start.restype = c.c_int
+
+ self.calloc = self.libc.calloc
+ self.calloc.argtypes = [c.c_uint, c.c_uint]
+ self.calloc.restype = c.c_void_p
+
+ self.free = self.libc.free
+ self.free.argstypes = [c.c_void_p]
+ self.free.restype = None
+
+ self.strndup = self.libc.strndup
+ self.strndup.argstypes = [c.c_char_p, c.c_uint]
+ self.strndup.restype = c.c_char_p
+
+
+def Authenticate(cf, pam_handle, authtok=None):
+ """Performs authentication via PAM.
+
+ Performs two steps:
+ - if authtok is provided then set it with pam_set_item
+ - call pam_authenticate
+
+ """
+ try:
+ authtok_copy = None
+ if authtok:
+ authtok_copy = cf.strndup(authtok, len(authtok))
+ if not authtok_copy:
+ raise http.HttpInternalServerError("Not enough memory for PAM")
+ ret = cf.pam_set_item(c.pointer(pam_handle), PAM_AUTHTOK, authtok_copy)
+ if ret != PAM_SUCCESS:
+ raise http.HttpInternalServerError("pam_set_item failed [%d]" % ret)
+
+ ret = cf.pam_authenticate(pam_handle, 0)
+ if ret == PAM_ABORT:
+ raise http.HttpInternalServerError("pam_authenticate requested abort")
+ if ret != PAM_SUCCESS:
+ raise http.HttpUnauthorized("Authentication failed")
+ except:
+ cf.pam_end(pam_handle, ret)
+ raise
+ finally:
+ if authtok_copy:
+ cf.free(authtok_copy)
+
+
+def PutPamEnvVariable(cf, pam_handle, name, value):
+ """Wrapper over pam_setenv.
+
+ """
+ setenv = "%s=" % name
+ if value:
+ setenv += value
+ ret = cf.pam_putenv(pam_handle, setenv)
+ if ret != PAM_SUCCESS:
+ raise http.HttpInternalServerError("pam_putenv call failed [%d]" % ret)
+
+
+def Authorize(cf, pam_handle, uri_access_rights, uri=None, method=None,
+ body=None):
+ """Performs authorization via PAM.
+
+ Performs two steps:
+ - initialize environmental variables
+ - call pam_acct_mgmt
+
+ """
+ try:
+ PutPamEnvVariable(cf, pam_handle, PAM_ENV_ACCESS, uri_access_rights)
+ PutPamEnvVariable(cf, pam_handle, PAM_ENV_URI, uri)
+ PutPamEnvVariable(cf, pam_handle, PAM_ENV_METHOD, method)
+ PutPamEnvVariable(cf, pam_handle, PAM_ENV_BODY, body)
+
+ ret = cf.pam_acct_mgmt(pam_handle, PAM_SILENT)
+ if ret != PAM_SUCCESS:
+ raise http.HttpUnauthorized("Authorization failed")
+ except:
+ cf.pam_end(pam_handle, ret)
+ raise
+
+
+def ValidateParams(username, _uri_access_rights, password, service, authtok,
+ _uri, _method, _body):
+ """Checks whether ValidateRequest has been called with a correct params.
+
+ These checks include:
+ - username is an obligatory parameter
+ - either password or authtok is an obligatory parameter
+
+ """
+ if not username:
+ raise http.HttpUnauthorized("Username should be provided")
+ if not service:
+ raise http.HttpBadRequest("Service should be proivded")
+ if not password and not authtok:
+ raise http.HttpUnauthorized("Password or authtok should be provided")
+
+
+def ValidateRequest(cf, username, uri_access_rights, password=None,
+ service=DEFAULT_SERVICE_NAME, authtok=None, uri=None,
+ method=None, body=None):
+ """Checks whether it's permitted to execute an rapi request.
+
+ Calls pam_authenticate and then pam_acct_mgmt in order to check whether a
+ request should be executed.
+
+ @param cf: An instance of CFunctions class containing necessary imports
+ @param username: username
+ @param uri_access_rights: handler access rights
+ @param password: password
+ @param service: a service name that will be used for the interaction with PAM
+ @param authtok: user's authentication token (e.g. some kind of signature)
+ @param uri: the URI of the target resource obtained from an HTTP header
+ @param method: http method trying to access the uri
+ @param body: a body of an RAPI request
+ @return: On success, the authenticated user name; raises an exception otherwise.
+
+ """
+ ValidateParams(username, uri_access_rights, password, service, authtok, uri,
+ method, body)
+
+ def ConversationFunction(num_msg, msg, resp, _app_data_ptr):
+ """Conversation function that will be provided to PAM modules.
+
+ The function replies with a password for each message with
+ PAM_PROMPT_ECHO_OFF style and just ignores the others.
+
+ """
+ if num_msg > MAX_MSG_COUNT:
+ logging.warning("Too many messages passed to conv function: [%d]",
+ num_msg)
+ return PAM_BUF_ERR
+ response = cf.calloc(num_msg, c.sizeof(PamResponse))
+ if not response:
+ logging.warning("calloc failed in conv function")
+ return PAM_BUF_ERR
+ resp[0] = c.cast(response, c.POINTER(PamResponse))
+ for i in range(num_msg):
+ if msg[i].contents.msg_style != PAM_PROMPT_ECHO_OFF:
+ continue
+ resp.contents[i].resp = cf.strndup(password, len(password))
+ if not resp.contents[i].resp:
+ logging.warning("strndup failed in conv function")
+ for j in range(i):
+ cf.free(c.cast(resp.contents[j].resp, c.c_void_p))
+ cf.free(response)
+ return PAM_BUF_ERR
+ resp.contents[i].resp_retcode = 0
+ return PAM_SUCCESS
+
+ pam_handle = PamHandleT()
+ conv = PamConv(CONV_FUNC(ConversationFunction), 0)
+ ret = cf.pam_start(service, username, c.pointer(conv), c.pointer(pam_handle))
+ if ret != PAM_SUCCESS:
+ cf.pam_end(pam_handle, ret)
+ raise http.HttpInternalServerError("pam_start call failed [%d]" % ret)
+
+ Authenticate(cf, pam_handle, authtok)
+ Authorize(cf, pam_handle, uri_access_rights, uri, method, body)
+
+ # retrieve the authorized user name
+ puser = c.c_void_p()
+ ret = cf.pam_get_item(pam_handle, PAM_USER, c.pointer(puser))
+ if ret != PAM_SUCCESS or not puser:
+ cf.pam_end(pam_handle, ret)
+ raise http.HttpInternalServerError("pam_get_item call failed [%d]" % ret)
+ user_c_string = c.cast(puser, c.c_char_p)
+
+ cf.pam_end(pam_handle, PAM_SUCCESS)
+ return user_c_string.value
+
+
+def MakeStringC(string):
+ """Converts a string to a valid C string.
+
+ As the C side expects non-unicode strings, encode unicode strings with 'ascii'.
+ Also ensure that the C string is not longer than MAX_STR_LENGTH in order to
+ prevent attacks based on overly long buffers.
+
+ """
+ if string is None:
+ return None
+ if isinstance(string, unicode):
+ string = string.encode("ascii")
+ if not isinstance(string, str):
+ return None
+ if len(string) <= MAX_STR_LENGTH:
+ return string
+ return string[:MAX_STR_LENGTH]
+
+
+class PamAuthenticator(auth.RapiAuthenticator):
+ """Class providing an Authenticate method based on interaction with PAM.
+
+ """
+
+ def __init__(self):
+ """Checks whether ctypes has been imported.
+
+ """
+ self.cf = CFunctions()
+
+ def ValidateRequest(self, req, handler_access, _):
+ """Checks whether a user can access a resource.
+
+ This function returns the authenticated user name on success.
+
+ """
+ username, password = HttpServerRequestAuthentication \
+ .ExtractUserPassword(req)
+ authtok = req.request_headers.get(constants.HTTP_RAPI_PAM_CREDENTIAL, None)
+ handler_access_ = None
+ if handler_access is not None:
+ handler_access_ = ','.join(handler_access)
+ return ValidateRequest(self.cf, MakeStringC(username),
+ MakeStringC(handler_access_),
+ MakeStringC(password),
+ MakeStringC(DEFAULT_SERVICE_NAME),
+ MakeStringC(authtok), MakeStringC(req.request_path),
+ MakeStringC(req.request_method),
+ MakeStringC(req.request_body))
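A sketch of how a server-side caller would use the PAM authenticator (the realm and access set are illustrative values):

from ganeti.rapi.auth.pam import PamAuthenticator

def _AuthenticateViaPam(req):
  # Raises errors.PamRapiAuthError if ctypes or libpam are unavailable.
  authenticator = PamAuthenticator()
  # Returns the authenticated user name or raises an http error.
  return authenticator.ValidateRequest(req, frozenset(["write"]),
                                       "Ganeti Remote API")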
diff --git a/lib/rapi/auth/users_file.py b/lib/rapi/auth/users_file.py
new file mode 100644
index 0000000..e2a26d3
--- /dev/null
+++ b/lib/rapi/auth/users_file.py
@@ -0,0 +1,137 @@
+#
+#
+
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2012, 2013, 2015 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""RAPI users config file parser.
+
+"""
+
+import errno
+import logging
+
+from ganeti import utils
+
+
+class PasswordFileUser(object):
+ """Data structure for users from password file.
+
+ """
+ def __init__(self, name, password, options):
+ self.name = name
+ self.password = password
+ self.options = options
+
+
+def ParsePasswordFile(contents):
+ """Parses the contents of a password file.
+
+ Lines in the password file are of the following format::
+
+ <username> <password> [options]
+
+ Fields are separated by whitespace. Username and password are mandatory,
+ options are optional and separated by comma (','). Empty lines and comments
+ ('#') are ignored.
+
+ @type contents: str
+ @param contents: Contents of password file
+ @rtype: dict
+ @return: Dictionary containing L{PasswordFileUser} instances
+
+ """
+ users = {}
+
+ for line in utils.FilterEmptyLinesAndComments(contents):
+ parts = line.split(None, 2)
+ if len(parts) < 2:
+ # Invalid line
+ # TODO: Return line number from FilterEmptyLinesAndComments
+ logging.warning("Ignoring non-comment line with less than two fields")
+ continue
+
+ name = parts[0]
+ password = parts[1]
+
+ # Extract options
+ options = []
+ if len(parts) >= 3:
+ for part in parts[2].split(","):
+ options.append(part.strip())
+ else:
+ logging.warning("Ignoring values for user '%s': %s", name, parts[3:])
+
+ users[name] = PasswordFileUser(name, password, options)
+
+ return users
+
+
+class RapiUsers(object):
+ def __init__(self):
+ """Initializes this class.
+
+ """
+ self._users = None
+
+ def Get(self, username):
+ """Checks whether a user exists.
+
+ """
+ if self._users:
+ return self._users.get(username, None)
+ else:
+ return None
+
+ def Load(self, filename):
+ """Loads a file containing users and passwords.
+
+ @type filename: string
+ @param filename: Path to file
+
+ """
+ logging.info("Reading users file at %s", filename)
+ try:
+ try:
+ contents = utils.ReadFile(filename)
+ except EnvironmentError, err:
+ self._users = None
+ if err.errno == errno.ENOENT:
+ logging.warning("No users file at %s", filename)
+ else:
+ logging.warning("Error while reading %s: %s", filename, err)
+ return False
+
+ users = ParsePasswordFile(contents)
+
+ except Exception, err: # pylint: disable=W0703
+ # We don't care about the type of exception
+ logging.error("Error while parsing %s: %s", filename, err)
+ return False
+
+ self._users = users
+
+ return True
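An example of the users-file format documented in ParsePasswordFile above and how it is consumed (sketch; the password values are made up):

from ganeti.rapi.auth.users_file import ParsePasswordFile

contents = (
  "# comments and empty lines are ignored\n"
  "alice {ha1}0123456789abcdef0123456789abcdef write\n"
  "bob secret\n"
)
users = ParsePasswordFile(contents)
assert users["alice"].options == ["write"]
assert users["bob"].password == "secret"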
diff --git a/lib/rapi/baserlib.py b/lib/rapi/baserlib.py
index 6a4014a..25e4781 100644
--- a/lib/rapi/baserlib.py
+++ b/lib/rapi/baserlib.py
@@ -323,6 +323,8 @@
self._client_cls = _client_cls
+ self.auth_user = ""
+
def _GetRequestBody(self):
"""Returns the body data.
@@ -411,6 +413,11 @@
raise http.HttpInternalServerError("Internal error: no permission to"
" connect to the master daemon")
+ def GetAuthReason(self):
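+    """Builds a reason trail entry recording the authenticated RAPI user.
+
+    """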
+ return (constants.OPCODE_REASON_SRC_RLIB2,
+ constants.OPCODE_REASON_AUTH_USER + self.auth_user,
+ utils.EpochNano())
+
def SubmitJob(self, op, cl=None):
"""Generic wrapper for submit job, for better http compatibility.
@@ -425,6 +432,11 @@
if cl is None:
cl = self.GetClient()
try:
+ for opcode in op:
+ # Add an authorized user name to the reason trail
+ trail = getattr(opcode, constants.OPCODE_REASON, [])
+ trail.append(self.GetAuthReason())
+ setattr(opcode, constants.OPCODE_REASON, trail)
return cl.SubmitJob(op)
except errors.JobQueueFull:
raise http.HttpServiceUnavailable("Job queue is full, needs archiving")
diff --git a/lib/rapi/rlib2.py b/lib/rapi/rlib2.py
index 8514fcb..7b14e81 100644
--- a/lib/rapi/rlib2.py
+++ b/lib/rapi/rlib2.py
@@ -85,7 +85,7 @@
"nic.links", "nic.networks", "nic.networks.names", "nic.bridges",
"network_port",
"disk.sizes", "disk.spindles", "disk_usage", "disk.uuids",
- "disk.names",
+ "disk.names", "disk.storage_ids", "disk.providers",
"beparams", "hvparams",
"oper_state", "oper_ram", "oper_vcpus", "status",
"custom_hvparams", "custom_beparams", "custom_nicparams",
@@ -399,6 +399,8 @@
priority, predicates, action, reason = \
checkFilterParameters(self.request_body)
+ reason.append(self.GetAuthReason())
+
# ReplaceFilter(None, ...) inserts a new filter.
return self.GetClient().ReplaceFilter(None, priority, predicates, action,
reason)
@@ -444,6 +446,8 @@
priority, predicates, action, reason = \
checkFilterParameters(self.request_body)
+ reason.append(self.GetAuthReason())
+
return self.GetClient().ReplaceFilter(uuid, priority, predicates, action,
reason)
diff --git a/lib/rapi/testutils.py b/lib/rapi/testutils.py
index 4d054cd..a0986a8 100644
--- a/lib/rapi/testutils.py
+++ b/lib/rapi/testutils.py
@@ -50,9 +50,9 @@
import ganeti.http.server # pylint: disable=W0611
import ganeti.server.rapi
+from ganeti.rapi.auth import users_file
import ganeti.rapi.client
-
_URI_RE = re.compile(r"https://(?P<host>.*):(?P<port>\d+)(?P<path>/.*)")
@@ -359,18 +359,23 @@
username = utils.GenerateSecret()
password = utils.GenerateSecret()
- def user_fn(wanted):
- """Called to verify user credentials given in HTTP request.
+ # pylint: disable=W0232
+ class SimpleAuthenticator():
+ # pylint: disable=R0201
+ def ValidateRequest(self, req, _handler_access, _realm):
+ """Called to verify user credentials given in HTTP request.
- """
- assert username == wanted
- return http.auth.PasswordFileUser(username, password,
- [rapi.RAPI_ACCESS_WRITE])
+ """
+ wanted, _ = http.auth.HttpServerRequestAuthentication \
+ .ExtractUserPassword(req)
+ assert username == wanted
+ return users_file.PasswordFileUser(username, password,
+ [rapi.RAPI_ACCESS_WRITE]).name
self._lcr = _LuxiCallRecorder()
# Create a mock RAPI server
- handler = _RapiMock(user_fn, self._lcr)
+ handler = _RapiMock(SimpleAuthenticator(), self._lcr)
self._client = \
rapi.client.GanetiRapiClient("master.example.com",
diff --git a/lib/server/rapi.py b/lib/server/rapi.py
index 9782ada..cf18360 100644
--- a/lib/server/rapi.py
+++ b/lib/server/rapi.py
@@ -38,27 +38,18 @@
import logging
import optparse
import sys
-import os
-import os.path
-import errno
-try:
- from pyinotify import pyinotify # pylint: disable=E0611
-except ImportError:
- import pyinotify
-
-from ganeti import asyncnotifier
from ganeti import constants
from ganeti import http
from ganeti import daemon
from ganeti import ssconf
import ganeti.rpc.errors as rpcerr
from ganeti import serializer
-from ganeti import compat
-from ganeti import utils
from ganeti import pathutils
from ganeti.rapi import connector
from ganeti.rapi import baserlib
+from ganeti.rapi.auth import basic_auth
+from ganeti.rapi.auth import pam
import ganeti.http.auth # pylint: disable=W0611
import ganeti.http.server
@@ -82,12 +73,12 @@
"""
AUTH_REALM = "Ganeti Remote API"
- def __init__(self, user_fn, reqauth, _client_cls=None):
+ def __init__(self, authenticator, reqauth, _client_cls=None):
"""Initializes this class.
- @type user_fn: callable
- @param user_fn: Function receiving username as string and returning
- L{http.auth.PasswordFileUser} or C{None} if user is not found
+    @type authenticator: an implementation of the {RapiAuthenticator} interface
+    @param authenticator: an object providing an implementation of the
+        ValidateRequest function
@type reqauth: bool
@param reqauth: Whether to require authentication
@@ -98,7 +89,7 @@
http.auth.HttpServerRequestAuthentication.__init__(self)
self._client_cls = _client_cls
self._resmap = connector.Mapper()
- self._user_fn = user_fn
+ self._authenticator = authenticator
self._reqauth = reqauth
@staticmethod
@@ -155,28 +146,21 @@
"""Determine whether authentication is required.
"""
- return self._reqauth or bool(self._GetRequestContext(req).handler_access)
+ return self._reqauth
- def Authenticate(self, req, username, password):
+ def Authenticate(self, req):
"""Checks whether a user can access a resource.
+    @return: True if the request was authenticated successfully (the
+        authenticated user name is then stored on the handler), False otherwise
"""
ctx = self._GetRequestContext(req)
-
- user = self._user_fn(username)
- if not (user and
- self.VerifyBasicAuthPassword(req, username, password,
- user.password)):
- # Unknown user or password wrong
+ auth_user = self._authenticator.ValidateRequest(
+ req, ctx.handler_access, self.GetAuthRealm(req))
+ if auth_user is None:
return False
- if (not ctx.handler_access or
- set(user.options).intersection(ctx.handler_access)):
- # Allow access
- return True
-
- # Access forbidden
- raise http.HttpForbidden()
+ ctx.handler.auth_user = auth_user
+ return True
def HandleRequest(self, req):
"""Handles a request.
@@ -214,101 +198,6 @@
return serializer.DumpJson(result)
-class RapiUsers(object):
- def __init__(self):
- """Initializes this class.
-
- """
- self._users = None
-
- def Get(self, username):
- """Checks whether a user exists.
-
- """
- if self._users:
- return self._users.get(username, None)
- else:
- return None
-
- def Load(self, filename):
- """Loads a file containing users and passwords.
-
- @type filename: string
- @param filename: Path to file
-
- """
- logging.info("Reading users file at %s", filename)
- try:
- try:
- contents = utils.ReadFile(filename)
- except EnvironmentError, err:
- self._users = None
- if err.errno == errno.ENOENT:
- logging.warning("No users file at %s", filename)
- else:
- logging.warning("Error while reading %s: %s", filename, err)
- return False
-
- users = http.auth.ParsePasswordFile(contents)
-
- except Exception, err: # pylint: disable=W0703
- # We don't care about the type of exception
- logging.error("Error while parsing %s: %s", filename, err)
- return False
-
- self._users = users
-
- return True
-
-
-class FileEventHandler(asyncnotifier.FileEventHandlerBase):
- def __init__(self, wm, path, cb):
- """Initializes this class.
-
- @param wm: Inotify watch manager
- @type path: string
- @param path: File path
- @type cb: callable
- @param cb: Function called on file change
-
- """
- asyncnotifier.FileEventHandlerBase.__init__(self, wm)
-
- self._cb = cb
- self._filename = os.path.basename(path)
-
- # Different Pyinotify versions have the flag constants at different places,
- # hence not accessing them directly
- mask = (pyinotify.EventsCodes.ALL_FLAGS["IN_CLOSE_WRITE"] |
- pyinotify.EventsCodes.ALL_FLAGS["IN_DELETE"] |
- pyinotify.EventsCodes.ALL_FLAGS["IN_MOVED_FROM"] |
- pyinotify.EventsCodes.ALL_FLAGS["IN_MOVED_TO"])
-
- self._handle = self.AddWatch(os.path.dirname(path), mask)
-
- def process_default(self, event):
- """Called upon inotify event.
-
- """
- if event.name == self._filename:
- logging.debug("Received inotify event %s", event)
- self._cb()
-
-
-def SetupFileWatcher(filename, cb):
- """Configures an inotify watcher for a file.
-
- @type filename: string
- @param filename: File to watch
- @type cb: callable
- @param cb: Function called on file change
-
- """
- wm = pyinotify.WatchManager()
- handler = FileEventHandler(wm, filename, cb)
- asyncnotifier.AsyncNotifier(wm, default_proc_fun=handler)
-
-
def CheckRapi(options, args):
"""Initial checks whether to run or exit with a failure.
@@ -334,15 +223,13 @@
"""
mainloop = daemon.Mainloop()
- users = RapiUsers()
+ if options.pamauth:
+ options.reqauth = True
+ authenticator = pam.PamAuthenticator()
+ else:
+ authenticator = basic_auth.BasicAuthenticator()
- handler = RemoteApiHandler(users.Get, options.reqauth)
-
- # Setup file watcher (it'll be driven by asyncore)
- SetupFileWatcher(pathutils.RAPI_USERS_FILE,
- compat.partial(users.Load, pathutils.RAPI_USERS_FILE))
-
- users.Load(pathutils.RAPI_USERS_FILE)
+ handler = RemoteApiHandler(authenticator, options.reqauth)
server = \
http.server.HttpServer(mainloop, options.bind_address, options.port,
@@ -357,10 +244,12 @@
"""Main remote API function, executed with the PID file held.
"""
+
(mainloop, server) = prep_data
try:
mainloop.Run()
finally:
+ logging.error("RAPI Daemon Failed")
server.Stop()
@@ -377,6 +266,10 @@
default=False, action="store_true",
help=("Disable anonymous HTTP requests and require"
" authentication"))
+ parser.add_option("--pam-authentication", dest="pamauth",
+ default=False, action="store_true",
+ help=("Enable RAPI authentication and authorization via"
+ " PAM"))
daemon.GenericMain(constants.RAPI, parser, CheckRapi, PrepRapi, ExecRapi,
default_ssl_cert=pathutils.RAPI_CERT_FILE,
diff --git a/lib/storage/drbd_info.py b/lib/storage/drbd_info.py
index 99605f1..4fd9a67 100644
--- a/lib/storage/drbd_info.py
+++ b/lib/storage/drbd_info.py
@@ -164,7 +164,8 @@
"""
- _VERSION_RE = re.compile(r"^version: (\d+)\.(\d+)\.(\d+)(?:\.(\d+))?"
+ _VERSION_RE = re.compile(r"^version: (\d+)\.(\d+)\.(\d+)"
+ r"(?:\.(\d+))?(?:-(\d+))?"
r" \(api:(\d+)/proto:(\d+)(?:-(\d+))?\)")
_VALID_LINE_RE = re.compile("^ *([0-9]+): cs:([^ ]+).*$")
@@ -180,6 +181,7 @@
- k_minor
- k_point
- k_fix (only on some drbd versions)
+ - k_release
- api
- proto
- proto2 (only on drbd > 8.2.X)
@@ -196,6 +198,8 @@
(version["k_major"], version["k_minor"], version["k_point"])
if "k_fix" in version:
retval += ".%s" % version["k_fix"]
+ if "k_release" in version:
+ retval += "-%s" % version["k_release"]
retval += " (api:%d/proto:%d" % (version["api"], version["proto"])
if "proto2" in version:
@@ -230,13 +234,15 @@
"k_major": int(values[0]),
"k_minor": int(values[1]),
"k_point": int(values[2]),
- "api": int(values[4]),
- "proto": int(values[5]),
+ "api": int(values[5]),
+ "proto": int(values[6]),
}
if values[3] is not None:
retval["k_fix"] = values[3]
- if values[6] is not None:
- retval["proto2"] = values[6]
+ if values[4] is not None:
+ retval["k_release"] = values[4]
+ if values[7] is not None:
+ retval["proto2"] = values[7]
return retval
diff --git a/lib/tools/cfgupgrade.py b/lib/tools/cfgupgrade.py
index 59ab1e1..6a7cef4 100644
--- a/lib/tools/cfgupgrade.py
+++ b/lib/tools/cfgupgrade.py
@@ -59,11 +59,11 @@
#: Target major version we will upgrade to
TARGET_MAJOR = 2
#: Target minor version we will upgrade to
-TARGET_MINOR = 17
+TARGET_MINOR = 18
#: Target major version for downgrade
DOWNGRADE_MAJOR = 2
#: Target minor version for downgrade
-DOWNGRADE_MINOR = 16
+DOWNGRADE_MINOR = 17
# map of legacy device types
# (mapping differing old LD_* constants to new DT_* constants)
@@ -730,21 +730,6 @@
# DOWNGRADE ------------------------------------------------------------
def DowngradeAll(self):
- if "maintenance" in self.config_data:
- del self.config_data["maintenance"]
- if "cluster" in self.config_data:
- cluster = self.config_data["cluster"]
- if "diagnose_data_collector_filename" in cluster:
- del cluster["diagnose_data_collector_filename"]
- if "data_collectors" in cluster:
- if constants.DATA_COLLECTOR_DIAGNOSE in cluster["data_collectors"]:
- del cluster["data_collectors"][constants.DATA_COLLECTOR_DIAGNOSE]
- if constants.DATA_COLLECTOR_KVM_R_S_S in cluster["data_collectors"]:
- del cluster["data_collectors"][constants.DATA_COLLECTOR_KVM_R_S_S]
- if "ipolicy" in cluster:
- ipolicy = cluster["ipolicy"]
- if "memory-ratio" in ipolicy:
- del ipolicy["memory-ratio"]
self.config_data["version"] = version.BuildVersion(DOWNGRADE_MAJOR,
DOWNGRADE_MINOR, 0)
diff --git a/lib/utils/__init__.py b/lib/utils/__init__.py
index ce89869..d51ad50 100644
--- a/lib/utils/__init__.py
+++ b/lib/utils/__init__.py
@@ -296,6 +296,8 @@
return []
cpu_list = []
for range_def in cpu_mask.split(","):
+ if range_def == constants.CPU_PINNING_ALL:
+ return [constants.CPU_PINNING_ALL_VAL]
boundaries = range_def.split("-")
n_elements = len(boundaries)
if n_elements > 2:
@@ -335,11 +337,8 @@
return []
cpu_list = []
for range_def in cpu_mask.split(constants.CPU_PINNING_SEP):
- if range_def == constants.CPU_PINNING_ALL:
- cpu_list.append([constants.CPU_PINNING_ALL_VAL, ])
- else:
- # Uniquify and sort the list before adding
- cpu_list.append(sorted(set(ParseCpuMask(range_def))))
+ # Uniquify and sort the list before adding
+ cpu_list.append(sorted(set(ParseCpuMask(range_def))))
return cpu_list
diff --git a/man/gnt-filter.rst b/man/gnt-filter.rst
index 1a5bbab..40a59e0 100644
--- a/man/gnt-filter.rst
+++ b/man/gnt-filter.rst
@@ -58,6 +58,12 @@
This predicate is true, if one of the entries of one of the opcodes
in this job satisfies the expression.
+ - ``user``. Only parameter is a boolean expression.
+    For this expression, there is only one field available, ``user``,
+    which represents the authenticated user who submitted the job via RAPI.
+    If a job is submitted through any other interface, or authentication is
+    turned off in RAPI, the field contains an empty string.
+
- An ``action``. One of:
- ACCEPT. The job will be accepted; no further filter rules
diff --git a/man/hspace.rst b/man/hspace.rst
index 543d180..fd31715 100644
--- a/man/hspace.rst
+++ b/man/hspace.rst
@@ -16,7 +16,8 @@
Backend options:
-{ **-m** *cluster* | **-L[** *path* **]** | **-t** *data-file* |
+{ **-m** *cluster* | **-L[** *path* **] --state-of-record** |
+**-t** *data-file* |
**\--simulate** *spec* | **-I** *path* }
@@ -299,6 +300,11 @@
which is to be contacted via LUXI (an internal Ganeti protocol). The
option is described in the man page **htools**\(1).
+\--state-of-record
+ When collecting from the LUXI backend, prefer state-of-record data
+ over live data. In this way, hspace will see precisely the same data
+ that will also be presented to the instance allocator.
+
\--simulate *description*
Backend specification: similar to the **-t** option, this allows
overriding the cluster data with a simulated cluster. For details
diff --git a/qa/ganeti-qa.py b/qa/ganeti-qa.py
index 55f397c..3a2afc1 100755
--- a/qa/ganeti-qa.py
+++ b/qa/ganeti-qa.py
@@ -48,8 +48,10 @@
import qa_error
import qa_filters
import qa_group
+import qa_global_hooks
import qa_instance
import qa_iptables
+import qa_maintd
import qa_monitoring
import qa_network
import qa_node
@@ -890,6 +892,13 @@
RunTestIf("mon-collector", qa_monitoring.TestInstStatusCollector)
+def RunMaintdTests():
+ if constants.DT_DRBD8 in qa_config.GetEnabledDiskTemplates():
+ RunTestIf("maintd", qa_maintd.TestEvacuate)
+ RunTestIf("maintd", qa_maintd.TestEvacuateFailover)
+ if constants.ENABLE_RESTRICTED_COMMANDS:
+ RunTestIf("maintd", qa_maintd.TestLiveRepair)
+
PARALLEL_TEST_DICT = {
"parallel-failover": qa_performance.TestParallelInstanceFailover,
"parallel-migration": qa_performance.TestParallelInstanceMigration,
@@ -965,6 +974,8 @@
RunTestBlock(RunNetworkTests)
RunTestBlock(RunFilterTests)
+ RunTestIf("global-hooks", qa_global_hooks.RunGlobalHooksTests)
+
# The master shouldn't be readded or put offline; "delay" needs a non-master
# node to test
pnode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
@@ -1086,6 +1097,7 @@
qa_cluster.AssertClusterVerify()
RunTestBlock(RunMonitoringTests)
+ RunTestBlock(RunMaintdTests)
RunPerformanceTests()
diff --git a/qa/qa-sample.json b/qa/qa-sample.json
index 0a60708..9b0a1a1 100644
--- a/qa/qa-sample.json
+++ b/qa/qa-sample.json
@@ -253,6 +253,8 @@
"job-list": true,
+ "global-hooks": true,
+
"jobqueue-performance": true,
"parallel-performance": true,
@@ -275,7 +277,8 @@
"default-instance-tests": true,
"exclusive-storage-instance-tests": false,
- "mon-collector": true
+ "mon-collector": true,
+ "maintd": true
},
"options": {
diff --git a/qa/qa_global_hooks.py b/qa/qa_global_hooks.py
new file mode 100644
index 0000000..edc9f0c
--- /dev/null
+++ b/qa/qa_global_hooks.py
@@ -0,0 +1,178 @@
+#
+#
+
+# Copyright (C) 2015 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+"""QA tests for the universal job hooks.
+
+"""
+
+import time
+
+from ganeti import constants
+from ganeti import pathutils
+from qa_config import GetMasterNode
+from qa_job_utils import ExecuteJobProducingCommand
+from qa_utils import AssertEqual, GetCommandOutput, IsFileExists, MakeNodePath
+
+PRE_PATH = "%s/global-pre.d" % pathutils.HOOKS_BASE_DIR
+POST_PATH = "%s/global-post.d" % pathutils.HOOKS_BASE_DIR
+
+
+def _GetHDir():
+ """Returns the path to the directory created for hooks temporary files
+
+ """
+ H_DIR = "%s/qa_global_hooks" % pathutils.LOG_DIR
+ master = GetMasterNode().primary
+ return MakeNodePath(master, H_DIR)
+
+
+def _GetHookFilePath(job_id, phase, status=None):
+ """Returns the path to the qa hooks temporary files.
+
+ """
+ h_fname = _GetHDir() + "/%d_OP_TEST_DELAY_%s" % (job_id, phase)
+ if phase == "pre":
+ return h_fname
+ return h_fname + "_" + status
+
+
+def TestHooksInitialize():
+ """Creates global hooks on the master node
+
+ """
+ master = GetMasterNode().primary
+ hooks_base_dir = MakeNodePath(master, pathutils.HOOKS_BASE_DIR)
+ pre_path = MakeNodePath(master, PRE_PATH)
+ post_path = MakeNodePath(master, POST_PATH)
+ GetCommandOutput(master, "mkdir -p %s" % hooks_base_dir)
+ GetCommandOutput(master, "mkdir -p %s" % pre_path)
+ GetCommandOutput(master, "mkdir -p %s" % post_path)
+ GetCommandOutput(master, "mkdir -p %s" % _GetHDir())
+ h_name = "/qa_test_hook"
+ create_hook_common = """
+FOUT=%s
+echo '#!/bin/sh' > $FOUT
+echo 'touch %s/$GANETI_JOB_ID"_"$GANETI_OP_CODE%s' >> $FOUT
+chmod +x $FOUT
+"""
+ create_pre = create_hook_common % (pre_path + h_name, _GetHDir(), '"_pre"')
+ create_post = create_hook_common % (post_path + h_name, _GetHDir(),
+ '"_post_"$GANETI_POST_STATUS')
+ GetCommandOutput(master, create_pre)
+ GetCommandOutput(master, create_post)
+
+
+def TestHookSucceeded():
+ """Checks whether the global hooks have been executed (status succes).
+
+ - Global pre hook should has been executed.
+ - Global post hook should with status *success* should has been executed.
+ - Global post hooks with failed statuses shouldn't have been executed.
+ """
+ master = GetMasterNode().primary
+ job_id = ExecuteJobProducingCommand("gnt-debug delay --submit 1")
+ time.sleep(3)
+ AssertEqual(IsFileExists(master, _GetHookFilePath(job_id, "pre")), True,
+ "Global pre hook hasn't been executed.")
+ AssertEqual(IsFileExists(master, _GetHookFilePath(job_id, "post",
+ constants.POST_HOOKS_STATUS_SUCCESS)), True,
+ "Global post hook hasn't been executed with status *success*")
+ AssertEqual(IsFileExists(master, _GetHookFilePath(job_id, "post",
+ constants.POST_HOOKS_STATUS_ERROR)), False,
+ "Global post hook has been executed with status *error*")
+ AssertEqual(IsFileExists(master, _GetHookFilePath(job_id, "post",
+ constants.POST_HOOKS_STATUS_DISAPPEARED)), False,
+ "Global post hook has been executed with status *disappeared*")
+
+
+def TestHookFailed():
+ """Checks whether the global hooks have been executed (status error).
+
+  - The global post hook with status *error* should have been executed.
+  - Global post hooks with other statuses shouldn't have been executed.
+ """
+ master = GetMasterNode().primary
+ job_id = ExecuteJobProducingCommand("gnt-debug delay --submit 0")
+ time.sleep(1)
+ AssertEqual(IsFileExists(master, _GetHookFilePath(job_id, "post",
+ constants.POST_HOOKS_STATUS_SUCCESS)), False,
+ "Global post hook has been executed with status *success*")
+ AssertEqual(IsFileExists(master, _GetHookFilePath(job_id, "post",
+ constants.POST_HOOKS_STATUS_ERROR)), True,
+ "Global post hook hasn't been executed with status *error*")
+ AssertEqual(IsFileExists(master, _GetHookFilePath(job_id, "post",
+ constants.POST_HOOKS_STATUS_DISAPPEARED)), False,
+ "Global post hook has been executed with status *disappeared*")
+
+
+def TestHookDisappeared():
+ """Checks whether the global hooks have been executed (status disappeared).
+
+  - The global pre hook should have been executed.
+  - The global post hook with status *disappeared* should have been executed.
+  - Global post hooks with other statuses shouldn't have been executed.
+ """
+ master = GetMasterNode().primary
+ job_id = ExecuteJobProducingCommand("gnt-debug delay --submit 10")
+ time.sleep(1)
+ GetCommandOutput(master, "gnt-job cancel --kill --yes-do-it %d" % job_id)
+ time.sleep(10)
+ AssertEqual(IsFileExists(master, _GetHookFilePath(job_id, "pre")), True,
+ "Global pre hook hasn't been executed.")
+ AssertEqual(IsFileExists(master, _GetHookFilePath(job_id, "post",
+ constants.POST_HOOKS_STATUS_SUCCESS)), False,
+ "Global post hook has been executed with status *success*")
+ AssertEqual(IsFileExists(master, _GetHookFilePath(job_id, "post",
+ constants.POST_HOOKS_STATUS_ERROR)), False,
+ "Global post hook has been executed with status *error*")
+ AssertEqual(IsFileExists(master, _GetHookFilePath(job_id, "post",
+ constants.POST_HOOKS_STATUS_DISAPPEARED)), True,
+ "Global post hook hasn't been executed with status *disappeared*")
+
+
+def TestHooksCleanup():
+ """Remove the directories created by the tests
+
+ """
+ master = GetMasterNode().primary
+ GetCommandOutput(master, "rm %s/*" % MakeNodePath(master, PRE_PATH))
+ GetCommandOutput(master, "rm %s/*" % MakeNodePath(master, POST_PATH))
+ GetCommandOutput(master, "rm -rf %s" % _GetHDir())
+
+
+def RunGlobalHooksTests():
+ """Runs tests for global hooks considering different job execution cases.
+
+ """
+ TestHooksInitialize()
+ TestHookSucceeded()
+ TestHookFailed()
+ TestHookDisappeared()
+ TestHooksCleanup()
diff --git a/qa/qa_instance.py b/qa/qa_instance.py
index 3650052..2a7e01d 100644
--- a/qa/qa_instance.py
+++ b/qa/qa_instance.py
@@ -556,6 +556,10 @@
AssertCommand(["gnt-instance", "migrate", "--force", "--allow-failover",
instance.name])
AssertCommand(["gnt-instance", "start", instance.name])
+
+ # We want to wait until the instance is fully booted, as the boot
+ # process gets corrupted if it is interrupted by migration.
+ qa_utils.RunInstanceCheck(instance, True)
AssertCommand(cmd)
# @InstanceCheck enforces the check that the instance is running
qa_utils.RunInstanceCheck(instance, True)
diff --git a/qa/qa_maintd.py b/qa/qa_maintd.py
new file mode 100644
index 0000000..ad33fbd
--- /dev/null
+++ b/qa/qa_maintd.py
@@ -0,0 +1,218 @@
+#
+#
+
+# Copyright (C) 2015 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+"""Maintainance daemon tests.
+
+"""
+
+import random
+import os.path
+
+from ganeti import serializer
+from ganeti.utils import retry
+
+import qa_config
+import qa_error
+
+from qa_utils import AssertCommand, \
+ UploadData, \
+ stdout_of
+from qa_instance_utils import CreateInstanceDrbd8, \
+ RemoveInstance
+
+
+def _GetMaintTags(node):
+ tags = stdout_of([
+ "gnt-node", "list-tags", node.primary
+ ]).split()
+ return [t for t in tags if t.startswith('maintd:repairready:')]
+
+
+def _AssertRepairTagAddition(node):
+ def fn():
+ tags = _GetMaintTags(node)
+ if len(tags) == 0:
+ raise retry.RetryAgain()
+ if len(tags) > 1:
+ raise qa_error.Error("Only one tag should be added")
+ else:
+ return tags[0]
+ return retry.Retry(fn, 5.0, 500.0)
+
+
+def _AssertNodeDrained(node):
+ def fn():
+ out = stdout_of([
+ "gnt-node", "list",
+ "--output=name", "--no-headers",
+ "--filter", "drained"
+ ])
+ if node.primary not in out:
+ raise retry.RetryAgain()
+ retry.Retry(fn, 5.0, 500.0)
+
+
+def _AssertInstanceRunning(inst):
+ def fn():
+ out = stdout_of([
+ "gnt-instance", "list",
+ "--output=status", "--no-headers",
+ "--filter", "name == \"%s\"" % inst.name
+ ])
+ if "running" not in out:
+ raise retry.RetryAgain()
+ retry.Retry(fn, 5.0, 500.0)
+
+
+def _AssertInstanceMove(inst, move_type):
+ def fn():
+ out = stdout_of([
+ "gnt-job", "list",
+ "--output=status", "--no-headers",
+ "--filter", '"%s(%s)" in summary' % (move_type, inst.name)
+ ])
+ if 'success' not in out:
+ raise retry.RetryAgain()
+ retry.Retry(fn, 5.0, 500.0)
+
+
+def _AssertRepairCommand():
+ def fn():
+ out = stdout_of([
+ "gnt-job", "list",
+ "--output=status", "--no-headers",
+ "--filter", '"REPAIR_COMMAND" in summary'
+ ])
+ if 'success' not in out:
+ raise retry.RetryAgain()
+ retry.Retry(fn, 5.0, 500.0)
+
+
+def _SetUp(diagnose_dc_filename):
+ AssertCommand(["gnt-cluster", "modify", "--maintenance-interval=3"])
+ AssertCommand([
+ "gnt-cluster", "modify",
+ "--diagnose-data-collector-filename", diagnose_dc_filename
+ ])
+
+
+def _TearDown(node, tag, added_filepaths, drain_node=True):
+ AssertCommand([
+ "gnt-cluster", "modify",
+ "--diagnose-data-collector-filename", '""'
+ ])
+ AssertCommand(["rm"] + added_filepaths, node=node)
+ if drain_node:
+ AssertCommand(["gnt-node", "modify", "--drained=no", node.primary])
+ AssertCommand(["gnt-node", "remove-tags", node.primary, tag])
+ AssertCommand(["gnt-cluster", "modify", "--maintenance-interval=300"])
+
+
+def _TestEvac(filepath, filecontent, inst_move_type):
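+  """Uploads a diagnose data collector and checks maintd's reaction.
+
+  The collector uploaded to C{filepath} reports the given status; the node
+  is then expected to be drained, the instance moved via C{inst_move_type}
+  and the node tagged as repair-ready.
+
+  """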
+ _SetUp(os.path.basename(filepath))
+ node1, node2 = qa_config.AcquireManyNodes(
+ 2,
+ exclude=qa_config.GetMasterNode())
+ inst = CreateInstanceDrbd8([node1, node2])
+ _AssertInstanceRunning(inst)
+ UploadData(node1.primary, filecontent, 0755, filepath)
+
+ _AssertNodeDrained(node1)
+ _AssertInstanceMove(inst, inst_move_type)
+ tag = _AssertRepairTagAddition(node1)
+
+ RemoveInstance(inst)
+ inst.Release()
+ node1.Release()
+ node2.Release()
+ _TearDown(node1, tag, [filepath])
+
+
+def TestEvacuate():
+ """Test node evacuate upon diagnosis.
+
+ """
+ n = random.randint(10000, 99999)
+ _TestEvac('/etc/ganeti/node-diagnose-commands/evacuate',
+ 'echo \'' + serializer.DumpJson({
+ "status": "evacuate",
+ "details": "qa evacuate test %d" % n}).strip() + '\'',
+ 'INSTANCE_MIGRATE')
+
+
+def TestEvacuateFailover():
+ """Test node evacuate failover upon diagnosis.
+
+ """
+ n = random.randint(10000, 99999)
+ _TestEvac('/etc/ganeti/node-diagnose-commands/evacuate-failover',
+ 'echo \'' + serializer.DumpJson({
+ "status": "evacuate-failover",
+ "details": "qa evacuate failover test %d" % n}).strip() + '\'',
+ 'INSTANCE_FAILOVER')
+
+
+def TestLiveRepair():
+ """Test node evacuate failover upon diagnosis.
+
+ """
+ _SetUp('live-repair')
+ n = random.randint(10000, 99999)
+ node = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
+ UploadData(node.primary,
+ 'echo \'' + serializer.DumpJson({
+ "status": "live-repair",
+ "command": "repair",
+ "details": str(n)}).strip() + '\'',
+ 0755,
+ '/etc/ganeti/node-diagnose-commands/live-repair')
+ UploadData(node.primary,
+ """#!/usr/bin/python
+import sys
+import json
+
+n = json.loads(sys.stdin.read())['details']
+with open('/tmp/' + n, 'w') as f:
+ f.write(n)
+print 'file written'
+""",
+ 0755,
+ '/etc/ganeti/node-repair-commands/repair')
+ _AssertRepairCommand()
+ tag = _AssertRepairTagAddition(node)
+ if str(n) != AssertCommand(["cat", "/tmp/" + str(n)], node=node)[1]:
+ raise qa_error.Error('Repair command was unsuccessful')
+ node.Release()
+ _TearDown(
+ node,
+ tag,
+ ['/etc/ganeti/node-diagnose-commands/live-repair',
+ '/etc/ganeti/node-repair-commands/repair'],
+ False)
diff --git a/qa/qa_rapi.py b/qa/qa_rapi.py
index 18142f6..2eb1d73 100644
--- a/qa/qa_rapi.py
+++ b/qa/qa_rapi.py
@@ -53,7 +53,7 @@
from ganeti import rapi
from ganeti import utils
-from ganeti.http.auth import ParsePasswordFile
+from ganeti.rapi.auth.users_file import ParsePasswordFile
import ganeti.rapi.client # pylint: disable=W0611
import ganeti.rapi.client_utils
diff --git a/qa/qa_utils.py b/qa/qa_utils.py
index a519b22..bd93904 100644
--- a/qa/qa_utils.py
+++ b/qa/qa_utils.py
@@ -1,7 +1,7 @@
#
#
-# Copyright (C) 2007, 2011, 2012, 2013 Google Inc.
+# Copyright (C) 2007, 2011, 2012, 2013, 2015 Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -487,23 +487,32 @@
"""
if filename:
- tmp = "tmp=%s" % utils.ShellQuote(filename)
- else:
- tmp = ('tmp=$(mktemp --tmpdir gnt.XXXXXX) && '
- 'chmod %o "${tmp}"') % mode
- cmd = ("%s && "
- "[[ -f \"${tmp}\" ]] && "
- "cat > \"${tmp}\" && "
- "echo \"${tmp}\"") % tmp
+ quoted_filename = utils.ShellQuote(filename)
+ directory = utils.ShellQuote(os.path.dirname(filename))
- p = subprocess.Popen(GetSSHCommand(node, cmd), shell=False,
- stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+ cmd = " && ".join([
+ "mkdir -p %s" % directory,
+ "cat > %s" % quoted_filename,
+ "chmod %o %s" % (mode, quoted_filename)])
+ else:
+ cmd = " && ".join([
+ 'tmp=$(mktemp --tmpdir gnt.XXXXXX)',
+ 'chmod %o "${tmp}"' % mode,
+ 'cat > "${tmp}"',
+ 'echo "${tmp}"'])
+
+ p = subprocess.Popen(GetSSHCommand(node, cmd),
+ shell=False,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE)
p.stdin.write(data)
p.stdin.close()
AssertEqual(p.wait(), 0)
- # Return temporary filename
- return _GetCommandStdout(p).strip()
+ if filename:
+ return filename
+ else:
+ return _GetCommandStdout(p).strip()
def BackupFile(node, path):
@@ -528,6 +537,17 @@
return result
+def IsFileExists(node, path):
+ """Checks if a file on the node exists.
+
+ """
+ cmd = ("[[ -f \"%s\" ]] && echo yes || echo no" % path)
+
+  # The command prints "yes" when the file exists and "no" otherwise
+  result = GetCommandOutput(node, cmd).strip()
+  return result == "yes"
+
+
@contextlib.contextmanager
def CheckFileUnmodified(node, filename):
"""Checks that the content of a given file remains the same after running a
diff --git a/src/Ganeti/Codec.hs b/src/Ganeti/Codec.hs
index 6f54c0d..b3b4c87 100644
--- a/src/Ganeti/Codec.hs
+++ b/src/Ganeti/Codec.hs
@@ -1,10 +1,12 @@
+{-# LANGUAGE CPP #-}
+
{-| Provides interface to the 'zlib' library.
-}
{-
-Copyright (C) 2014 Google Inc.
+Copyright (C) 2014, 2016 Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -43,9 +45,9 @@
import Codec.Compression.Zlib
import qualified Codec.Compression.Zlib.Internal as I
import Control.Monad (liftM)
-import Control.Monad.Error.Class (MonadError(..))
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Internal as BL
+import Control.Monad.Error.Class (MonadError(..))
import Ganeti.BasicTypes
@@ -59,6 +61,13 @@
-- 'throwError'.
decompressZlib :: (MonadError e m, FromString e)
=> BL.ByteString -> m BL.ByteString
+#if MIN_VERSION_zlib(0, 6, 0)
+decompressZlib = I.foldDecompressStreamWithInput
+ (liftM . BL.chunk)
+ return
+ (throwError . mkFromString . (++)"Zlib: " . show)
+ $ I.decompressST I.zlibFormat I.defaultDecompressParams
+#else
decompressZlib = I.foldDecompressStream
(liftM . BL.chunk)
(return mempty)
@@ -66,3 +75,4 @@
. I.decompressWithErrors
I.zlibFormat
I.defaultDecompressParams
+#endif
diff --git a/src/Ganeti/Confd/Server.hs b/src/Ganeti/Confd/Server.hs
index a2ec0a9..0975dac 100644
--- a/src/Ganeti/Confd/Server.hs
+++ b/src/Ganeti/Confd/Server.hs
@@ -132,7 +132,7 @@
-- | Returns a node name for a given UUID
uuidToNodeName :: ConfigData -> String -> Result String
-uuidToNodeName cfg uuid = gntErrorToResult $ nodeName <$> getNode cfg uuid
+uuidToNodeName cfg uuid = gntErrorToResult $ nodeName <$> getNodeByUuid cfg uuid
-- | Encodes a list of minors into a JSON representation, converting UUIDs to
-- names in the process
@@ -154,7 +154,7 @@
EmptyQuery -> liftM ((ReplyStatusOk,,serial) . J.showJSON) master_name
PlainQuery _ -> return queryArgumentError
DictQuery reqq -> do
- mnode <- gntErrorToResult $ getNode cfg master_uuid
+ mnode <- gntErrorToResult $ getNodeByUuid cfg master_uuid
mname <- master_name
let fvals = map (\field -> case field of
ReqFieldName -> mname
@@ -255,7 +255,7 @@
case getInstance cfg inst_name of
Ok i -> return i
Bad e -> fail $ "Instance not found in the configuration: " ++ show e
- case getInstDisks cfg . uuidOf $ inst of
+ case getInstDisks cfg inst of
Ok disks -> return (ReplyStatusOk, J.showJSON disks, instSerial inst)
Bad e -> fail $ "Could not retrieve disks: " ++ show e
diff --git a/src/Ganeti/Config.hs b/src/Ganeti/Config.hs
index 5687b54..fdfe4f6 100644
--- a/src/Ganeti/Config.hs
+++ b/src/Ganeti/Config.hs
@@ -51,6 +51,7 @@
, getMasterNetworkParameters
, getOnlineNodes
, getNode
+ , getNodeByUuid
, getInstance
, getDisk
, getFilterRule
@@ -65,7 +66,6 @@
, getInstMinorsForNode
, getInstAllNodes
, getInstDisks
- , getInstDisksFromObj
, getDrbdMinorsForDisk
, getDrbdMinorsForInstance
, getFilledHvStateParams
@@ -152,7 +152,7 @@
-- the secondaries.
instDiskNodes :: ConfigData -> Instance -> S.Set String
instDiskNodes cfg inst =
- case getInstDisksFromObj cfg inst of
+ case getInstDisks cfg inst of
Ok disks -> S.unions $ map computeDiskNodes disks
Bad _ -> S.empty
@@ -279,33 +279,45 @@
ECodeNoEnt
in maybe err Ok $ M.lookup name' allitems
+-- | Looks up a node by uuid.
+getNodeByUuid :: ConfigData -> String -> ErrorResult Node
+getNodeByUuid cfg uuid =
+ let nodes = fromContainer (configNodes cfg)
+ in getItem' "Node" uuid nodes
+
+-- | Looks up a node by name matching.
+getNodeByPartialName :: ConfigData -> String -> ErrorResult Node
+getNodeByPartialName cfg name =
+ let nodes = fromContainer (configNodes cfg)
+ by_name = M.mapKeys (nodeName . (M.!) nodes) nodes
+ in getItem "Node" name by_name
+
-- | Looks up a node by name or uuid.
getNode :: ConfigData -> String -> ErrorResult Node
getNode cfg name =
- let nodes = fromContainer (configNodes cfg)
- in case getItem' "Node" name nodes of
- -- if not found by uuid, we need to look it up by name
- Ok node -> Ok node
- Bad _ -> let by_name = M.mapKeys
- (nodeName . (M.!) nodes) nodes
- in getItem "Node" name by_name
+ case getNodeByUuid cfg name of
+ -- if not found by uuid, we need to look it up by name
+ x@(Ok _) -> x
+ Bad _ -> getNodeByPartialName cfg name
--- | Looks up an instance by name or uuid.
-getInstance :: ConfigData -> String -> ErrorResult Instance
-getInstance cfg name =
+-- | Looks up an instance by uuid.
+getInstanceByUuid :: ConfigData -> String -> ErrorResult Instance
+getInstanceByUuid cfg uuid =
let instances = fromContainer (configInstances cfg)
- in case getItem' "Instance" name instances of
- -- if not found by uuid, we need to look it up by name
- Ok inst -> Ok inst
- Bad _ -> let by_name =
- M.delete ""
- . M.mapKeys (fromMaybe "" . instName . (M.!) instances)
- $ instances
- in getItem "Instance" name by_name
+ in getItem' "Instance" uuid instances
--- | Looks up an instance by exact name match
-getInstanceByName :: ConfigData -> String -> ErrorResult Instance
-getInstanceByName cfg name =
+-- | Looks up an instance by approximate name.
+getInstanceByPartialName :: ConfigData -> String -> ErrorResult Instance
+getInstanceByPartialName cfg name =
+ let instances = fromContainer (configInstances cfg)
+ by_name = M.delete ""
+ . M.mapKeys (fromMaybe "" . instName . (M.!) instances)
+ $ instances
+ in getItem "Instance" name by_name
+
+-- | Looks up an instance by exact name match.
+getInstanceByExactName :: ConfigData -> String -> ErrorResult Instance
+getInstanceByExactName cfg name =
let instances = M.elems . fromContainer . configInstances $ cfg
matching = F.find (maybe False (== name) . instName) instances
in case matching of
@@ -314,6 +326,13 @@
("Instance name " ++ name ++ " not found")
ECodeNoEnt
+-- | Looks up an instance by partial name or uuid.
+getInstance :: ConfigData -> String -> ErrorResult Instance
+getInstance cfg name =
+ case getInstanceByUuid cfg name of
+ x@(Ok _) -> x
+ Bad _ -> getInstanceByPartialName cfg name
+
-- | Looks up a disk by uuid.
getDisk :: ConfigData -> String -> ErrorResult Disk
getDisk cfg name =
@@ -447,17 +466,17 @@
-- | Looks up an instance's primary node.
getInstPrimaryNode :: ConfigData -> String -> ErrorResult Node
getInstPrimaryNode cfg name =
- getInstanceByName cfg name
+ getInstanceByExactName cfg name
>>= withMissingParam "Instance without primary node" return . instPrimaryNode
- >>= getNode cfg
+ >>= getNodeByUuid cfg
-- | Retrieves all nodes hosting a DRBD disk
getDrbdDiskNodes :: ConfigData -> Disk -> [Node]
getDrbdDiskNodes cfg disk =
let retrieved = case diskLogicalId disk of
Just (LIDDrbd8 nodeA nodeB _ _ _ _) ->
- justOk [getNode cfg nodeA, getNode cfg nodeB]
- _ -> []
+ justOk [getNodeByUuid cfg nodeA, getNodeByUuid cfg nodeB]
+ _ -> []
in retrieved ++ concatMap (getDrbdDiskNodes cfg) (diskChildren disk)
-- | Retrieves all the nodes of the instance.
@@ -466,22 +485,15 @@
-- the primary node has to be appended to the results.
getInstAllNodes :: ConfigData -> String -> ErrorResult [Node]
getInstAllNodes cfg name = do
- inst <- getInstanceByName cfg name
- inst_disks <- getInstDisksFromObj cfg inst
+ inst <- getInstanceByExactName cfg name
+ inst_disks <- getInstDisks cfg inst
let disk_nodes = concatMap (getDrbdDiskNodes cfg) inst_disks
pNode <- getInstPrimaryNode cfg name
return . nub $ pNode:disk_nodes
--- | Get disks for a given instance.
--- The instance is specified by name or uuid.
-getInstDisks :: ConfigData -> String -> ErrorResult [Disk]
-getInstDisks cfg iname =
- getInstance cfg iname >>= mapM (getDisk cfg) . instDisks
-
-- | Get disks for a given instance object.
-getInstDisksFromObj :: ConfigData -> Instance -> ErrorResult [Disk]
-getInstDisksFromObj cfg =
- getInstDisks cfg . uuidOf
+getInstDisks :: ConfigData -> Instance -> ErrorResult [Disk]
+getInstDisks cfg = mapM (getDisk cfg) . instDisks
-- | Collects a value for all DRBD disks
collectFromDrbdDisks
@@ -522,7 +534,7 @@
getDrbdMinorsForInstance :: ConfigData -> Instance
-> ErrorResult [(Int, String)]
getDrbdMinorsForInstance cfg =
- liftM (concatMap getDrbdMinorsForDisk) . getInstDisksFromObj cfg
+ liftM (concatMap getDrbdMinorsForDisk) . getInstDisks cfg
-- | String for primary role.
rolePrimary :: String
@@ -543,7 +555,7 @@
then rolePrimary
else roleSecondary
iname = fromMaybe "" $ instName inst
- inst_disks = case getInstDisksFromObj cfg inst of
+ inst_disks = case getInstDisks cfg inst of
Ok disks -> disks
Bad _ -> []
-- FIXME: the disk/ build there is hack-ish; unify this in a
@@ -640,7 +652,7 @@
withMissingParam "Instance without Primary Node"
(\i -> return $ MM.fromList . lvsByNode i)
(instPrimaryNode inst)
- <*> getInstDisksFromObj cd inst
+ <*> getInstDisks cd inst
where
lvsByNode :: String -> [Disk] -> [(String, LogicalVolume)]
lvsByNode node = concatMap (lvsByNode1 node)
diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs
index 13bff2e..1259978 100644
--- a/src/Ganeti/Constants.hs
+++ b/src/Ganeti/Constants.hs
@@ -724,6 +724,26 @@
hooksVersion :: Int
hooksVersion = 2
+-- * Global hooks related constants
+
+globalHooksDir :: String
+globalHooksDir = "global"
+
+globalHooksMaster :: String
+globalHooksMaster = "master"
+
+globalHooksNotMaster :: String
+globalHooksNotMaster = "not_master"
+
+postHooksStatusSuccess :: String
+postHooksStatusSuccess = "success"
+
+postHooksStatusError :: String
+postHooksStatusError = "error"
+
+postHooksStatusDisappeared :: String
+postHooksStatusDisappeared = "disappeared"
+
-- * Hooks subject type (what object type does the LU deal with)
htypeCluster :: String
@@ -1641,6 +1661,9 @@
hvCpuMask :: String
hvCpuMask = "cpu_mask"
+hvWorkerCpuMask :: String
+hvWorkerCpuMask = "worker_cpu_mask"
+
hvCpuSockets :: String
hvCpuSockets = "cpu_sockets"
@@ -1895,6 +1918,7 @@
, (hvCpuCap, VTypeInt)
, (hvCpuCores, VTypeInt)
, (hvCpuMask, VTypeString)
+ , (hvWorkerCpuMask, VTypeString)
, (hvCpuSockets, VTypeInt)
, (hvCpuThreads, VTypeInt)
, (hvCpuType, VTypeString)
@@ -4123,6 +4147,7 @@
, (hvMemPath, PyValueEx "")
, (hvRebootBehavior, PyValueEx instanceRebootAllowed)
, (hvCpuMask, PyValueEx cpuPinningAll)
+ , (hvWorkerCpuMask, PyValueEx cpuPinningAll)
, (hvCpuType, PyValueEx "")
, (hvCpuCores, PyValueEx (0 :: Int))
, (hvCpuThreads, PyValueEx (0 :: Int))
@@ -4900,6 +4925,10 @@
opcodeReasonSrcRlib2,
opcodeReasonSrcUser]
+-- | A reason content prefix for RAPI auth user
+opcodeReasonAuthUser :: String
+opcodeReasonAuthUser = "RAPI-Auth:"
+
-- | Path generating random UUID
randomUuidFile :: String
randomUuidFile = ConstantUtils.randomUuidFile
@@ -5063,6 +5092,9 @@
uuidRegex :: String
uuidRegex = "^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$"
+dummyUuid :: String
+dummyUuid = "deadbeef-dead-beed-dead-beefdeadbeef"
+
-- * Luxi constants
luxiSocketPerms :: Int
@@ -5545,6 +5577,13 @@
maintdFailureTagPrefix :: String
maintdFailureTagPrefix = maintdPrefix ++ "repairfailed:"
+-- * RAPI PAM auth related constants
+
+-- | The name of the Ganeti RAPI-specific HTTP header containing additional
+-- user credentials
+httpRapiPamCredential :: String
+httpRapiPamCredential = "Ganeti-RAPI-Credential"
+
-- | The polling frequency to wait for a job status change
cliWfjcFrequency :: Int
cliWfjcFrequency = 20
@@ -5552,4 +5591,3 @@
-- | Default 'WaitForJobChange' timeout in seconds
defaultWfjcTimeout :: Int
defaultWfjcTimeout = 60
-
diff --git a/src/Ganeti/HTools/AlgorithmParams.hs b/src/Ganeti/HTools/AlgorithmParams.hs
index 8a53e69..820c35d 100644
--- a/src/Ganeti/HTools/AlgorithmParams.hs
+++ b/src/Ganeti/HTools/AlgorithmParams.hs
@@ -52,6 +52,14 @@
-- in cluster score more than
-- algDiskMovesFactor times higher than
-- the gain in migration moves
+ , algLongSolutionThreshold :: Int -- ^ Threshold for long-time solutions
+  , algLongSolutionsFactor :: Double -- ^ Allow only long solutions
+                                        -- whose K/N ratio is greater
+                                        -- than algLongSolutionsFactor,
+                                        -- where K is the factor by which the
+                                        -- cluster metric has improved and N is
+                                        -- the factor by which the estimated
+                                        -- time of the solution exceeds the
+                                        -- threshold
, algInstanceMoves :: Bool -- ^ Whether instance moves are allowed
, algRestrictedMigration :: Bool -- ^ Whether migration is restricted
, algIgnoreSoftErrors :: Bool -- ^ Whether to always ignore soft errors
@@ -73,6 +81,8 @@
fromCLIOptions opts = AlgorithmOptions
{ algDiskMoves = CLI.optDiskMoves opts
, algDiskMovesFactor = CLI.optAvoidDiskMoves opts
+ , algLongSolutionThreshold = CLI.optLongSolutionThreshold opts
+ , algLongSolutionsFactor = CLI.optAvoidLongSolutions opts
, algInstanceMoves = CLI.optInstMoves opts
, algRestrictedMigration = CLI.optRestrictedMigrate opts
, algIgnoreSoftErrors = CLI.optIgnoreSoftErrors opts
diff --git a/src/Ganeti/HTools/Backend/Luxi.hs b/src/Ganeti/HTools/Backend/Luxi.hs
index 639d74d..d74294b 100644
--- a/src/Ganeti/HTools/Backend/Luxi.hs
+++ b/src/Ganeti/HTools/Backend/Luxi.hs
@@ -137,24 +137,27 @@
queryGroups = liftM errToResult . L.callMethod queryGroupsMsg
-- | Parse a instance list in JSON format.
-getInstances :: NameAssoc
+getInstances :: Bool -- ^ use only state-of-record (SoR) data
+ -> NameAssoc
-> JSValue
-> Result [(String, Instance.Instance)]
-getInstances ktn arr = L.extractArray arr >>= mapM (parseInstance ktn)
+getInstances sor ktn arr = L.extractArray arr >>= mapM (parseInstance sor ktn)
-- | Construct an instance from a JSON object.
-parseInstance :: NameAssoc
+parseInstance :: Bool -- ^ use only state-of-record (SoR) data
+ -> NameAssoc
-> [(JSValue, JSValue)]
-> Result (String, Instance.Instance)
-parseInstance ktn [ name, disk, mem, vcpus
- , status, pnode, snodes, tags, oram
- , auto_balance, disk_template, su
- , dsizes, dspindles, forthcoming ] = do
+parseInstance sor ktn
+ [ name, disk, mem, vcpus, status, pnode, snodes, tags, oram
+ , auto_balance, disk_template, su, dsizes, dspindles, forthcoming ] = do
xname <- annotateResult "Parsing new instance" (L.fromJValWithStatus name)
let convert a = genericConvert "Instance" xname a
xdisk <- convert "disk_usage" disk
- xmem <- case oram of -- FIXME: remove the "guessing"
- (_, JSRational _ _) -> convert "oper_ram" oram
+ xmem <- case (sor, oram) of -- FIXME: remove the "guessing"
+ (False, (_, JSRational _ _)) -> convert "oper_ram" oram
+ -- Note: "oper_ram" is live data; we only use it if not told
+ -- to restrict to state-of-record data
_ -> convert "be/memory" mem
xvcpus <- convert "be/vcpus" vcpus
xpnode <- convert "pnode" pnode >>= lookupNode ktn xname
@@ -176,7 +179,7 @@
xforthcoming
return (xname, inst)
-parseInstance _ v = fail ("Invalid instance query result: " ++ show v)
+parseInstance _ _ v = fail ("Invalid instance query result: " ++ show v)
-- | Parse a node list in JSON format.
getNodes :: NameAssoc -> JSValue -> Result [(String, Node.Node)]
@@ -279,20 +282,22 @@
-- | Converts the output of 'readData' into the internal cluster
-- representation.
-parseData :: (Result JSValue, Result JSValue, Result JSValue, Result JSValue)
+parseData :: Bool -- ^ use only state-of-record (SoR) data
+ -> (Result JSValue, Result JSValue, Result JSValue, Result JSValue)
-> Result ClusterData
-parseData (groups, nodes, instances, cinfo) = do
+parseData sor (groups, nodes, instances, cinfo) = do
group_data <- groups >>= getGroups
let (group_names, group_idx) = assignIndices group_data
node_data <- nodes >>= getNodes group_names
let (node_names, node_idx) = assignIndices node_data
- inst_data <- instances >>= getInstances node_names
+ inst_data <- instances >>= getInstances sor node_names
let (_, inst_idx) = assignIndices inst_data
(ctags, cpol, master) <- cinfo >>= getClusterData
node_idx' <- setMaster node_names node_idx master
return (ClusterData group_idx node_idx' inst_idx ctags cpol)
-- | Top level function for data loading.
-loadData :: String -- ^ Unix socket to use as source
+loadData :: Bool -- ^ use only state-of-record (SoR) data
+ -> String -- ^ Unix socket to use as source
-> IO (Result ClusterData)
-loadData = fmap parseData . readData
+loadData sor = fmap (parseData sor) . readData
diff --git a/src/Ganeti/HTools/CLI.hs b/src/Ganeti/HTools/CLI.hs
index 110375e..eb9560b 100644
--- a/src/Ganeti/HTools/CLI.hs
+++ b/src/Ganeti/HTools/CLI.hs
@@ -56,6 +56,8 @@
, oDataFile
, oDiskMoves
, oAvoidDiskMoves
+ , oLongSolutionThreshold
+ , oAvoidLongSolutions
, oDiskTemplate
, oDryRun
, oSpindleUse
@@ -116,6 +118,7 @@
, oShowVer
, oShowComp
, oSkipNonRedundant
+ , oSoR
, oStdSpec
, oTargetResources
, oTieredSpec
@@ -150,6 +153,15 @@
, optAvoidDiskMoves :: Double -- ^ Allow only disk moves improving
-- cluster score in more than
-- optAvoidDiskMoves times
+  , optLongSolutionThreshold :: Int -- ^ The threshold in seconds
+                                    -- that defines long-time solutions
+  , optAvoidLongSolutions :: Double -- ^ Allow only long solutions
+                                       -- whose K/N ratio is greater
+                                       -- than optAvoidLongSolutions,
+                                       -- where K is the factor by which the
+                                       -- cluster metric has improved and N is
+                                       -- the factor by which the estimated
+                                       -- time of the solution exceeds the
+                                       -- threshold
, optInstMoves :: Bool -- ^ Allow instance moves
, optDiskTemplate :: Maybe DiskTemplate -- ^ Override for the disk template
, optSpindleUse :: Maybe Int -- ^ Override for the spindle usage
@@ -160,6 +172,7 @@
, optIgnoreSoftErrors :: Bool -- ^ Ignore soft errors in balancing moves
, optIndependentGroups :: Bool -- ^ consider groups independently
, optAcceptExisting :: Bool -- ^ accept existing N+1 violations
+ , optSoR :: Bool -- ^ only use state-of-record data
, optMonD :: Bool -- ^ Query MonDs
, optMonDFile :: Maybe FilePath -- ^ Optional file with data provided
-- by MonDs
@@ -232,6 +245,8 @@
{ optDataFile = Nothing
, optDiskMoves = True
, optAvoidDiskMoves = 1.0
+ , optLongSolutionThreshold = 1000
+ , optAvoidLongSolutions = 0.0
, optInstMoves = True
, optIndependentGroups = False
, optAcceptExisting = False
@@ -241,6 +256,7 @@
, optIdleDefault = False
, optIgnoreSoftErrors = False
, optDynuFile = Nothing
+ , optSoR = False
, optMonD = False
, optMonDFile = Nothing
, optMonDXen = False
@@ -367,6 +383,24 @@
\ admit disk move during the step",
OptComplFloat)
+oLongSolutionThreshold :: OptType
+oLongSolutionThreshold =
+ (Option "" ["long-solution-threshold"]
+ (reqWithConversion (tryRead "long-time solution threshold")
+ (\f opts -> Ok opts { optLongSolutionThreshold = f }) "FACTOR")
+ "specify the threshold in seconds, that defines long-time solutions",
+ OptComplInteger)
+
+oAvoidLongSolutions :: OptType
+oAvoidLongSolutions =
+ (Option "" ["avoid-long-solutions"]
+ (reqWithConversion (tryRead "long-time solutions avoiding factor")
+ (\f opts -> Ok opts { optAvoidLongSolutions = f }) "FACTOR")
+ "solution should increase cluster metric in more times,\
+ \ than it's estimated time multiplied by avoiding factor\
+ \ exceeds the threshold",
+ OptComplFloat)
+
oMonD :: OptType
oMonD =
(Option "" ["mond"]
@@ -405,6 +439,13 @@
"Rescale the weight of the memory utilization by the given factor",
OptComplFloat)
+oSoR :: OptType
+oSoR =
+ (Option "" ["state-of-record"]
+ (NoArg (\ opts -> Ok opts { optSoR = True }))
+ "only use state-of-record data",
+ OptComplNone)
+
oMonDExitMissing :: OptType
oMonDExitMissing =
(Option "" ["exit-on-missing-mond-data"]
diff --git a/src/Ganeti/HTools/Cluster.hs b/src/Ganeti/HTools/Cluster.hs
index 65746fd..9385df5 100644
--- a/src/Ganeti/HTools/Cluster.hs
+++ b/src/Ganeti/HTools/Cluster.hs
@@ -98,7 +98,7 @@
, (\\)
, sort
, intercalate)
-import Data.Maybe (fromJust, fromMaybe, isJust, isNothing)
+import Data.Maybe (fromJust, fromMaybe, isJust, isNothing, mapMaybe)
import Data.Ord (comparing)
import Text.Printf (printf)
@@ -115,7 +115,8 @@
import Ganeti.HTools.Cluster.Evacuate ( EvacSolution(..), emptyEvacSolution
, updateEvacSolution, reverseEvacSolution
, nodeEvacInstance)
-import Ganeti.HTools.Cluster.Metrics (compCV, compClusterStatistics)
+import Ganeti.HTools.Cluster.Metrics (compCV, compClusterStatistics
+ , optimalCVScore)
import Ganeti.HTools.Cluster.Moves (applyMoveEx)
import Ganeti.HTools.Cluster.Utils (splitCluster, instancePriGroup
, availableGroupNodes, iMoveToJob)
@@ -430,12 +431,81 @@
foldl' (checkSingleStep force ini_tbl target) best_migr_tbl disk_moves
in (best_migr_tbl, best_tbl)
+-- | The default network bandwidth value in Mbit/s
+defaultBandwidth :: Int
+defaultBandwidth = 100
+
+-- | Compute network bandwidth during given move in Mbit/s
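+-- (falls back to 'defaultBandwidth' when no bandwidth information is
+-- available for the pair of nodes involved)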
+plcBandwidth :: Table -> Placement -> Int
+plcBandwidth (Table nl _ _ _) (_, pn, sn, move, _) =
+ fromMaybe defaultBandwidth (Node.calcBandwidthToNode src dst)
+ where getNode ndx = Container.find ndx nl
+ (src, dst) = case move of
+ Failover -> (getNode pn, getNode sn)
+ FailoverToAny ndx -> (getNode pn, getNode ndx)
+ ReplacePrimary ndx -> (getNode pn, getNode ndx)
+ ReplaceSecondary ndx -> (getNode sn, getNode ndx)
+ ReplaceAndFailover ndx -> (getNode sn, getNode ndx)
+ FailoverAndReplace ndx -> (getNode sn, getNode ndx)
+
+-- | Compute the amount of data to be moved
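+-- (a plain failover only transfers the instance's memory; all other moves
+-- also copy the disk data)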
+moveVolume :: IMove -> Instance.Instance -> Int
+moveVolume Failover inst = Instance.mem inst
+moveVolume (FailoverToAny _) inst = Instance.mem inst
+moveVolume _ inst = Instance.mem inst + Instance.dsk inst
+
+-- | Compute the estimated time to perform move
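+-- (sizes are in MiB and bandwidth in Mbit/s, hence the factor of 8; the
+-- result is roughly the transfer time in seconds)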
+placementTimeEstimation :: Table -> Placement -> Double
+placementTimeEstimation tbl@(Table _ il _ _)
+ plc@(idx, _, _, move, _) =
+ (fromIntegral volume * 8) / fromIntegral bandwidth
+ where volume = moveVolume move (Container.find idx il)
+ bandwidth = plcBandwidth tbl plc
+
+-- | Compute the estimated time to perform solution
+solutionTimeEstimation :: Table -> Double
+solutionTimeEstimation fin_tbl@(Table _ _ _ plcs) = sum times
+ where times = map (placementTimeEstimation fin_tbl) plcs
+
+-- | Filter long-time solutions without enough gain
+filterLongSolutions :: AlgorithmOptions
+ -> Table
+ -> Table
+ -> Maybe Table
+filterLongSolutions opts ini_tbl fin_tbl =
+ let long_sol_th = fromIntegral $ algLongSolutionThreshold opts
+ long_sol_f = algLongSolutionsFactor opts
+ fin_t = solutionTimeEstimation fin_tbl
+ time_metric = fin_t / long_sol_th
+ Table nl _ ini_cv _ = ini_tbl
+ Table _ _ fin_cv _ = fin_tbl
+ opt_cv = optimalCVScore nl
+ improvement = (ini_cv - opt_cv) / (fin_cv - opt_cv)
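+      -- keep the solution if long-solution filtering is disabled, the
+      -- solution is short enough, the cluster is already optimal, or the
+      -- relative improvement outweighs the scaled relative execution time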
+ in if long_sol_f < 0.01 ||
+ fin_t < long_sol_th ||
+ fin_cv == opt_cv ||
+ improvement > long_sol_f * time_metric
+ then Just fin_tbl
+ else Nothing
+
+-- | Filter solutions without enough gain
+filterMoveByGain :: AlgorithmOptions -> Table -> Table -> Maybe Table
+filterMoveByGain opts ini_tbl fin_tbl =
+ let mg_limit = algMinGainLimit opts
+ min_gain = algMinGain opts
+ Table _ _ ini_cv _ = ini_tbl
+ Table _ _ fin_cv _ = fin_tbl
+ in if fin_cv < ini_cv && (ini_cv > mg_limit
+ || ini_cv - fin_cv > min_gain)
+ then Just fin_tbl -- this round made success, return the new table
+ else Nothing
+
-- | Compute the best next move.
checkMove :: AlgorithmOptions -- ^ Algorithmic options for balancing
-> [Ndx] -- ^ Allowed target node indices
-> Table -- ^ The current solution
-> [Instance.Instance] -- ^ List of instances still to move
- -> Table -- ^ The new solution
+ -> Maybe Table -- ^ The new solution
checkMove opts nodes_idx ini_tbl@(Table _ _ ini_cv _) victims =
let disk_moves = algDiskMoves opts
disk_moves_f = algDiskMovesFactor opts
@@ -447,15 +517,20 @@
table_pairs = parMap rwhnf (checkInstanceMove opts nodes_idx ini_tbl)
victims
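+      -- drop candidate solutions that take too long for too little gain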
+ wout_disk_moves_tbl = mapMaybe longSolFilter (map fst table_pairs)
+ with_disk_moves_tbl = mapMaybe longSolFilter (map snd table_pairs)
+
-- iterate over all instances, computing the best move
best_migr_tbl@(Table _ _ best_migr_cv _) =
- foldl' compareTables ini_tbl $ map fst table_pairs
+ foldl' compareTables ini_tbl wout_disk_moves_tbl
best_tbl@(Table _ _ best_cv _) =
- foldl' compareTables ini_tbl $ map snd table_pairs
- in if not disk_moves
- || ini_cv - best_cv <= (ini_cv - best_migr_cv) * disk_moves_f
- then best_migr_tbl
- else best_tbl -- best including disk moves
+ foldl' compareTables ini_tbl with_disk_moves_tbl
+ best_sol = if not disk_moves
+ || ini_cv - best_cv <= (ini_cv - best_migr_cv) * disk_moves_f
+ then best_migr_tbl
+ else best_tbl -- best including disk moves
+ in Just best_sol >>= filterMoveByGain opts ini_tbl
+ where longSolFilter = filterLongSolutions opts ini_tbl
-- | Check if we are allowed to go deeper in the balancing.
doNextBalance :: Table -- ^ The starting table
@@ -473,9 +548,7 @@
-> Maybe Table -- ^ The resulting table and commands
tryBalance opts ini_tbl =
let evac_mode = algEvacMode opts
- mg_limit = algMinGainLimit opts
- min_gain = algMinGain opts
- Table ini_nl ini_il ini_cv _ = ini_tbl
+ Table ini_nl ini_il _ _ = ini_tbl
all_inst = Container.elems ini_il
all_nodes = Container.elems ini_nl
(offline_nodes, online_nodes) = partition Node.offline all_nodes
@@ -493,12 +566,7 @@
allowed_inst = liftA2 (&&) (allowed_node . Instance.pNode)
(liftA2 (||) allowed_node (< 0) . Instance.sNode)
good_reloc_inst = filter allowed_inst reloc_inst
- fin_tbl = checkMove opts good_nidx ini_tbl good_reloc_inst
- (Table _ _ fin_cv _) = fin_tbl
- in
- if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
- then Just fin_tbl -- this round made success, return the new table
- else Nothing
+ in checkMove opts good_nidx ini_tbl good_reloc_inst
-- * Allocation functions
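
For intuition, the move time estimated above is volume * 8 / bandwidth: a failover only transfers memory, while any other move also transfers disk. Below is a minimal standalone sketch of the acceptance test applied by filterLongSolutions; the function name, thresholds, factor and cv scores are invented for illustration, whereas the real code takes the volumes, bandwidths and cluster scores from the Table:

  -- Hedged sketch only, not the actual hbal code.
  acceptLongSolution :: Double -> Double -> Double -> Double -> Double -> Double -> Bool
  acceptLongSolution threshold factor finT iniCv finCv optCv =
    factor < 0.01                                 -- filtering disabled
    || finT < threshold                           -- solution is short enough
    || finCv == optCv                             -- solution reaches the optimum
    || improvement > factor * (finT / threshold)  -- long, but worth the time
    where improvement = (iniCv - optCv) / (finCv - optCv)

  -- Example: moving an instance with 1024 MiB of memory and 102400 MiB of disk
  -- over a 1000 Mbit/s link is estimated at (1024 + 102400) * 8 / 1000 ~ 827 s;
  -- with a 600 s threshold and factor 2 it is only kept if improvement > 2.76.
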
diff --git a/src/Ganeti/HTools/ExtLoader.hs b/src/Ganeti/HTools/ExtLoader.hs
index b322cb3..64972ef 100644
--- a/src/Ganeti/HTools/ExtLoader.hs
+++ b/src/Ganeti/HTools/ExtLoader.hs
@@ -114,7 +114,7 @@
input_data <-
case () of
_ | setRapi -> wrapIO $ Rapi.loadData mhost
- | setLuxi -> wrapIO . Luxi.loadData $ fromJust lsock
+ | setLuxi -> wrapIO . Luxi.loadData (optSoR opts) $ fromJust lsock
| setSim -> Simu.loadData simdata
| setFile -> wrapIO . Text.loadData $ fromJust tfile
| setIAllocSrc -> wrapIO . IAlloc.loadData $ fromJust iallocsrc
diff --git a/src/Ganeti/HTools/Loader.hs b/src/Ganeti/HTools/Loader.hs
index 2294468..3bb1466 100644
--- a/src/Ganeti/HTools/Loader.hs
+++ b/src/Ganeti/HTools/Loader.hs
@@ -326,6 +326,26 @@
let ntags = Node.nTags node
in Node.setLocationTags node $ Tags.getLocations ctags ntags
+-- | Set the bandwidth map on a node, according to the
+-- cluster, group and node tags
+addBandwidthData :: [String]
+ -> Group.List -> Node.Node -> Node.Node
+addBandwidthData ctags gl node =
+ let grp = Container.find (Node.group node) gl
+ nbtags = btagsFilter (Node.nTags node)
+ gbtags = btagsFilter (Group.allTags grp)
+ cbtags = btagsFilter ctags
+ btags = Tags.mergeByPrefixes cbtags $
+ Tags.mergeByPrefixes gbtags nbtags
+ bgraph = Tags.getBandwidthGraph ctags
+ tnode = Node.setBandwidthTags node btags
+ update nd (src, dst, bndwdth)
+ | Set.member src btags = Node.setBandwidthToLocation nd dst bndwdth
+ | Set.member dst btags = Node.setBandwidthToLocation nd src bndwdth
+ | otherwise = nd
+ in foldl update tnode bgraph
+ where btagsFilter tags = Tags.getBandwidth ctags tags
+
-- | Initializer function that loads the data from a node and instance
-- list and massages it into the correct format.
mergeData :: [(String, DynUtil)] -- ^ Instance utilisation data
@@ -366,8 +386,9 @@
(`Node.buildPeers` il4)) nl3
il6 = Container.map (disableSplitMoves nl3) il5
nl5 = Container.map (addMigrationTags ctags) nl4
+ nl6 = Container.map (addBandwidthData ctags gl) nl5
in if' (null lkp_unknown)
- (Ok cdata { cdNodes = nl5, cdInstances = il6 })
+ (Ok cdata { cdNodes = nl6, cdInstances = il6 })
(Bad $ "Unknown instance(s): " ++ show(map lrContent lkp_unknown))
-- | In a cluster description, clear dynamic utilisation information.
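
To make the data flow concrete, the tag convention consumed here looks roughly as follows (the rack names and the bandwidth figure are invented; the prefix comes from Ganeti.HTools.Tags.Constants further below):

  cluster tag:  htools:bandwidth:rackA::rackB::1000   -- edge of the bandwidth graph
  node tag:     htools:bandwidth:rackA                -- this node's bandwidth location

addBandwidthData then gives every node tagged rackA a bandwidthMap entry of 1000 Mbit/s towards rackB (and vice versa) via setBandwidthToLocation.
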
diff --git a/src/Ganeti/HTools/Node.hs b/src/Ganeti/HTools/Node.hs
index 6749568..5fa62d6 100644
--- a/src/Ganeti/HTools/Node.hs
+++ b/src/Ganeti/HTools/Node.hs
@@ -60,6 +60,8 @@
, setMigrationTags
, setRecvMigrationTags
, setLocationTags
+ , setBandwidthTags
+ , setBandwidthToLocation
-- * Tag maps
, addTags
, delTags
@@ -98,6 +100,7 @@
, mkNodeGraph
, mkRebootNodeGraph
, haveExclStorage
+ , calcBandwidthToNode
) where
import Prelude ()
@@ -111,6 +114,7 @@
import Data.List (intercalate, foldl', delete, union, sortBy, groupBy)
import qualified Data.Map as Map
import Data.Ord (comparing)
+import Data.Maybe (mapMaybe)
import qualified Data.Set as Set
import Text.Printf (printf)
@@ -213,6 +217,10 @@
-- to
, locationScore :: Int -- ^ Sum of instance location and desired location
-- scores
+ , bandwidthTags :: Set.Set String -- ^ Node's bandwidth tags
+ , bandwidthMap :: Map.Map String Int -- ^ Node's network bandwidth between
+ -- current node and any node with given
+ -- bandwidth tag in Mbit per second
, instanceMap :: Map.Map (String, String) Int -- ^ Number of instances with
-- each exclusion/location tags
-- pair
@@ -384,6 +392,8 @@
, rmigTags = Set.empty
, locationTags = Set.empty
, locationScore = 0
+ , bandwidthTags = Set.empty
+ , bandwidthMap = Map.empty
, instanceMap = Map.empty
}
@@ -435,6 +445,15 @@
setLocationTags :: Node -> Set.Set String -> Node
setLocationTags t val = t { locationTags = val }
+-- | Set the network bandwidth tags
+setBandwidthTags :: Node -> Set.Set String -> Node
+setBandwidthTags t val = t { bandwidthTags = val }
+
+-- | Set the network bandwidth from this node to nodes carrying the given tag
+setBandwidthToLocation :: Node -> String -> Int -> Node
+setBandwidthToLocation t tag bandwidth = t { bandwidthMap = new_map }
+ where new_map = Map.insert tag bandwidth (bandwidthMap t)
+
-- | Sets the unnaccounted memory.
setXmem :: Node -> Int -> Node
setXmem t val = t { xMem = val }
@@ -556,6 +575,17 @@
. filter (not . Instance.usesMemory)
$ nodeInstances
+-- | Calculate the network bandwidth between two given nodes
+calcBandwidthToNode :: Node -> Node -> Maybe Int
+calcBandwidthToNode src dst =
+ case bndwths of
+ [] -> Nothing
+ _ -> Just $ minimum bndwths
+ where dstTags = Set.toList $ bandwidthTags dst
+ srcMap = bandwidthMap src
+ mapper = flip Map.lookup srcMap
+ bndwths = mapMaybe mapper dstTags
+
-- | Calculates the desired location score of an instance, given its primary
-- node.
getInstanceDsrdLocScore :: Node -- ^ the primary node of the instance
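
A minimal standalone sketch of the lookup calcBandwidthToNode performs, using plain maps and sets instead of Node values (the tags and numbers are invented):

  import qualified Data.Map as Map
  import qualified Data.Set as Set
  import Data.Maybe (mapMaybe)

  -- srcMap plays the role of the source node's bandwidthMap and dstTags the
  -- role of the destination node's bandwidthTags.
  bandwidthTo :: Map.Map String Int -> Set.Set String -> Maybe Int
  bandwidthTo srcMap dstTags =
    case mapMaybe (`Map.lookup` srcMap) (Set.toList dstTags) of
      []     -> Nothing
      widths -> Just (minimum widths)

  main :: IO ()
  main = print $ bandwidthTo (Map.fromList [("rackB", 1000), ("rackC", 100)])
                             (Set.fromList ["rackB", "rackC"])
  -- prints Just 100: when several tags match, the most pessimistic value wins
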
diff --git a/src/Ganeti/HTools/Program/Hbal.hs b/src/Ganeti/HTools/Program/Hbal.hs
index 68572dc..9848238 100644
--- a/src/Ganeti/HTools/Program/Hbal.hs
+++ b/src/Ganeti/HTools/Program/Hbal.hs
@@ -103,6 +103,8 @@
, oMinGainLim
, oDiskMoves
, oAvoidDiskMoves
+ , oLongSolutionThreshold
+ , oAvoidLongSolutions
, oSelInst
, oInstMoves
, oIgnoreSoftErrors
diff --git a/src/Ganeti/HTools/Program/Hscan.hs b/src/Ganeti/HTools/Program/Hscan.hs
index 1fb6a55..f7fdaf3 100644
--- a/src/Ganeti/HTools/Program/Hscan.hs
+++ b/src/Ganeti/HTools/Program/Hscan.hs
@@ -163,7 +163,7 @@
def_socket <- Path.defaultQuerySocket
let lsock = fromMaybe def_socket (optLuxi opts)
let name = local
- input_data <- Luxi.loadData lsock
+ input_data <- Luxi.loadData (optSoR opts) lsock
result <- writeData nlen name opts input_data
unless result . exitWith $ ExitFailure 2
diff --git a/src/Ganeti/HTools/Program/Hspace.hs b/src/Ganeti/HTools/Program/Hspace.hs
index 9db17da..f3c2cd2 100644
--- a/src/Ganeti/HTools/Program/Hspace.hs
+++ b/src/Ganeti/HTools/Program/Hspace.hs
@@ -79,6 +79,7 @@
, oNodeSim
, oRapiMaster
, luxi
+ , oSoR
, oIAllocSrc
, oVerbose
, oQuiet
diff --git a/src/Ganeti/HTools/Tags.hs b/src/Ganeti/HTools/Tags.hs
index 6fd7ebb..3166a57 100644
--- a/src/Ganeti/HTools/Tags.hs
+++ b/src/Ganeti/HTools/Tags.hs
@@ -39,9 +39,12 @@
, getMigRestrictions
, getRecvMigRestrictions
, getLocations
+ , getBandwidth
+ , getBandwidthGraph
+ , mergeByPrefixes
) where
-import Control.Monad (guard, (>=>))
+import Control.Monad ((>=>))
import Data.List (isPrefixOf, isInfixOf, stripPrefix)
import Data.Maybe (mapMaybe)
import qualified Data.Set as S
@@ -49,7 +52,7 @@
import qualified Ganeti.HTools.Node as Node
import Ganeti.HTools.Tags.Constants ( standbyPrefix
, migrationPrefix, allowMigrationPrefix
- , locationPrefix )
+ , locationPrefix, bandwidthPrefix )
-- * Predicates
@@ -79,9 +82,7 @@
-- the parts before and after.
splitAtColons :: String -> Maybe (String, String)
-splitAtColons (':':':':xs) = do
- guard $ not ("::" `isInfixOf` xs)
- return ("", xs)
+splitAtColons (':':':':xs) = return ("", xs)
splitAtColons (x:xs) = do
(as, bs) <- splitAtColons xs
@@ -91,7 +92,12 @@
-- | Get the pairs of allowed migrations from a set of cluster tags.
migrations :: [String] -> [(String, String)]
-migrations = mapMaybe $ stripPrefix allowMigrationPrefix >=> splitAtColons
+migrations = mapMaybe $ stripPrefix allowMigrationPrefix >=> migrationPair
+ where migrationPair s = case splitAtColons s of
+ Just (src, dst) -> if "::" `isInfixOf` dst
+ then Nothing
+ else Just (src, dst)
+ Nothing -> Nothing
-- | Given the cluster tags, extract the set of migration restrictions
-- a node is able to receive from its node tags.
@@ -107,3 +113,51 @@
-- from the node tags.
getLocations :: [String] -> [String] -> S.Set String
getLocations = getTags locationPrefix
+
+-- | Given the cluster tags, extract the set of bandwidth tags
+-- from the node tags.
+getBandwidth :: [String] -> [String] -> S.Set String
+getBandwidth = getTags bandwidthPrefix
+
+-- | Split the given string at every "::" occurrence
+splitAtColonsList :: String -> [String]
+splitAtColonsList str =
+ case splitAtColons str of
+ Just (f, s) -> f : splitAtColonsList s
+ Nothing -> [str]
+
+-- | Try to parse a string into a value
+maybeRead :: Read a => String -> Maybe a
+maybeRead s = case reads s of
+ [(x,"")] -> Just x
+ _ -> Nothing
+
+-- | Extract the bandwidth graph from the cluster tags
+getBandwidthGraph :: [String] -> [(String, String, Int)]
+getBandwidthGraph ctags =
+ let unprefTags = mapMaybe (stripPrefix bandwidthPrefix) ctags
+ tupleList = mapMaybe (listToTuple . splitAtColonsList) unprefTags
+ in mapMaybe parseInt tupleList
+ where parseInt (a, b, s) = case maybeRead s :: Maybe Int of
+ Just i -> Just (a, b, i)
+ Nothing -> Nothing
+ listToTuple (a:b:c:[]) = Just (a, b, c)
+ listToTuple _ = Nothing
+
+-- | Maybe extract the prefix before the first single ":" occurrence
+stripFirstPrefix :: String -> Maybe String
+stripFirstPrefix (':':':':_) = Nothing
+stripFirstPrefix (':':_) = Just ""
+stripFirstPrefix (x:xs) =
+ case stripFirstPrefix xs of
+ Just pref -> Just (x:pref)
+ Nothing -> Nothing
+stripFirstPrefix _ = Nothing
+
+-- | Drop from victims every tag that shares a prefix with a tag in inherits,
+-- then unite the sets
+mergeByPrefixes :: S.Set String -> S.Set String -> S.Set String
+mergeByPrefixes victims inherits =
+ let prefixes = mapMaybe stripFirstPrefix (S.toList inherits)
+ prefixFilter s = not $ any (`isPrefixOf` s) prefixes
+ filtered = S.filter prefixFilter victims
+ in S.union inherits filtered
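
A quick worked example of the new parsing helpers (the tag value is invented):

  splitAtColonsList "rackA::rackB::1000"             ==  ["rackA", "rackB", "1000"]
  maybeRead "1000" :: Maybe Int                      ==  Just 1000
  getBandwidthGraph ["htools:bandwidth:rackA::rackB::1000", "some:other:tag"]
                                                     ==  [("rackA", "rackB", 1000)]

Tags that do not carry the bandwidth prefix, or whose last component is not a readable Int, are silently dropped from the graph.
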
diff --git a/src/Ganeti/HTools/Tags/Constants.hs b/src/Ganeti/HTools/Tags/Constants.hs
index 3b741ac..d143b7a 100644
--- a/src/Ganeti/HTools/Tags/Constants.hs
+++ b/src/Ganeti/HTools/Tags/Constants.hs
@@ -43,6 +43,7 @@
, migrationPrefix
, allowMigrationPrefix
, locationPrefix
+ , bandwidthPrefix
, desiredLocationPrefix
, standbyAuto
, autoRepairTagPrefix
@@ -75,6 +76,10 @@
locationPrefix :: String
locationPrefix = "htools:nlocation:"
+-- | The prefix for bandwidth between nodes.
+bandwidthPrefix :: String
+bandwidthPrefix = "htools:bandwidth:"
+
-- | The prefix for instance desired location tags.
desiredLocationPrefix :: String
desiredLocationPrefix = "htools:desiredlocation:"
diff --git a/src/Ganeti/JQScheduler.hs b/src/Ganeti/JQScheduler.hs
index 4c594fa..2799d43 100644
--- a/src/Ganeti/JQScheduler.hs
+++ b/src/Ganeti/JQScheduler.hs
@@ -91,6 +91,7 @@
import Ganeti.Logging
import Ganeti.Objects
import Ganeti.Path
+import Ganeti.Query.Exec (forkPostHooksProcess)
import Ganeti.Types
import Ganeti.Utils
import Ganeti.Utils.Livelock
@@ -485,13 +486,29 @@
return died
-- | Trigger job detection for the job with the given job id.
--- Return True, if the job is dead.
+-- If the job is dead, start the post hooks execution process and return True.
cleanupIfDead :: JQStatus -> JobId -> IO Bool
cleanupIfDead state jid = do
logDebug $ "Extra job-death detection for " ++ show (fromJobId jid)
jobs <- readIORef (jqJobs state)
let jobWS = find ((==) jid . qjId . jJob) $ qRunning jobs
- maybe (return True) (checkForDeath state) jobWS
+ -- and run the post hooks
+ let runHooks = do
+ r <- runResultT . withLock (jqForkLock state)
+ $ forkPostHooksProcess jid
+ let sjid = show $ fromJobId jid
+ logDebug $ genericResult ((++) $ "Error starting post hooks process "
+ ++ "for disappeared job "
+ ++ sjid ++ ":")
+ (\pid -> "Post hooks for disappeared job "
+ ++ sjid ++ " have started in "
+ ++ show pid)
+ r
+ dead <- maybe (return True) (checkForDeath state) jobWS
+ if dead
+ then runHooks
+ else pure ()
+ return dead
-- | Force the queue to check the state of all jobs.
updateStatusAndScheduleSomeJobs :: JQStatus -> IO ()
diff --git a/src/Ganeti/JQScheduler/Filtering.hs b/src/Ganeti/JQScheduler/Filtering.hs
index c031415..99febf5 100644
--- a/src/Ganeti/JQScheduler/Filtering.hs
+++ b/src/Ganeti/JQScheduler/Filtering.hs
@@ -51,6 +51,7 @@
import qualified Text.JSON as J
import Ganeti.BasicTypes
+import Ganeti.Constants (opcodeReasonSrcRlib2, opcodeReasonAuthUser)
import Ganeti.Errors
import Ganeti.Lens hiding (chosen)
import Ganeti.JQScheduler.Types
@@ -88,6 +89,16 @@
. metaParamsL . opReasonL . traverse
+-- | The authenticated RAPI user that submitted the job; taken from the last
+-- matching reason trail entry
+userOf :: QueuedJob -> String -> String
+userOf job default_user =
+ foldl extractRapiUser default_user $ reasonsOf job
+ where extractRapiUser current_user (source, reason, _) =
+ if source == opcodeReasonSrcRlib2
+ then fromMaybe current_user (stripPrefix opcodeReasonAuthUser reason)
+ else current_user
+
+
-- | Like `evaluateFilterM`, but allowing only `Comparator` operations;
-- all other filter language operations are evaluated as `False`.
--
@@ -142,6 +153,9 @@
"timestamp" -> Just $ NumericValue timestamp `comp` val
_ -> Nothing
in any reasonMatches (reasonsOf job)
+ FPUser fil -> evaluateFilterComparator fil $ \comp field val -> case field of
+ "user" -> Just $ (QuotedString $ userOf job "") `comp` val
+ _ -> Nothing
-- | Whether all predicates of the filter rule are true for the job.
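
A sketch of the user extraction behind the new "user" filter field; the two constants' real string values live in Ganeti.Constants, so the literals below are placeholders only:

  import Data.List (stripPrefix)
  import Data.Maybe (fromMaybe)

  -- Placeholders for opcodeReasonSrcRlib2 and opcodeReasonAuthUser.
  srcRlib2, authUserPrefix :: String
  srcRlib2 = "gnt:library:rlib2"   -- assumed value, for illustration only
  authUserPrefix = "RAPI-user:"    -- assumed value, for illustration only

  -- Mirrors userOf: the last matching RAPI reason trail entry wins.
  userOf' :: [(String, String, Integer)] -> String -> String
  userOf' reasons deflt = foldl pick deflt reasons
    where pick acc (src, reason, _)
            | src == srcRlib2 = fromMaybe acc (stripPrefix authUserPrefix reason)
            | otherwise       = acc

  main :: IO ()
  main = putStrLn $ userOf' [(srcRlib2, authUserPrefix ++ "alice", 0)] ""
  -- prints "alice"; an FPUser filter on the "user" field then compares against it
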
diff --git a/src/Ganeti/JQueue.hs b/src/Ganeti/JQueue.hs
index 736fce6..a56cb52 100644
--- a/src/Ganeti/JQueue.hs
+++ b/src/Ganeti/JQueue.hs
@@ -572,9 +572,10 @@
. mfilter (/= ownlivelock)
. qjLivelock
--- | Waits for a job to finalize its execution.
-waitForJob :: JobId -> Int -> ResultG (Bool, String)
-waitForJob jid tmout = do
+-- | Waits for a job that was ordered to cancel to react, and returns whether
+-- it was canceled, together with a description intended for the user.
+waitForJobCancelation :: JobId -> Int -> ResultG (Bool, String)
+waitForJobCancelation jid tmout = do
qDir <- liftIO queueDir
let jobfile = liveJobFile qDir jid
load = liftM fst <$> loadJobFromDisk qDir False jid
@@ -582,10 +583,13 @@
jobR <- liftIO $ watchFileBy jobfile tmout finalizedR load
case calcJobStatus <$> jobR of
Ok s | s == JOB_STATUS_CANCELED ->
- return (True, "Job successfully cancelled")
+ return (True, "Job successfully canceled")
| finalizedR jobR ->
return (False, "Job exited before it could have been canceled,\
\ status " ++ show s)
+ | s == JOB_STATUS_CANCELING ->
+ return (False, "Job cancelation was not completed before the\
+ \ timeout, but the job may yet be canceled")
| otherwise ->
return (False, "Job could not be canceled, status "
++ show s)
@@ -619,7 +623,7 @@
if calcJobStatus job > JOB_STATUS_WAITING
then return (False, "Job no longer waiting, can't cancel\
\ (informed it anyway)")
- else lift $ waitForJob jid C.luxiCancelJobTimeout
+ else lift $ waitForJobCancelation jid C.luxiCancelJobTimeout
else return (True, "SIGKILL send to the process")
_ -> do
logDebug $ jName ++ " in its startup phase, retrying"
diff --git a/src/Ganeti/MaintD/Server.hs b/src/Ganeti/MaintD/Server.hs
index b88b23e..544c5a9 100644
--- a/src/Ganeti/MaintD/Server.hs
+++ b/src/Ganeti/MaintD/Server.hs
@@ -107,7 +107,7 @@
loadClusterData = do
now <- liftIO getClockTime
socket <- liftIO Path.defaultQuerySocket
- either_inp <- liftIO . tryIOError $ Luxi.loadData socket
+ either_inp <- liftIO . tryIOError $ Luxi.loadData False socket
input_data <- mkResultT $ case either_inp of
Left e -> do
let msg = show e
diff --git a/src/Ganeti/Metad/Config.hs b/src/Ganeti/Metad/Config.hs
index ebd27b2..3162243 100644
--- a/src/Ganeti/Metad/Config.hs
+++ b/src/Ganeti/Metad/Config.hs
@@ -80,9 +80,12 @@
-- > { "os-image": ["http://example.com/disk.img", "public"],
-- > "os-password": ["mypassword", "secret"] }
makeInstanceParams
- :: JSObject JSValue -> JSObject JSValue -> JSObject JSValue -> JSValue
+ :: JSObject JSValue
+ -> JSObject JSValue
+ -> JSObject JSValue
+ -> JSObject JSValue
makeInstanceParams pub priv sec =
- JSObject . JSON.toJSObject $
+ JSON.toJSObject $
addVisibility "public" pub ++
addVisibility "private" priv ++
addVisibility "secret" sec
@@ -92,7 +95,7 @@
addVisibility param params =
map (second (JSArray . (:[key param]))) (JSON.fromJSObject params)
-getOsParamsWithVisibility :: JSValue -> Result JSValue
+getOsParamsWithVisibility :: JSValue -> Result (JSObject JSValue)
getOsParamsWithVisibility json =
do obj <- readJSON json
publicOsParams <- getPublicOsParams obj
diff --git a/src/Ganeti/Metad/WebServer.hs b/src/Ganeti/Metad/WebServer.hs
index 338d3e4..1671ddf 100644
--- a/src/Ganeti/Metad/WebServer.hs
+++ b/src/Ganeti/Metad/WebServer.hs
@@ -43,6 +43,7 @@
import qualified Data.CaseInsensitive as CI
import Data.List (intercalate)
import Data.Map (Map)
+import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.ByteString.Char8 as ByteString (pack, unpack)
import Snap.Core
@@ -99,6 +100,19 @@
ByteString.pack .
JSON.encode $ osParams
+lookupSingleParam :: String -> JSObject JSValue -> Result String
+lookupSingleParam param osParams =
+ case List.lookup param (JSON.fromJSObject osParams) of
+ Nothing -> Error $ "Instance does not have param " ++ param
+ Just v -> head <$> JSON.readJSON v
+
+serveSingleOsParam :: String -> Map String JSValue -> String -> MetaM
+serveSingleOsParam inst params param =
+ do instParams <- lookupInstanceParams inst params
+ maybeResult (Config.getOsParamsWithVisibility instParams >>=
+ lookupSingleParam param) $ \paramValue ->
+ writeBS . ByteString.pack $ paramValue
+
serveOsPackage :: String -> Map String JSValue -> String -> MetaM
serveOsPackage inst params key =
do instParams <- lookupInstanceParams inst params
@@ -164,6 +178,23 @@
\err -> do
liftIO . Logging.logWarning $ "Could not serve OS parameters: " ++ err
error404
+handleMetadata params GET "ganeti" "latest" paramPath | isParamPath paramPath =
+ case split paramPath of
+ -- The validation of the first two entries is done in isParamPath
+ [_, _, param] -> do
+ remoteAddr <- ByteString.unpack . rqRemoteAddr <$> getRequest
+ instanceParams <- liftIO $ do
+ Logging.logInfo $ "OS param " ++ param ++ " for " ++ show remoteAddr
+ readMVar params
+ serveSingleOsParam remoteAddr instanceParams param
+ `catchError`
+ \err -> do
+ liftIO .
+ Logging.logWarning $ "Could not serve single OS param " ++ param ++
+ ": " ++ err
+ error404
+ _ -> error404
+ where isParamPath = (==) ["os", "parameters"] . take 2 . split
handleMetadata params GET "ganeti" "latest" script | isScript script =
do remoteAddr <- ByteString.unpack . rqRemoteAddr <$> getRequest
instanceParams <- liftIO $ do
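
The new route serves one OS parameter per request. A standalone sketch of the path check it performs, assuming the WebServer's split helper separates path components on '/' (the parameter name below is invented):

  -- Illustration only; the real handler uses Metad's own split helper.
  splitSlash :: String -> [String]
  splitSlash s = case break (== '/') s of
                   (a, '/':rest) -> a : splitSlash rest
                   (a, _)        -> [a]

  isParamPath :: String -> Bool
  isParamPath = (== ["os", "parameters"]) . take 2 . splitSlash

  main :: IO ()
  main = mapM_ (print . isParamPath)
               [ "os/parameters/os-image"  -- True, serves that single parameter
               , "os/parameters"           -- True, but no [_, _, param] match: 404
               , "other/path"              -- False, handled by another route
               ]
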
diff --git a/src/Ganeti/Objects.hs b/src/Ganeti/Objects.hs
index 065aaa8..1741d08 100644
--- a/src/Ganeti/Objects.hs
+++ b/src/Ganeti/Objects.hs
@@ -80,11 +80,12 @@
, Cluster(..)
, ConfigData(..)
, TimeStampObject(..) -- re-exported from Types
- , UuidObject(..) -- re-exported from Types
- , SerialNoObject(..) -- re-exported from Types
- , TagsObject(..) -- re-exported from Types
- , DictObject(..) -- re-exported from THH
- , TagSet -- re-exported from THH
+ , UuidObject(..) -- re-exported from Types
+ , SerialNoObject(..) -- re-exported from Types
+ , TagsObject(..) -- re-exported from Types
+ , DictObject(..) -- re-exported from THH
+ , TagSet(..) -- re-exported from THH
+ , emptyTagSet -- re-exported from THH
, Network(..)
, AddressPool(..)
, Ip4Address()
@@ -525,6 +526,7 @@
= FPJobId (Filter FilterField)
| FPOpCode (Filter FilterField)
| FPReason (Filter FilterField)
+ | FPUser (Filter FilterField)
deriving (Eq, Ord, Show)
@@ -533,6 +535,7 @@
FPJobId expr -> JSArray [string "jobid", showJSON expr]
FPOpCode expr -> JSArray [string "opcode", showJSON expr]
FPReason expr -> JSArray [string "reason", showJSON expr]
+ FPUser expr -> JSArray [string "user", showJSON expr]
where
string = JSString . toJSString
@@ -542,6 +545,7 @@
| name == toJSString "jobid" -> FPJobId <$> readJSON expr
| name == toJSString "opcode" -> FPOpCode <$> readJSON expr
| name == toJSString "reason" -> FPReason <$> readJSON expr
+ | name == toJSString "user" -> FPUser <$> readJSON expr
JSArray (JSString name:params) ->
fail $ "malformed FilterPredicate: bad parameter list for\
\ '" ++ fromJSString name ++ "' predicate: "
@@ -746,4 +750,3 @@
, simpleField "netdev" [t| String |]
, simpleField "ip_family" [t| IpFamily |]
])
-
diff --git a/src/Ganeti/Objects/Disk.hs b/src/Ganeti/Objects/Disk.hs
index f6b3cbb..e50f3f1 100644
--- a/src/Ganeti/Objects/Disk.hs
+++ b/src/Ganeti/Objects/Disk.hs
@@ -152,6 +152,25 @@
lidEncodeType :: DiskLogicalId -> [(String, JSValue)]
lidEncodeType v = [(devType, showJSON . lidDiskType $ v)]
+-- | Returns the storage path or the unique name for a given logical id if
+-- present
+getStorageId :: DiskLogicalId -> Maybe String
+getStorageId dlid =
+ case dlid of
+ LIDPlain lv -> Just $ lvGroup lv ++ "/" ++ lvVolume lv
+ LIDDrbd8 {} -> Nothing
+ LIDFile _ path -> Just path
+ LIDSharedFile _ path -> Just path
+ LIDGluster _ path -> Just path
+ LIDBlockDev _ path -> Just path
+ LIDRados _ path -> Just path
+ LIDExt _ uniqueName -> Just uniqueName
+
+-- | Returns the provider for ExtStorage and Nothing otherwise
+getExtProvider :: DiskLogicalId -> Maybe String
+getExtProvider (LIDExt provider _) = Just provider
+getExtProvider _ = Nothing
+
-- | Custom encoder for DiskLogicalId (logical id only).
encodeDLId :: DiskLogicalId -> JSValue
encodeDLId (LIDPlain (LogicalVolume vg lv)) =
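
For illustration, the new accessors behave as follows on fully specified logical ids (volume group, volume and provider names are invented; for DRBD disks getStorageId is Nothing):

  getStorageId (LIDPlain (LogicalVolume "xenvg" "inst1.disk0"))   ==  Just "xenvg/inst1.disk0"
  getStorageId (LIDExt "my-provider" "unique-name")               ==  Just "unique-name"
  getExtProvider (LIDExt "my-provider" "unique-name")             ==  Just "my-provider"
  getExtProvider (LIDPlain (LogicalVolume "xenvg" "inst1.disk0")) ==  Nothing
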
diff --git a/src/Ganeti/Objects/Lens.hs b/src/Ganeti/Objects/Lens.hs
index 3f27981..8b8da74 100644
--- a/src/Ganeti/Objects/Lens.hs
+++ b/src/Ganeti/Objects/Lens.hs
@@ -40,7 +40,6 @@
import qualified Data.ByteString.UTF8 as UTF8
import Control.Lens (Simple)
import Control.Lens.Iso (Iso, iso)
-import qualified Data.Set as Set
import System.Time (ClockTime(..))
import Ganeti.Lens (makeCustomLenses, Lens')
@@ -64,7 +63,7 @@
-- | Class of objects that have tags.
class TagsObject a => TagsObjectL a where
- tagsL :: Lens' a (Set.Set String)
+ tagsL :: Lens' a (TagSet)
$(makeCustomLenses ''AddressPool)
diff --git a/src/Ganeti/Path.hs b/src/Ganeti/Path.hs
index 8c02dea..e96bbb5 100644
--- a/src/Ganeti/Path.hs
+++ b/src/Ganeti/Path.hs
@@ -58,6 +58,7 @@
, instanceReasonDir
, getInstReasonFilename
, jqueueExecutorPy
+ , postHooksExecutorPy
, kvmPidDir
) where
@@ -192,6 +193,12 @@
jqueueExecutorPy = return $ versionedsharedir
</> "ganeti" </> "jqueue" </> "exec.py"
+-- | The path to the Python executable running global post hooks for a job
+-- whose process has disappeared.
+postHooksExecutorPy :: IO FilePath
+postHooksExecutorPy =
+ return $ versionedsharedir </> "ganeti" </> "jqueue" </> "post_hooks_exec.py"
+
-- | The path to the directory where kvm stores the pid files.
kvmPidDir :: IO FilePath
kvmPidDir = runDir `pjoin` "kvm-hypervisor" `pjoin` "pid"
diff --git a/src/Ganeti/Query/Cluster.hs b/src/Ganeti/Query/Cluster.hs
index a1d4255..33690cf 100644
--- a/src/Ganeti/Query/Cluster.hs
+++ b/src/Ganeti/Query/Cluster.hs
@@ -54,7 +54,7 @@
clusterMasterNodeName cfg =
let cluster = configCluster cfg
masterNodeUuid = clusterMasterNode cluster
- in liftM nodeName $ getNode cfg masterNodeUuid
+ in liftM nodeName $ getNodeByUuid cfg masterNodeUuid
isWatcherPaused :: IO (Maybe Integer)
isWatcherPaused = do
diff --git a/src/Ganeti/Query/Exec.hs b/src/Ganeti/Query/Exec.hs
index 79889ff..62992a1 100644
--- a/src/Ganeti/Query/Exec.hs
+++ b/src/Ganeti/Query/Exec.hs
@@ -55,9 +55,12 @@
-}
+{-# LANGUAGE FlexibleContexts #-}
+
module Ganeti.Query.Exec
( isForkSupported
, forkJobProcess
+ , forkPostHooksProcess
) where
import Prelude ()
@@ -120,7 +123,6 @@
filterReadable :: (Read a) => [String] -> [a]
filterReadable = mapMaybe (fmap fst . listToMaybe . reads)
-
-- | Catches a potential `IOError` and sets its description via
-- `annotateIOError`. This makes exceptions more informative when they
-- are thrown from an unnamed `Handle`.
@@ -128,10 +130,19 @@
rethrowAnnotateIOError desc =
modifyIOError (\e -> annotateIOError e desc Nothing Nothing)
--- Code that is executed in a @fork@-ed process and that the replaces iteself
--- with the actual job process
-runJobProcess :: JobId -> Client -> IO ()
-runJobProcess jid s = withErrorLogAt CRITICAL (show jid) $
+
+-- | Code that is executed in a @fork@-ed process. Performs communication with
+-- the parent process by calling commFn and then runs the Python executable
+-- obtained from pyExecIO.
+runProcess :: JobId -- ^ a job to process
+ -> Client -- ^ UDS transport
+ -> IO FilePath -- ^ path to the python executable
+ -> ((String -> IO ()) -> JobId -> Client -> IO Fd)
+ -- ^ pre-execution function communicating with the parent. The
+ -- function returns the file descriptor which should
+ -- remain open
+ -> IO ()
+runProcess jid s pyExecIO commFn = withErrorLogAt CRITICAL (show jid) $
do
-- Close the standard error to prevent anything being written there
-- (for example by exceptions when closing unneeded FDs).
@@ -144,20 +155,7 @@
let logLater _ = return ()
logLater $ "Forking a new process for job " ++ show (fromJobId jid)
-
- -- Create a livelock file for the job
- (TOD ts _) <- getClockTime
- lockfile <- P.livelockFile $ printf "job_%06d_%d" (fromJobId jid) ts
-
- -- Lock the livelock file
- logLater $ "Locking livelock file " ++ show lockfile
- fd <- lockFile lockfile >>= annotateResult "Can't lock the livelock file"
- logLater "Sending the lockfile name to the master process"
- sendMsg s lockfile
-
- logLater "Waiting for the master process to confirm the lock"
- _ <- recvMsg s
-
+ preserve_fd <- commFn logLater jid s
-- close the client
logLater "Closing the client"
(clFdR, clFdW) <- clientToFd s
@@ -171,21 +169,20 @@
closeFd clFdR
closeFd clFdW
- fds <- (filter (> 2) . filter (/= fd)) <$> toErrorBase listOpenFds
+ fds <- (filter (> 2) . filter (/= preserve_fd)) <$> toErrorBase listOpenFds
logLater $ "Closing every superfluous file descriptor: " ++ show fds
mapM_ (tryIOError . closeFd) fds
- -- the master process will send the job id and the livelock file name
- -- using the same protocol to the job process
- -- we pass the job id as the first argument to the process;
- -- while the process never uses it, it's very convenient when listing
- -- job processes
+ -- The master process will send the job id and the livelock file name
+ -- using the same protocol. We pass the job id as the first argument
+ -- to the process. While the process never uses it, it's very convenient
+ -- when listing job processes.
use_debug <- isDebugMode
env <- (M.insert "GNT_DEBUG" (if use_debug then "1" else "0")
. M.insert "PYTHONPATH" AC.versionedsharedir
. M.fromList)
`liftM` getEnvironment
- execPy <- P.jqueueExecutorPy
+ execPy <- pyExecIO
logLater $ "Executing " ++ AC.pythonPath ++ " " ++ execPy
++ " with PYTHONPATH=" ++ AC.versionedsharedir
() <- executeFile AC.pythonPath True [execPy, show (fromJobId jid)]
@@ -224,6 +221,63 @@
$ closeClient child
return (pid, master)
+-- | Kill the process with the id provided.
+killProcessOnError :: (FromString e, Show e)
+ => ProcessID -- ^ job process pid
+ -> Client -- ^ UDS client connected to the master node
+ -> (String -> ResultT e (WriterLogT IO) ())
+ -- ^ log function
+ -> ResultT e (WriterLogT IO) ()
+killProcessOnError pid master logFn = do
+ logFn "Closing the pipe to the client"
+ withErrorLogAt WARNING "Closing the communication pipe failed"
+ (liftIO (closeClient master)) `orElse` return ()
+ killIfAlive [sigTERM, sigABRT, sigKILL]
+ where killIfAlive [] = return ()
+ killIfAlive (sig : sigs) = do
+ logFn "Getting the status of the process"
+ status <- tryError . liftIO $ getProcessStatus False True pid
+ case status of
+ Left e -> logFn $ "Job process already gone: " ++ show e
+ Right (Just s) -> logFn $ "Child process status: " ++ show s
+ Right Nothing -> do
+ logFn $ "Child process running, killing by " ++ show sig
+ liftIO $ signalProcess sig pid
+ unless (null sigs) $ do
+ threadDelay 100000 -- wait for 0.1s and check again
+ killIfAlive sigs
+
+-- | Data type used only to define the return type of forkProcessCatchErrors.
+data ForkProcessRet = ForkJob (FilePath, ProcessID) |
+ ForkPostHooks ProcessID
+
+-- | Forks the current process, running runFn in the child and commFn in the
+-- parent. Due to a bug in GHC's process forking, we retry if the forked
+-- process fails to start. If it fails later on, the failure is handled by
+-- 'ResultT' and no retry is performed.
+forkProcessCatchErrors :: (Show e, FromString e)
+ => (Client -> IO ())
+ -> (ProcessID -> String -> ResultT e (WriterLogT IO) ())
+ -> (ProcessID -> Client
+ -> ResultT e (WriterLogT IO) ForkProcessRet)
+ -> ResultT e IO ForkProcessRet
+forkProcessCatchErrors runFn logFn commFn = do
+ -- Due to a bug in GHC forking process, we want to retry
+ -- if the forked process fails to start.
+ -- If it fails later on, the failure is handled by 'ResultT'
+ -- and no retry is performed.
+ let execWriterLogInside = ResultT . execWriterLogT . runResultT
+ retryErrorN C.luxidRetryForkCount
+ $ \tryNo -> execWriterLogInside $ do
+ let maxWaitUS = 2^(tryNo - 1) * C.luxidRetryForkStepUS
+ when (tryNo >= 2) . liftIO $ delayRandom (0, maxWaitUS)
+
+ (pid, master) <- liftIO $ forkWithPipe connectConfig runFn
+
+ logFn pid "Forked a new process"
+ flip catchError (\e -> killProcessOnError pid master (logFn pid)
+ >> throwError e) $ commFn pid master
+
-- | Forks the job process and starts processing of the given job.
-- Returns the livelock of the job and its process ID.
forkJobProcess :: (FromString e, Show e)
@@ -234,64 +288,33 @@
-- and process id in the job file
-> ResultT e IO (FilePath, ProcessID)
forkJobProcess job luxiLivelock update = do
- let jidStr = show . fromJobId . qjId $ job
-
- -- Retrieve secret parameters if present
- let secretParams = encodeStrict . filterSecretParameters . qjOps $ job
logDebug $ "Setting the lockfile temporarily to " ++ luxiLivelock
++ " for job " ++ jidStr
update luxiLivelock
- -- Due to a bug in GHC forking process, we want to retry,
- -- if the forked process fails to start.
- -- If it fails later on, the failure is handled by 'ResultT'
- -- and no retry is performed.
- let execWriterLogInside = ResultT . execWriterLogT . runResultT
- retryErrorN C.luxidRetryForkCount
- $ \tryNo -> execWriterLogInside $ do
- let maxWaitUS = 2^(tryNo - 1) * C.luxidRetryForkStepUS
- when (tryNo >= 2) . liftIO $ delayRandom (0, maxWaitUS)
+ ForkJob ret <- forkProcessCatchErrors (childMain . qjId $ job) logDebugJob
+ parentMain
+ return ret
+ where
+ -- Retrieve secret parameters if present
+ secretParams = encodeStrict . filterSecretParameters . qjOps $ job
+ jidStr = show . fromJobId . qjId $ job
+ jobLogPrefix pid = "[start:job-" ++ jidStr ++ ",pid=" ++ show pid ++ "] "
+ logDebugJob pid = logDebug . (jobLogPrefix pid ++)
- (pid, master) <- liftIO $ forkWithPipe connectConfig (runJobProcess
- . qjId $ job)
-
- let jobLogPrefix = "[start:job-" ++ jidStr ++ ",pid=" ++ show pid ++ "] "
- logDebugJob = logDebug . (jobLogPrefix ++)
-
- logDebugJob "Forked a new process"
-
- let killIfAlive [] = return ()
- killIfAlive (sig : sigs) = do
- logDebugJob "Getting the status of the process"
- status <- tryError . liftIO $ getProcessStatus False True pid
- case status of
- Left e -> logDebugJob $ "Job process already gone: " ++ show e
- Right (Just s) -> logDebugJob $ "Child process status: " ++ show s
- Right Nothing -> do
- logDebugJob $ "Child process running, killing by " ++ show sig
- liftIO $ signalProcess sig pid
- unless (null sigs) $ do
- threadDelay 100000 -- wait for 0.1s and check again
- killIfAlive sigs
-
- let onError = do
- logDebugJob "Closing the pipe to the client"
- withErrorLogAt WARNING "Closing the communication pipe failed"
- (liftIO (closeClient master)) `orElse` return ()
- killIfAlive [sigTERM, sigABRT, sigKILL]
-
- flip catchError (\e -> onError >> throwError e)
- $ do
+ -- | Code performing communication with the child process. First, receive
+ -- the livelock, then send necessary parameters to the python child.
+ parentMain pid master = do
let annotatedIO msg k = do
- logDebugJob msg
- liftIO $ rethrowAnnotateIOError (jobLogPrefix ++ msg) k
+ logDebugJob pid msg
+ liftIO $ rethrowAnnotateIOError (jobLogPrefix pid ++ msg) k
let recv msg = annotatedIO msg (recvMsg master)
send msg x = annotatedIO msg (sendMsg master x)
lockfile <- recv "Getting the lockfile of the client"
- logDebugJob $ "Setting the lockfile to the final " ++ lockfile
+ logDebugJob pid ("Setting the lockfile to the final " ++ lockfile)
toErrorBase $ update lockfile
send "Confirming the client it can start" ""
@@ -299,13 +322,62 @@
_ <- recv "Waiting for the job to ask for the job id"
send "Writing job id to the client" jidStr
-
_ <- recv "Waiting for the job to ask for the lock file name"
send "Writing the lock file name to the client" lockfile
-
_ <- recv "Waiting for the job to ask for secret parameters"
send "Writing secret parameters to the client" secretParams
+ liftIO $ closeClient master
+ return $ ForkJob (lockfile, pid)
+
+ -- | Code performing communication with the parent process. During
+ -- communication the livelock is created, locked and sent back
+ -- to the parent.
+ childMain jid s = runProcess jid s P.jqueueExecutorPy commFn
+ where
+ commFn logFn jid' s' = do
+ -- Create a livelock file for the job
+ (TOD ts _) <- getClockTime
+ lockfile <- P.livelockFile $ printf "job_%06d_%d" (fromJobId jid') ts
+ -- Lock the livelock file
+ _ <- logFn $ "Locking livelock file " ++ show lockfile
+ fd <- lockFile lockfile >>= annotateResult "Can't lock the livelock"
+ _ <- logFn "Sending the lockfile name to the master process"
+ sendMsg s' lockfile
+ _ <- logFn "Waiting for the master process to confirm the lock"
+ _ <- recvMsg s'
+ return fd
+
+-- | Forks the process and starts the processing of post hooks for the opcode
+-- whose execution was unfinished due to job process disappearing.
+forkPostHooksProcess :: (FromString e, Show e)
+ => JobId
+ -> ResultT e IO ProcessID
+forkPostHooksProcess jid = do
+ ForkPostHooks ret <- forkProcessCatchErrors (childMain jid) logDebugJob
+ parentMain
+ return ret
+ where
+ jidStr = show $ fromJobId jid
+ jobLogPrefix pid = "[start:post_hooks:job-" ++ jidStr ++ ",pid="
+ ++ show pid ++ "] "
+ logDebugJob pid = logDebug . (jobLogPrefix pid ++)
+
+ -- | Code performing communication with the child process: wait for it to
+ -- ask for the job id, then send the job id to the Python post hooks executor.
+ parentMain pid master = do
+ let annotatedIO msg k = do
+ logDebugJob pid msg
+ liftIO $ rethrowAnnotateIOError (jobLogPrefix pid ++ msg) k
+ let recv msg = annotatedIO msg (recvMsg master)
+ send msg x = annotatedIO msg (sendMsg master x)
+ -- We communicate with the Python process
+ _ <- recv "Waiting for the post hooks executor to ask for the job id"
+ send "Writing job id to the client" jidStr
liftIO $ closeClient master
+ return $ ForkPostHooks pid
- return (lockfile, pid)
+ -- | Code performing communication with the parent process. The Python part
+ -- only reads the job file, so no livelock is needed here.
+ childMain jid' s = runProcess jid' s P.postHooksExecutorPy commFn
+ where commFn _ _ _ = return (0 :: Fd)
diff --git a/src/Ganeti/Query/Instance.hs b/src/Ganeti/Query/Instance.hs
index 8ea2c0f..ea599dd 100644
--- a/src/Ganeti/Query/Instance.hs
+++ b/src/Ganeti/Query/Instance.hs
@@ -207,6 +207,12 @@
, (FieldDefinition "disk.uuids" "Disk_UUIDs" QFTOther
"List of disk UUIDs",
FieldConfig getDiskUuids, QffNormal)
+ , (FieldDefinition "disk.storage_ids" "Disk_storage_ids" QFTOther
+ "List of disk storage ids",
+ FieldConfig getStorageIds, QffNormal)
+ , (FieldDefinition "disk.providers" "Disk_providers" QFTOther
+ "List of disk ExtStorage providers",
+ FieldConfig getProviders, QffNormal)
-- For pre-2.14 backwards compatibility
, (FieldDefinition "disk_template" "Disk_template" QFTText
"Instance disk template",
@@ -217,18 +223,18 @@
instantiateIndexedFields C.maxDisks
[ (fieldDefinitionCompleter "disk.size/%d" "Disk/%d" QFTUnit
"Disk size of %s disk",
- getIndexedOptionalConfField getInstDisksFromObj diskSize,
+ getIndexedOptionalConfField getInstDisks diskSize,
QffNormal)
, (fieldDefinitionCompleter "disk.spindles/%d" "DiskSpindles/%d" QFTNumber
"Spindles of %s disk",
- getIndexedOptionalConfField getInstDisksFromObj diskSpindles,
+ getIndexedOptionalConfField getInstDisks diskSpindles,
QffNormal)
, (fieldDefinitionCompleter "disk.name/%d" "DiskName/%d" QFTText
"Name of %s disk",
- getIndexedOptionalConfField getInstDisksFromObj diskName, QffNormal)
+ getIndexedOptionalConfField getInstDisks diskName, QffNormal)
, (fieldDefinitionCompleter "disk.uuid/%d" "DiskUUID/%d" QFTText
"UUID of %s disk",
- getIndexedConfField getInstDisksFromObj uuidOf, QffNormal)
+ getIndexedConfField getInstDisks uuidOf, QffNormal)
] ++
-- Aggregate nic parameter fields
@@ -381,7 +387,7 @@
-- visible to the instance.
getDiskSizeRequirements :: ConfigData -> Instance -> ResultEntry
getDiskSizeRequirements cfg inst =
- rsErrorNoData . liftA (sum . map getSize) . getInstDisksFromObj cfg $ inst
+ rsErrorNoData . liftA (sum . map getSize) . getInstDisks cfg $ inst
where
diskType x = lidDiskType <$> diskLogicalId x
getSize :: Disk -> Int
@@ -397,24 +403,41 @@
getDiskSizes :: ConfigData -> Instance -> ResultEntry
getDiskSizes cfg =
rsErrorNoData . liftA (map $ MaybeForJSON . diskSize)
- . getInstDisksFromObj cfg
+ . getInstDisks cfg
-- | Get a list of disk spindles
getDiskSpindles :: ConfigData -> Instance -> ResultEntry
getDiskSpindles cfg =
rsErrorNoData . liftA (map (MaybeForJSON . diskSpindles)) .
- getInstDisksFromObj cfg
+ getInstDisks cfg
-- | Get a list of disk names for an instance
getDiskNames :: ConfigData -> Instance -> ResultEntry
getDiskNames cfg =
rsErrorNoData . liftA (map (MaybeForJSON . diskName)) .
- getInstDisksFromObj cfg
+ getInstDisks cfg
-- | Get a list of disk UUIDs for an instance
getDiskUuids :: ConfigData -> Instance -> ResultEntry
getDiskUuids cfg =
- rsErrorNoData . liftA (map uuidOf) . getInstDisksFromObj cfg
+ rsErrorNoData . liftA (map uuidOf) . getInstDisks cfg
+
+-- | Get a list of ExtStorage providers for an instance
+getProviders :: ConfigData -> Instance -> ResultEntry
+getProviders cfg =
+ rsErrorNoData . liftA (map MaybeForJSON . filter isJust
+ . mapMaybe getProvider) . getInstDisks cfg
+ where
+ getProvider x = getExtProvider <$> diskLogicalId x
+
+-- | Get a list of disk storage ids for an instance. Note that for DRBD the list
+-- will be empty.
+getStorageIds :: ConfigData -> Instance -> ResultEntry
+getStorageIds cfg = rsErrorNoData .
+ liftA (map MaybeForJSON . filter isJust . mapMaybe storageId) .
+ getInstDisks cfg
+ where
+ storageId x = getStorageId <$> diskLogicalId x
-- | Creates a functions which produces a FieldConfig 'FieldGetter' when fed
-- an index. Works for fields that may not return a value, expressed through
@@ -705,7 +728,7 @@
isPrimaryOffline cfg inst =
let pNodeResult = maybe (Bad $ ParameterError "no primary node") return
(instPrimaryNode inst)
- >>= getNode cfg
+ >>= getNodeByUuid cfg
in case pNodeResult of
Ok pNode -> nodeOffline pNode
Bad _ -> error "Programmer error - result assumed to be OK is Bad!"
@@ -923,7 +946,7 @@
nub . justOk
$ map ( maybe (Bad $ ParameterError "no primary node") return
. instPrimaryNode
- >=> getNode cfg) instances
+ >=> getNodeByUuid cfg) instances
goodNodes = nodesWithValidConfig cfg instanceNodes
instInfoRes <- executeRpcCall goodNodes (RpcCallAllInstancesInfo hvSpecs)
consInfoRes <-
@@ -939,7 +962,7 @@
-- | An aggregate disk attribute for backward compatibility.
getDiskTemplate :: ConfigData -> Instance -> ResultEntry
getDiskTemplate cfg inst =
- let disks = getInstDisksFromObj cfg inst
+ let disks = getInstDisks cfg inst
getDt x = lidDiskType <$> diskLogicalId x
disk_types :: ErrorResult [DiskTemplate]
disk_types = nub <$> catMaybes <$> map getDt <$> disks
diff --git a/src/Ganeti/THH/Field.hs b/src/Ganeti/THH/Field.hs
index 9b444e9..1d3752b 100644
--- a/src/Ganeti/THH/Field.hs
+++ b/src/Ganeti/THH/Field.hs
@@ -43,7 +43,8 @@
, timeStampFields
, uuidFields
, serialFields
- , TagSet
+ , TagSet(..)
+ , emptyTagSet
, tagsFields
, fileModeAsIntField
, processIdField
@@ -121,12 +122,21 @@
uuidFields :: [Field]
uuidFields = [ presentInForthcoming $ simpleField "uuid" [t| BS.ByteString |] ]
--- | Tag set type alias.
-type TagSet = Set.Set String
+-- | Tag set type.
+newtype TagSet = TagSet { fromTagSet :: Set.Set String }
+ deriving (Show, Eq)
+
+instance JSON.JSON TagSet where
+ readJSON = liftM TagSet . JSON.readJSON
+ showJSON = JSON.showJSON . fromTagSet
+
+-- | The empty tag set.
+emptyTagSet :: TagSet
+emptyTagSet = TagSet Set.empty
-- | Tag field description.
tagsFields :: [Field]
-tagsFields = [ defaultField [| Set.empty |] $
+tagsFields = [ defaultField [| emptyTagSet |] $
simpleField "tags" [t| TagSet |] ]
-- ** Fields related to POSIX data types
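
The newtype keeps the wire format unchanged: a tag set still (de)serialises as a JSON list of strings. A self-contained approximation of the instance (tag names are arbitrary; the real instance relies on Ganeti's own JSON helpers for Set):

  import qualified Data.Set as Set
  import qualified Text.JSON as JSON

  newtype TagSet' = TagSet' (Set.Set String) deriving (Show, Eq)

  instance JSON.JSON TagSet' where
    showJSON (TagSet' s) = JSON.showJSON (Set.toList s)
    readJSON v = TagSet' . Set.fromList <$> JSON.readJSON v

  main :: IO ()
  main = putStrLn . JSON.encode $ TagSet' (Set.fromList ["mytag", "htools:standby"])
  -- prints ["htools:standby","mytag"], just as the old Set.Set String alias did
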
diff --git a/src/Ganeti/Types.hs b/src/Ganeti/Types.hs
index 8da06d4..66507cc 100644
--- a/src/Ganeti/Types.hs
+++ b/src/Ganeti/Types.hs
@@ -197,12 +197,12 @@
import qualified Text.JSON as JSON
import Text.JSON (JSON, readJSON, showJSON)
import Data.Ratio (numerator, denominator)
-import qualified Data.Set as Set
import System.Time (ClockTime)
import qualified Ganeti.ConstantUtils as ConstantUtils
import Ganeti.JSON (Container, HasStringRepr(..))
import qualified Ganeti.THH as THH
+import qualified Ganeti.THH.Field as THH (TagSet)
import Ganeti.Utils
-- * Generic types
@@ -1071,5 +1071,4 @@
-- | Class of objects that have tags.
class TagsObject a where
- tagsOf :: a -> Set.Set String
-
+ tagsOf :: a -> THH.TagSet
diff --git a/src/Ganeti/Utils.hs b/src/Ganeti/Utils.hs
index 0c2a0ac..1eee5e3 100644
--- a/src/Ganeti/Utils.hs
+++ b/src/Ganeti/Utils.hs
@@ -4,7 +4,7 @@
{-
-Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc.
+Copyright (C) 2009, 2010, 2011, 2012, 2013, 2015 Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -44,6 +44,8 @@
, applyIf
, commaJoin
, ensureQuoted
+ , divideList
+ , balancedSum
, tryRead
, readMaybe
, formatTable
@@ -213,24 +215,34 @@
threadDelaySeconds :: Int -> IO ()
threadDelaySeconds = threadDelay . (*) 1000000
+-- | Split a list into two lists of approximately the same length.
+divideList :: [a] -> ([a], [a])
+divideList [] = ([], [])
+divideList [a] = ([a], [])
+divideList (a:b:xs) = let (ls, rs) = divideList xs in (a:ls, b:rs)
+
-- * Mathematical functions
+-- | Compute the sum of a list of numbers, all of roughly the same magnitude,
+-- in a balanced way to avoid adding numbers of very different magnitudes
+-- (and thus losing precision).
+balancedSum :: Num a => [a] -> a
+balancedSum [] = 0
+balancedSum [x] = x
+balancedSum xs = let (ls, rs) = divideList xs
+ in balancedSum ls + balancedSum rs
+
-- Simple and slow statistical functions, please replace with better
-- versions
-- | Standard deviation function.
stdDev :: [Double] -> Double
stdDev lst =
- -- first, calculate the list length and sum lst in a single step,
- -- for performance reasons
- let (ll', sx) = foldl' (\(rl, rs) e ->
- let rl' = rl + 1
- rs' = rs + e
- in rl' `seq` rs' `seq` (rl', rs')) (0::Int, 0) lst
- ll = fromIntegral ll'::Double
- mv = sx / ll
- av = foldl' (\accu em -> let d = em - mv in accu + d * d) 0.0 lst
- in sqrt (av / ll) -- stddev
+ let len = fromIntegral $ length lst
+ mean = balancedSum lst / len
+ sqDist x = let d = x - mean in d * d
+ variance = balancedSum (map sqDist lst) / len
+ in sqrt variance
-- * Logical functions
@@ -502,6 +514,9 @@
cTimeToClockTime (CTime timet) = TOD (toInteger timet) 0
-- | A version of `diffClockTimes` that works around ghc bug #2519.
+--
+-- FIXME: diffClockTimes uses local time (badly), so it still has issues
+-- around daylight saving time changes. Move to a newer time library!
diffClockTimes :: ClockTime -> ClockTime -> TimeDiff
diffClockTimes t1 t2 =
let delta = STime.diffClockTimes t1 t2
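
A quick worked example of the pairwise splitting (plain arithmetic, no Ganeti types involved):

  divideList  [1, 2, 3, 4, 5]  ==  ([1, 3, 5], [2, 4])
  balancedSum [1, 2, 3, 4, 5]  ==  balancedSum [1, 3, 5] + balancedSum [2, 4]  ==  15

Each recursion level adds partial sums of similar magnitude, which keeps the floating point error small when stdDev averages long lists of similar values.
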
diff --git a/src/Ganeti/Utils/Statistics.hs b/src/Ganeti/Utils/Statistics.hs
index ff91d93..cd830fb 100644
--- a/src/Ganeti/Utils/Statistics.hs
+++ b/src/Ganeti/Utils/Statistics.hs
@@ -49,6 +49,8 @@
import Data.List (foldl')
import qualified Data.Map as Map
+import Ganeti.Utils (balancedSum)
+
-- | Typeclass describing necessary statistical accumulations functions. Types
-- defining an instance of Stat behave as if the given statistics were computed
-- on the list of values, but they allow a potentially more efficient update of
@@ -77,26 +79,21 @@
instance Stat Double SumStat where
calculate xs =
- let addComponent s x =
- let !s' = s + x
- in s'
- st = foldl' addComponent 0 xs
- in SumStat st
+ let !sx = balancedSum xs
+ in SumStat sx
update (SumStat s) x x' =
- SumStat $ s + x' - x
+ let !sx' = s + (x' - x)
+ in SumStat sx'
getValue (SumStat s) = s
instance Stat Double StdDevStat where
calculate xs =
- let addComponent (n, s) x =
- let !n' = n + 1
- !s' = s + x
- in (n', s')
- (nt, st) = foldl' addComponent (0, 0) xs
- mean = st / nt
- center x = x - mean
- nvar = foldl' (\v x -> let d = center x in v + d * d) 0 xs
- in StdDevStat nt st (nvar / nt)
+ let !n = fromIntegral $ length xs
+ !sx = balancedSum xs
+ !mean = sx / n
+ sqDist x = let d = x - mean in d * d
+ !var = balancedSum (map sqDist xs) / n
+ in StdDevStat n sx var
update (StdDevStat n s var) x x' =
let !ds = x' - x
!dss = x' * x' - x * x
diff --git a/src/Ganeti/WConfd/Ssconf.hs b/src/Ganeti/WConfd/Ssconf.hs
index 3900578..1118735 100644
--- a/src/Ganeti/WConfd/Ssconf.hs
+++ b/src/Ganeti/WConfd/Ssconf.hs
@@ -88,7 +88,7 @@
mkSSConf :: ConfigData -> SSConf
mkSSConf cdata = SSConf . M.fromList $
[ (SSClusterName, return $ clusterClusterName cluster)
- , (SSClusterTags, toList $ tagsOf cluster)
+ , (SSClusterTags, toList . fromTagSet $ tagsOf cluster)
, (SSFileStorageDir, return $ clusterFileStorageDir cluster)
, (SSSharedFileStorageDir, return $ clusterSharedFileStorageDir cluster)
, (SSGlusterStorageDir, return $ clusterGlusterStorageDir cluster)
@@ -101,7 +101,7 @@
, (SSMasterNetmask, return . show $ clusterMasterNetmask cluster)
, (SSMasterNode, return
. genericResult (const "NO MASTER") nodeName
- . getNode cdata $ clusterMasterNode cluster)
+ . getNodeByUuid cdata $ clusterMasterNode cluster)
, (SSNodeList, mapLines nodeName nodes)
, (SSNodePrimaryIps, mapLines (spcPair . (nodeName &&& nodePrimaryIp))
nodes )
diff --git a/test/data/cluster_config_2.18.json b/test/data/cluster_config_2.18.json
new file mode 100644
index 0000000..64d62fe
--- /dev/null
+++ b/test/data/cluster_config_2.18.json
@@ -0,0 +1,669 @@
+{
+ "cluster": {
+ "beparams": {
+ "default": {
+ "always_failover": false,
+ "auto_balance": true,
+ "maxmem": 128,
+ "minmem": 128,
+ "spindle_use": 1,
+ "vcpus": 1
+ }
+ },
+ "blacklisted_os": [],
+ "candidate_certs": {},
+ "candidate_pool_size": 10,
+ "cluster_name": "cluster.name.example.com",
+ "compression_tools": [
+ "gzip",
+ "gzip-fast",
+ "gzip-slow"
+ ],
+ "ctime": 1343869045.6048839,
+ "data_collectors": {
+ "cpu-avg-load": {
+ "active": true,
+ "interval": 5000000.0
+ },
+ "diagnose": {
+ "active": true,
+ "intervall": 5000000.0
+ },
+ "diskstats": {
+ "active": true,
+ "interval": 5000000.0
+ },
+ "drbd": {
+ "active": true,
+ "interval": 5000000.0
+ },
+ "inst-status-xen": {
+ "active": true,
+ "interval": 5000000.0
+ },
+ "kvm-inst-rss": {
+ "active": true,
+ "interval": 5000000.0
+ },
+ "lv": {
+ "active": true,
+ "interval": 5000000.0
+ },
+ "xen-cpu-avg-load": {
+ "active": true,
+ "interval": 5000000.0
+ }
+ },
+ "default_iallocator": "hail",
+ "default_iallocator_params": {},
+ "diagnose_data_collector_filename": "",
+ "disk_state_static": {},
+ "diskparams": {
+ "blockdev": {},
+ "diskless": {},
+ "drbd": {
+ "c-delay-target": 1,
+ "c-fill-target": 200,
+ "c-max-rate": 2048,
+ "c-min-rate": 1024,
+ "c-plan-ahead": 1,
+ "data-stripes": 2,
+ "disk-barriers": "bf",
+ "disk-custom": "",
+ "dynamic-resync": false,
+ "meta-barriers": true,
+ "meta-stripes": 2,
+ "metavg": "xenvg",
+ "net-custom": "",
+ "protocol": "C",
+ "resync-rate": 1024
+ },
+ "ext": {
+ "access": "kernelspace"
+ },
+ "file": {},
+ "gluster": {
+ "access": "kernelspace",
+ "host": "127.0.0.1",
+ "port": 24007,
+ "volume": "gv0"
+ },
+ "plain": {
+ "stripes": 2
+ },
+ "rbd": {
+ "access": "kernelspace",
+ "pool": "rbd"
+ },
+ "sharedfile": {}
+ },
+ "drbd_usermode_helper": "/bin/true",
+ "enabled_disk_templates": [
+ "drbd",
+ "plain",
+ "file",
+ "sharedfile"
+ ],
+ "enabled_hypervisors": [
+ "xen-pvm"
+ ],
+ "enabled_user_shutdown": false,
+ "file_storage_dir": "",
+ "gluster_storage_dir": "",
+ "hidden_os": [],
+ "highest_used_port": 32105,
+ "hv_state_static": {
+ "xen-pvm": {
+ "cpu_node": 1,
+ "cpu_total": 1,
+ "mem_hv": 0,
+ "mem_node": 0,
+ "mem_total": 0
+ }
+ },
+ "hvparams": {
+ "chroot": {
+ "init_script": "/ganeti-chroot"
+ },
+ "fake": {
+ "migration_mode": "live"
+ },
+ "kvm": {
+ "acpi": true,
+ "boot_order": "disk",
+ "cdrom2_image_path": "",
+ "cdrom_disk_type": "",
+ "cdrom_image_path": "",
+ "cpu_cores": 0,
+ "cpu_mask": "all",
+ "cpu_sockets": 0,
+ "cpu_threads": 0,
+ "cpu_type": "",
+ "disk_aio": "threads",
+ "disk_cache": "default",
+ "disk_type": "paravirtual",
+ "floppy_image_path": "",
+ "initrd_path": "",
+ "kernel_args": "ro",
+ "kernel_path": "/boot/vmlinuz-kvmU",
+ "keymap": "",
+ "kvm_extra": "",
+ "kvm_flag": "",
+ "kvm_path": "/usr/bin/kvm",
+ "machine_version": "",
+ "mem_path": "",
+ "migration_bandwidth": 4,
+ "migration_caps": "",
+ "migration_downtime": 30,
+ "migration_mode": "live",
+ "migration_port": 4041,
+ "nic_type": "paravirtual",
+ "reboot_behavior": "reboot",
+ "root_path": "/dev/vda1",
+ "security_domain": "",
+ "security_model": "none",
+ "serial_console": true,
+ "serial_speed": 38400,
+ "soundhw": "",
+ "spice_bind": "",
+ "spice_image_compression": "",
+ "spice_ip_version": 0,
+ "spice_jpeg_wan_compression": "",
+ "spice_password_file": "",
+ "spice_playback_compression": true,
+ "spice_streaming_video": "",
+ "spice_tls_ciphers": "HIGH:-DES:-3DES:-EXPORT:-ADH",
+ "spice_use_tls": false,
+ "spice_use_vdagent": true,
+ "spice_zlib_glz_wan_compression": "",
+ "usb_devices": "",
+ "usb_mouse": "",
+ "use_chroot": false,
+ "use_localtime": false,
+ "user_shutdown": false,
+ "vga": "",
+ "vhost_net": false,
+ "virtio_net_queues": 1,
+ "vnc_bind_address": "",
+ "vnc_password_file": "",
+ "vnc_tls": false,
+ "vnc_x509_path": "",
+ "vnc_x509_verify": false,
+ "vnet_hdr": true
+ },
+ "lxc": {
+ "cpu_mask": "",
+ "devices": "c 1:3 rw,c 1:5 rw,c 1:7 rw,c 1:8 rw,c 1:9 rw,c 1:10 rw,c 5:0 rw,c 5:1 rw,c 5:2 rw,c 136:* rw",
+ "drop_capabilities": "mac_override,sys_boot,sys_module,sys_time,sys_admin",
+ "extra_cgroups": "",
+ "extra_config": "",
+ "lxc_cgroup_use": "",
+ "lxc_devices": "c 1:3 rw,c 1:5 rw,c 1:7 rw,c 1:8 rw,c 1:9 rw,c 1:10 rw,c 5:0 rw,c 5:1 rw,c 5:2 rw,c 136:* rw",
+ "lxc_drop_capabilities": "mac_override,sys_boot,sys_module,sys_time",
+ "lxc_extra_config": "",
+ "lxc_startup_wait": 30,
+ "lxc_tty": 6,
+ "num_ttys": 6,
+ "startup_timeout": 30
+ },
+ "xen-hvm": {
+ "acpi": true,
+ "blockdev_prefix": "hd",
+ "boot_order": "cd",
+ "cdrom_image_path": "",
+ "cpu_cap": 0,
+ "cpu_mask": "all",
+ "cpu_weight": 256,
+ "cpuid": "",
+ "device_model": "/usr/lib/xen/bin/qemu-dm",
+ "disk_type": "paravirtual",
+ "kernel_path": "/usr/lib/xen/boot/hvmloader",
+ "migration_mode": "non-live",
+ "migration_port": 8082,
+ "nic_type": "rtl8139",
+ "pae": true,
+ "pci_pass": "",
+ "reboot_behavior": "reboot",
+ "soundhw": "",
+ "use_localtime": false,
+ "vif_script": "",
+ "vif_type": "ioemu",
+ "viridian": false,
+ "vnc_bind_address": "0.0.0.0",
+ "vnc_password_file": "/your/vnc-cluster-password",
+ "xen_cmd": "xm"
+ },
+ "xen-pvm": {
+ "blockdev_prefix": "sd",
+ "bootloader_args": "",
+ "bootloader_path": "",
+ "cpu_cap": 0,
+ "cpu_mask": "all",
+ "cpu_weight": 256,
+ "cpuid": "",
+ "initrd_path": "",
+ "kernel_args": "ro",
+ "kernel_path": "/boot/vmlinuz-xenU",
+ "migration_mode": "live",
+ "migration_port": 8082,
+ "reboot_behavior": "reboot",
+ "root_path": "/dev/xvda1",
+ "soundhw": "",
+ "use_bootloader": false,
+ "vif_script": "",
+ "xen_cmd": "xm"
+ }
+ },
+ "install_image": "",
+ "instance_communication_network": "",
+ "ipolicy": {
+ "disk-templates": [
+ "drbd",
+ "plain",
+ "sharedfile",
+ "file"
+ ],
+ "minmax": [
+ {
+ "max": {
+ "cpu-count": 8,
+ "disk-count": 16,
+ "disk-size": 1048576,
+ "memory-size": 32768,
+ "nic-count": 8,
+ "spindle-use": 12
+ },
+ "min": {
+ "cpu-count": 1,
+ "disk-count": 1,
+ "disk-size": 1024,
+ "memory-size": 128,
+ "nic-count": 1,
+ "spindle-use": 1
+ }
+ }
+ ],
+ "spindle-ratio": 32.0,
+ "std": {
+ "cpu-count": 1,
+ "disk-count": 1,
+ "disk-size": 1024,
+ "memory-size": 128,
+ "nic-count": 1,
+ "spindle-use": 1
+ },
+ "vcpu-ratio": 1.0,
+ "memory-ratio": 1.7
+ },
+ "mac_prefix": "aa:bb:cc",
+ "maintain_node_health": false,
+ "master_ip": "192.0.2.87",
+ "master_netdev": "eth0",
+ "master_netmask": 32,
+ "master_node": "9a12d554-75c0-4cb1-8064-103365145db0",
+ "max_running_jobs": 20,
+ "max_tracked_jobs": 25,
+ "modify_etc_hosts": true,
+ "modify_ssh_setup": true,
+ "mtime": 1361964122.7947099,
+ "ndparams": {
+ "cpu_speed": 1.0,
+ "exclusive_storage": false,
+ "oob_program": "",
+ "ovs": false,
+ "ovs_link": "",
+ "ovs_name": "switch1",
+ "spindle_count": 1,
+ "ssh_port": 22
+ },
+ "nicparams": {
+ "default": {
+ "link": "br974",
+ "mode": "bridged",
+ "vlan": ""
+ }
+ },
+ "os_hvp": {
+ "TEMP-Ganeti-QA-OS": {
+ "xen-hvm": {
+ "acpi": false,
+ "pae": true
+ },
+ "xen-pvm": {
+ "root_path": "/dev/sda5"
+ }
+ }
+ },
+ "osparams": {},
+ "osparams_private_cluster": {},
+ "prealloc_wipe_disks": false,
+ "primary_ip_family": 2,
+ "reserved_lvs": [],
+ "rsahostkeypub": "YOURKEY",
+ "serial_no": 3189,
+ "shared_file_storage_dir": "/srv/ganeti/shared-file-storage",
+ "ssh_key_bits": 1024,
+ "ssh_key_type": "dsa",
+ "tags": [
+ "mytag"
+ ],
+ "tcpudp_port_pool": [
+ 32104,
+ 32105,
+ 32101,
+ 32102,
+ 32103
+ ],
+ "uid_pool": [],
+ "use_external_mip_script": false,
+ "uuid": "dddf8c12-f2d8-4718-a35b-7804daf12a3f",
+ "volume_group_name": "xenvg",
+ "zeroing_image": ""
+ },
+ "ctime": 1343869045.6055231,
+ "disks": {
+ "150bd154-8e23-44d1-b762-5065ae5a507b": {
+ "ctime": 1354038435.343601,
+ "dev_type": "plain",
+ "iv_name": "disk/0",
+ "logical_id": [
+ "xenvg",
+ "b27a576a-13f7-4f07-885c-63fcad4fdfcc.disk0"
+ ],
+ "mode": "rw",
+ "mtime": 1354038435.343601,
+ "nodes": [
+ "2ae3d962-2dad-44f2-bdb1-85f77107f907"
+ ],
+ "params": {},
+ "serial_no": 1,
+ "size": 1280,
+ "uuid": "150bd154-8e23-44d1-b762-5065ae5a507b"
+ },
+ "77ced3a5-6756-49ae-8d1f-274e27664c05": {
+ "children": [
+ {
+ "ctime": 1421677173.7280669,
+ "dev_type": "plain",
+ "logical_id": [
+ "xenvg",
+ "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_data"
+ ],
+ "mtime": 1421677173.7280591,
+ "nodes": [
+ "9a12d554-75c0-4cb1-8064-103365145db0",
+ "41f9c238-173c-4120-9e41-04ad379b647a"
+ ],
+ "params": {},
+ "serial_no": 1,
+ "size": 1024
+ },
+ {
+ "ctime": 1421677173.728096,
+ "dev_type": "plain",
+ "logical_id": [
+ "xenvg",
+ "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_meta"
+ ],
+ "mtime": 1421677173.7280879,
+ "nodes": [
+ "9a12d554-75c0-4cb1-8064-103365145db0",
+ "41f9c238-173c-4120-9e41-04ad379b647a"
+ ],
+ "params": {},
+ "serial_no": 1,
+ "size": 128
+ }
+ ],
+ "ctime": 1363620258.6089759,
+ "dev_type": "drbd",
+ "iv_name": "disk/0",
+ "logical_id": [
+ "9a12d554-75c0-4cb1-8064-103365145db0",
+ "41f9c238-173c-4120-9e41-04ad379b647a",
+ 32100,
+ 0,
+ 0,
+ "d3c3fd475fcbaf5fd177fb245ac43b71247ada38"
+ ],
+ "mode": "rw",
+ "mtime": 1363620258.6089759,
+ "nodes": [
+ "9a12d554-75c0-4cb1-8064-103365145db0",
+ "41f9c238-173c-4120-9e41-04ad379b647a"
+ ],
+ "params": {},
+ "serial_no": 1,
+ "size": 1024,
+ "uuid": "77ced3a5-6756-49ae-8d1f-274e27664c05"
+ },
+ "79acf611-be58-4334-9fe4-4f2b73ae8abb": {
+ "ctime": 1355186880.4511809,
+ "dev_type": "plain",
+ "iv_name": "disk/0",
+ "logical_id": [
+ "xenvg",
+ "3e559cd7-1024-4294-a923-a9fd13182b2f.disk0"
+ ],
+ "mode": "rw",
+ "mtime": 1355186880.4511809,
+ "nodes": [
+ "41f9c238-173c-4120-9e41-04ad379b647a"
+ ],
+ "params": {},
+ "serial_no": 1,
+ "size": 102400,
+ "uuid": "79acf611-be58-4334-9fe4-4f2b73ae8abb"
+ }
+ },
+ "filters": {},
+ "instances": {
+ "4e091bdc-e205-4ed7-8a47-0c9130a6619f": {
+ "admin_state": "up",
+ "admin_state_source": "admin",
+ "beparams": {},
+ "ctime": 1354038435.343601,
+ "disks": [
+ "150bd154-8e23-44d1-b762-5065ae5a507b"
+ ],
+ "disks_active": true,
+ "hvparams": {},
+ "hypervisor": "xen-pvm",
+ "mtime": 1354224585.700732,
+ "name": "instance3.example.com",
+ "nics": [
+ {
+ "mac": "aa:bb:cc:5e:5c:75",
+ "nicparams": {},
+ "uuid": "1ab090c1-e017-406c-afb4-fc285cb43e31"
+ }
+ ],
+ "os": "debian-image",
+ "osparams": {},
+ "osparams_private": {},
+ "primary_node": "2ae3d962-2dad-44f2-bdb1-85f77107f907",
+ "serial_no": 4,
+ "tags": [],
+ "uuid": "4e091bdc-e205-4ed7-8a47-0c9130a6619f"
+ },
+ "6c078d22-3eb6-4780-857d-81772e09eef1": {
+ "admin_state": "up",
+ "admin_state_source": "admin",
+ "beparams": {},
+ "ctime": 1363620258.6089759,
+ "disks": [
+ "77ced3a5-6756-49ae-8d1f-274e27664c05"
+ ],
+ "disks_active": true,
+ "hvparams": {},
+ "hypervisor": "xen-pvm",
+ "mtime": 1363620320.8749011,
+ "name": "instance1.example.com",
+ "nics": [
+ {
+ "mac": "aa:bb:cc:b2:6e:0b",
+ "nicparams": {},
+ "uuid": "2c953d72-fac4-4aa9-a225-4131bb271791"
+ }
+ ],
+ "os": "busybox",
+ "osparams": {},
+ "osparams_private": {},
+ "primary_node": "9a12d554-75c0-4cb1-8064-103365145db0",
+ "serial_no": 2,
+ "uuid": "6c078d22-3eb6-4780-857d-81772e09eef1"
+ },
+ "8fde9f6d-e1f1-4850-9e9c-154966f622f5": {
+ "admin_state": "up",
+ "admin_state_source": "admin",
+ "beparams": {},
+ "ctime": 1355186880.4511809,
+ "disks": [
+ "79acf611-be58-4334-9fe4-4f2b73ae8abb"
+ ],
+ "disks_active": true,
+ "hvparams": {},
+ "hypervisor": "xen-pvm",
+ "mtime": 1355186898.307642,
+ "name": "instance2.example.com",
+ "nics": [
+ {
+ "mac": "aa:bb:cc:56:83:fb",
+ "nicparams": {},
+ "uuid": "1cf95562-e676-4fd0-8214-e8b84a2f7bd1"
+ }
+ ],
+ "os": "debian-image",
+ "osparams": {},
+ "osparams_private": {},
+ "primary_node": "41f9c238-173c-4120-9e41-04ad379b647a",
+ "serial_no": 2,
+ "tags": [],
+ "uuid": "8fde9f6d-e1f1-4850-9e9c-154966f622f5"
+ }
+ },
+ "maintenance": {},
+ "mtime": 1421677173.729104,
+ "networks": {
+ "99f0128a-1c84-44da-90b9-9581ea00c075": {
+ "ext_reservations": "1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001",
+ "name": "a network",
+ "network": "203.0.113.0/24",
+ "reservations": "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+ "serial_no": 1,
+ "uuid": "99f0128a-1c84-44da-90b9-9581ea00c075"
+ }
+ },
+ "nodegroups": {
+ "5244a46d-7506-4e14-922d-02b58153dde1": {
+ "alloc_policy": "preferred",
+ "diskparams": {},
+ "ipolicy": {},
+ "mtime": 1361963775.5750091,
+ "name": "default",
+ "ndparams": {},
+ "networks": {},
+ "serial_no": 125,
+ "tags": [],
+ "uuid": "5244a46d-7506-4e14-922d-02b58153dde1"
+ },
+ "6c0a8916-b719-45ad-95dd-82192b1e473f": {
+ "alloc_policy": "preferred",
+ "diskparams": {},
+ "ipolicy": {
+ "disk-templates": [
+ "plain"
+ ],
+ "minmax": [
+ {
+ "max": {
+ "cpu-count": 8,
+ "disk-count": 16,
+ "disk-size": 1048576,
+ "memory-size": 32768,
+ "nic-count": 18,
+ "spindle-use": 14
+ },
+ "min": {
+ "cpu-count": 2,
+ "disk-count": 2,
+ "disk-size": 1024,
+ "memory-size": 128,
+ "nic-count": 1,
+ "spindle-use": 1
+ }
+ }
+ ],
+ "spindle-ratio": 5.2000000000000002,
+ "vcpu-ratio": 3.1400000000000001
+ },
+ "mtime": 1361963775.5750091,
+ "name": "another",
+ "ndparams": {
+ "exclusive_storage": true
+ },
+ "networks": {},
+ "serial_no": 125,
+ "tags": [],
+ "uuid": "6c0a8916-b719-45ad-95dd-82192b1e473f"
+ }
+ },
+ "nodes": {
+ "2ae3d962-2dad-44f2-bdb1-85f77107f907": {
+ "ctime": 1343869045.6048839,
+ "drained": false,
+ "group": "5244a46d-7506-4e14-922d-02b58153dde1",
+ "master_candidate": true,
+ "master_capable": true,
+ "mtime": 1358348755.779906,
+ "name": "node2.example.com",
+ "ndparams": {},
+ "offline": false,
+ "powered": true,
+ "primary_ip": "192.0.2.83",
+ "secondary_ip": "198.51.100.83",
+ "serial_no": 6,
+ "tags": [],
+ "uuid": "2ae3d962-2dad-44f2-bdb1-85f77107f907",
+ "vm_capable": true
+ },
+ "41f9c238-173c-4120-9e41-04ad379b647a": {
+ "ctime": 1343869205.9348071,
+ "drained": false,
+ "group": "5244a46d-7506-4e14-922d-02b58153dde1",
+ "master_candidate": true,
+ "master_capable": true,
+ "mtime": 1353019704.8853681,
+ "name": "node3.example.com",
+ "ndparams": {},
+ "offline": false,
+ "powered": true,
+ "primary_ip": "192.0.2.84",
+ "secondary_ip": "198.51.100.84",
+ "serial_no": 2,
+ "tags": [],
+ "uuid": "41f9c238-173c-4120-9e41-04ad379b647a",
+ "vm_capable": true
+ },
+ "9a12d554-75c0-4cb1-8064-103365145db0": {
+ "ctime": 1349722460.022264,
+ "drained": false,
+ "group": "5244a46d-7506-4e14-922d-02b58153dde1",
+ "master_candidate": true,
+ "master_capable": true,
+ "mtime": 1359986533.3533289,
+ "name": "node1.example.com",
+ "ndparams": {},
+ "offline": false,
+ "powered": true,
+ "primary_ip": "192.0.2.82",
+ "secondary_ip": "198.51.100.82",
+ "serial_no": 197,
+ "tags": [],
+ "uuid": "9a12d554-75c0-4cb1-8064-103365145db0",
+ "vm_capable": true
+ }
+ },
+ "serial_no": 7627,
+ "version": 2180000
+}
diff --git a/test/data/htools/hbal-avoid-long-solutions.data b/test/data/htools/hbal-avoid-long-solutions.data
new file mode 100644
index 0000000..2637182
--- /dev/null
+++ b/test/data/htools/hbal-avoid-long-solutions.data
@@ -0,0 +1,17 @@
+group-01|fake-uuid-01|preferred|nic:old|
+
+node-01|16384|0|16384|409600|281600|16|N|fake-uuid-01|1|nic:new
+node-02|16384|0|16384|409600|281600|16|N|fake-uuid-01|1|nic:new
+node-03|16384|0|16384|409600|409600|16|N|fake-uuid-01|1|
+node-04|16384|0|16384|409600|409600|16|N|fake-uuid-01|1|
+
+inst1|1024|51200|1|running|Y|node-01|node-02|drbd||1
+inst2|1024|12800|1|running|Y|node-01|node-02|drbd||1
+inst3|1024|12800|1|running|Y|node-01|node-02|drbd||1
+inst4|1024|51200|1|running|Y|node-01|node-02|drbd||1
+
+htools:bandwidth:nic
+nic:new
+htools:bandwidth:nic:new::nic:new::1000
+htools:bandwidth:nic:old::nic:old::100
+htools:bandwidth:nic:new::nic:old::50
diff --git a/test/hs/Test/Ganeti/HTools/Node.hs b/test/hs/Test/Ganeti/HTools/Node.hs
index 24d942d..0960d26 100644
--- a/test/hs/Test/Ganeti/HTools/Node.hs
+++ b/test/hs/Test/Ganeti/HTools/Node.hs
@@ -416,7 +416,7 @@
inst'' = inst' { Instance.diskTemplate = Types.DTDrbd8 }
in case Node.addSec node inst'' pdx of
Ok node' -> Node.removeSec node' inst'' ==? node
- _ -> failTest "Can't add instance"
+ Bad r -> failTest $ "Can't add instance: " ++ show r
-- | Check that no graph is created on an empty node list.
case_emptyNodeList :: Assertion
diff --git a/test/hs/Test/Ganeti/JQScheduler.hs b/test/hs/Test/Ganeti/JQScheduler.hs
index 04a6287..770d67e 100644
--- a/test/hs/Test/Ganeti/JQScheduler.hs
+++ b/test/hs/Test/Ganeti/JQScheduler.hs
@@ -520,7 +520,7 @@
-- `doc/design-optables.rst`.
prop_jobFiltering :: Property
prop_jobFiltering =
- forAllShrink arbitrary shrink $ \q ->
+ forAllShrink (arbitrary `suchThat` (not . null . qEnqueued)) shrink $ \q ->
forAllShrink (resize 4 arbitrary) shrink $ \(NonEmpty filterList) ->
let running = qRunning q ++ qManipulated q
@@ -545,8 +545,7 @@
-- Makes sure that each action appears with some probability.
actionName = head . words . show
- allActions = map actionName [ Accept, Continue, Pause, Reject
- , RateLimit 0 ]
+ allActions = map actionName [ Accept, Pause, Reject, RateLimit 0 ]
applyingActions = map (actionName . frAction)
. mapMaybe (applyingFilter filters)
$ map jJob enqueued
@@ -556,9 +555,8 @@
[ stableCover (a `elem` applyingActions) perc ("is " ++ a)
| a <- allActions ]
- -- `covers` should be after `==>` and before `conjoin` (see QuickCheck
- -- bugs 25 and 27).
- in (enqueued /= []) ==> actionCovers $ conjoin
+ -- `covers` should be before `conjoin` (see QuickCheck bug 27).
+ in actionCovers $ conjoin
[ counterexample "scheduled jobs must be subsequence" $
toRun `isSubsequenceOf` enqueued
diff --git a/test/hs/Test/Ganeti/Objects.hs b/test/hs/Test/Ganeti/Objects.hs
index ea17bc0..49cfb64 100644
--- a/test/hs/Test/Ganeti/Objects.hs
+++ b/test/hs/Test/Ganeti/Objects.hs
@@ -127,7 +127,7 @@
<*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary
<*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary
<*> fmap UTF8.fromString genUUID <*> arbitrary
- <*> (Set.fromList <$> genTags)
+ <*> arbitrary -- TagSet
$(genArbitrary ''BlockDriver)
@@ -218,7 +218,7 @@
-- serial
<*> arbitrary
-- tags
- <*> (Set.fromList <$> genTags)
+ <*> arbitrary
instance Arbitrary RealInstanceData where
arbitrary =
@@ -259,7 +259,7 @@
-- serial
<*> arbitrary
-- tags
- <*> (Set.fromList <$> genTags)
+ <*> arbitrary
instance Arbitrary Instance where
arbitrary = frequency [ (1, ForthcomingInstance <$> arbitrary)
@@ -367,7 +367,7 @@
arbitrary = (GenericContainer . Map.fromList) <$> arbitrary
instance Arbitrary TagSet where
- arbitrary = Set.fromList <$> genTags
+ arbitrary = TagSet . Set.fromList <$> genTags
instance Arbitrary IAllocatorParams where
arbitrary = return $ GenericContainer Map.empty
@@ -462,7 +462,7 @@
ctime <- arbitrary
mtime <- arbitrary
let n = Network name mac_prefix (mkIp4Network net netmask) net6 gateway
- gateway6 res ext_res uuid ctime mtime 0 Set.empty
+ gateway6 res ext_res uuid ctime mtime 0 emptyTagSet
return n
-- | Generate an arbitrary string consisting of '0' and '1' of the given length.
@@ -706,7 +706,7 @@
mtime <- arbitrary
uuid <- genFQDN `suchThat` (/= name)
serial <- arbitrary
- tags <- Set.fromList <$> genTags
+ tags <- arbitrary
let group = NodeGroup name members ndparams alloc_policy ipolicy diskparams
net_map hv_state disk_state ctime mtime (UTF8.fromString uuid)
serial tags
diff --git a/test/hs/Test/Ganeti/Query/Instance.hs b/test/hs/Test/Ganeti/Query/Instance.hs
index 6a961c4..b095ba8 100644
--- a/test/hs/Test/Ganeti/Query/Instance.hs
+++ b/test/hs/Test/Ganeti/Query/Instance.hs
@@ -40,7 +40,6 @@
import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.Map as Map
-import qualified Data.Set as Set
import System.Time (ClockTime(..))
import Ganeti.JSON
@@ -64,7 +63,7 @@
(PartialBeParams Nothing Nothing Nothing Nothing Nothing Nothing)
(GenericContainer Map.empty) (GenericContainer Map.empty)
adminState adminStateSource [] [] False Nothing epochTime epochTime
- (UTF8.fromString "") 0 Set.empty
+ (UTF8.fromString "") 0 emptyTagSet
where epochTime = TOD 0 0
-- | A fake InstanceInfo to be used to check values.
diff --git a/test/hs/Test/Ganeti/TestCommon.hs b/test/hs/Test/Ganeti/TestCommon.hs
index 43595df..2a6b977 100644
--- a/test/hs/Test/Ganeti/TestCommon.hs
+++ b/test/hs/Test/Ganeti/TestCommon.hs
@@ -124,11 +124,13 @@
import Ganeti.Types
import Ganeti.Utils.Monad (unfoldrM)
+#if !MIN_VERSION_QuickCheck(2,8,2)
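+-- QuickCheck >= 2.8.2 already ships an Arbitrary instance for Data.Map, so
+-- the orphan instance below is only needed for older versions.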
-- * Arbitrary orphan instances
instance (Ord k, Arbitrary k, Arbitrary a) => Arbitrary (M.Map k a) where
arbitrary = M.fromList <$> arbitrary
shrink m = M.fromList <$> shrink (M.toList m)
+#endif
-- * Constants
diff --git a/test/hs/Test/Ganeti/Utils.hs b/test/hs/Test/Ganeti/Utils.hs
index c65db11..c437807 100644
--- a/test/hs/Test/Ganeti/Utils.hs
+++ b/test/hs/Test/Ganeti/Utils.hs
@@ -299,7 +299,12 @@
prop_timediffAdd a b c =
let fwd = Ganeti.Utils.diffClockTimes a b
back = Ganeti.Utils.diffClockTimes b a
- in addToClockTime fwd (addToClockTime back c) ==? c
+ c' = addToClockTime fwd (addToClockTime back c)
+ TOD cs cp = c
+ TOD cs' cp' = c'
+ in counterexample "Dates match exactly" (c' ==? c) .||.
+ counterexample "Dates match except daylight savings time"
+ (cs' - cs ==? 3600 .&&. cp' ==? cp)
-- | Test normal operation for 'chompPrefix'.
--
diff --git a/test/hs/shelltests/htools-hbal.test b/test/hs/shelltests/htools-hbal.test
index f7ce274..0420515 100644
--- a/test/hs/shelltests/htools-hbal.test
+++ b/test/hs/shelltests/htools-hbal.test
@@ -101,6 +101,38 @@
>>>/Solution length=2/
>>>= 0
+./test/hs/hbal -t $TESTDATA_DIR/hbal-avoid-long-solutions.data --long-solution-threshold=1000
+>>>/Solution length=4/
+>>>= 0
+
+./test/hs/hbal -t $TESTDATA_DIR/hbal-avoid-long-solutions.data --avoid-long-solutions=0.5
+>>>/Solution length=3/
+>>>= 0
+
+./test/hs/hbal -t $TESTDATA_DIR/hbal-avoid-long-solutions.data --avoid-long-solutions=5
+>>>/Solution length=2/
+>>>= 0
+
+./test/hs/hbal -t $TESTDATA_DIR/hbal-avoid-long-solutions.data --long-solution-threshold=1500 --avoid-long-solutions=0.25
+>>>/Solution length=4/
+>>>= 0
+
+./test/hs/hbal -t $TESTDATA_DIR/hbal-avoid-long-solutions.data --long-solution-threshold=1500 --avoid-long-solutions=1
+>>>/Solution length=3/
+>>>= 0
+
+./test/hs/hbal -t $TESTDATA_DIR/hbal-avoid-long-solutions.data --long-solution-threshold=1000 --avoid-long-solutions=5
+>>>/Solution length=2/
+>>>= 0
+
+./test/hs/hbal -t $TESTDATA_DIR/hbal-avoid-long-solutions.data --long-solution-threshold=10 --avoid-long-solutions=10
+>>>/Solution length=1/
+>>>= 0
+
+./test/hs/hbal -t $TESTDATA_DIR/hbal-avoid-long-solutions.data --long-solution-threshold=1 --avoid-long-solutions=10
+>>>/Solution length=0/
+>>>= 0
+
./test/hs/hbal -t $TESTDATA_DIR/hbal-memory-over-commitment.data
>>>/No solution found/
>>>= 0
diff --git a/test/py/cfgupgrade_unittest.py b/test/py/cfgupgrade_unittest.py
index 132575a..f17b428 100755
--- a/test/py/cfgupgrade_unittest.py
+++ b/test/py/cfgupgrade_unittest.py
@@ -445,11 +445,14 @@
def testUpgradeFullConfigFrom_2_17(self):
self._TestUpgradeFromFile("cluster_config_2.17.json", False)
- def test_2_17_to_2_16_downgrade(self):
- self._TestUpgradeFromFile("cluster_config_2.17.json", False)
+ def testUpgradeFullConfigFrom_2_18(self):
+ self._TestUpgradeFromFile("cluster_config_2.18.json", False)
+
+ def test_2_18_to_2_17_downgrade(self):
+ self._TestUpgradeFromFile("cluster_config_2.18.json", False)
_RunUpgrade(self.tmpdir, False, True, downgrade=True)
oldconf = self._LoadConfig()
- newconf = self._LoadTestDataConfig("cluster_config_2.16.json")
+ newconf = self._LoadTestDataConfig("cluster_config_2.17.json")
self.assertEqual(oldconf, newconf)
def testUpgradeCurrent(self):
@@ -469,7 +472,7 @@
def testDowngradeFullConfig(self):
"""Test for upgrade + downgrade combination."""
# This test can work only with the previous version of a configuration!
- oldconfname = "cluster_config_2.16.json"
+ oldconfname = "cluster_config_2.17.json"
self._TestUpgradeFromFile(oldconfname, False)
_RunUpgrade(self.tmpdir, False, True, downgrade=True)
oldconf = self._LoadTestDataConfig(oldconfname)
diff --git a/test/py/cmdlib/cluster_unittest.py b/test/py/cmdlib/cluster_unittest.py
index 1bdac3f..018cbd8 100644
--- a/test/py/cmdlib/cluster_unittest.py
+++ b/test/py/cmdlib/cluster_unittest.py
@@ -231,9 +231,12 @@
self.ExecOpCode(op)
- self.assertSingleHooksCall([self.master.name],
- "cluster-destroy",
- constants.HOOKS_PHASE_POST)
+ self.assertHooksCall([self.master.uuid], constants.GLOBAL_HOOKS_DIR,
+ constants.HOOKS_PHASE_PRE, index=0)
+ self.assertHooksCall([self.master.uuid], "cluster-destroy",
+ constants.HOOKS_PHASE_POST, index=1)
+ self.assertHooksCall([self.master.uuid], constants.GLOBAL_HOOKS_DIR,
+ constants.HOOKS_PHASE_POST, index=2)
class TestLUClusterPostInit(CmdlibTestCase):
@@ -243,9 +246,12 @@
self.ExecOpCode(op)
- self.assertSingleHooksCall([self.master.uuid],
- "cluster-init",
- constants.HOOKS_PHASE_POST)
+ self.assertHooksCall([self.master.uuid], constants.GLOBAL_HOOKS_DIR,
+ constants.HOOKS_PHASE_PRE, index=0)
+ self.assertHooksCall([self.master.uuid], "cluster-init",
+ constants.HOOKS_PHASE_POST, index=1)
+ self.assertHooksCall([self.master.uuid], constants.GLOBAL_HOOKS_DIR,
+ constants.HOOKS_PHASE_POST, index=2)
class TestLUClusterQuery(CmdlibTestCase):
@@ -1207,7 +1213,7 @@
def _AddNormalNode(self):
self.normalnode = copy.deepcopy(self.master)
self.normalnode.master_candidate = False
- self.normalnode.uuid = "normal-node-uuid"
+ self.normalnode.uuid = "deadbeef-dead-beef-dead-beefdeadbeef"
self.cfg.AddNode(self.normalnode, None)
def testVerifyMasterCandidate(self):
diff --git a/test/py/cmdlib/node_unittest.py b/test/py/cmdlib/node_unittest.py
index 8dcad78..d6dc66a 100644
--- a/test/py/cmdlib/node_unittest.py
+++ b/test/py/cmdlib/node_unittest.py
@@ -291,12 +291,12 @@
self.node = self.cfg.AddNewNode(
primary_ip='192.168.168.191',
secondary_ip='192.168.168.192',
- master_candidate=True, uuid='blue_bunny')
+ master_candidate=True, uuid='00000000-dead-beef-dead-beefdeadbeef')
self.snode = self.cfg.AddNewNode(
primary_ip='192.168.168.193',
secondary_ip='192.168.168.194',
- master_candidate=True, uuid='pink_bunny')
+ master_candidate=True, uuid='11111111-dead-beef-dead-beefdeadbeef')
def testSetSecondaryIp(self):
self.instance = self.cfg.AddNewInstance(primary_node=self.node,
@@ -310,8 +310,8 @@
self.assertEqual(sorted(self.wconfd.all_locks.items()), [
('cluster/BGL', 'shared'),
('instance/mock_inst_1.example.com', 'shared'),
- ('node-res/blue_bunny', 'exclusive'),
- ('node/blue_bunny', 'exclusive')])
+ ('node-res/00000000-dead-beef-dead-beefdeadbeef', 'exclusive'),
+ ('node/00000000-dead-beef-dead-beefdeadbeef', 'exclusive')])
def testSetSecondaryIpNoLock(self):
self.instance = self.cfg.AddNewInstance(primary_node=self.node,
@@ -324,8 +324,8 @@
self.assertEqual('254.254.254.254', self.node.secondary_ip)
self.assertEqual(sorted(self.wconfd.all_locks.items()), [
('cluster/BGL', 'shared'),
- ('node-res/blue_bunny', 'exclusive'),
- ('node/blue_bunny', 'exclusive')])
+ ('node-res/00000000-dead-beef-dead-beefdeadbeef', 'exclusive'),
+ ('node/00000000-dead-beef-dead-beefdeadbeef', 'exclusive')])
if __name__ == "__main__":
diff --git a/test/py/cmdlib/test_unittest.py b/test/py/cmdlib/test_unittest.py
old mode 100644
new mode 100755
index f93f99d..9e538ab
--- a/test/py/cmdlib/test_unittest.py
+++ b/test/py/cmdlib/test_unittest.py
@@ -85,7 +85,8 @@
self.ExecOpCode(op)
- self.rpc.call_test_delay.assert_called_once()
+ self.rpc.call_test_delay.assert_called_once_with(
+ [self.master_uuid], DELAY_DURATION)
def testFailingRpc(self):
op = opcodes.OpTestDelay(duration=DELAY_DURATION,
diff --git a/test/py/cmdlib/testsupport/cmdlib_testcase.py b/test/py/cmdlib/testsupport/cmdlib_testcase.py
index 4e459f3..f98b751 100644
--- a/test/py/cmdlib/testsupport/cmdlib_testcase.py
+++ b/test/py/cmdlib/testsupport/cmdlib_testcase.py
@@ -326,7 +326,7 @@
self.mcpu.assertLogContainsRegex(expected_regex)
def assertHooksCall(self, nodes, hook_path, phase,
- environment=None, count=None, index=0):
+ environment=None, index=0):
"""Asserts a call to C{rpc.call_hooks_runner}
@type nodes: list of string
@@ -338,16 +338,11 @@
@type environment: dict
@param environment: the environment passed to the hooks. C{None} to skip
asserting it
- @type count: int
- @param count: the number of hook invocations. C{None} to skip asserting it
@type index: int
@param index: the index of the hook invocation to assert
"""
- if count is not None:
- self.assertEqual(count, self.rpc.call_hooks_runner.call_count)
-
- args = self.rpc.call_hooks_runner.call_args[index]
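+ # Each mock_calls entry is a (name, args, kwargs) triple; element 1 holds
+ # the positional arguments of the index-th invocation.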
+ args = self.rpc.call_hooks_runner.mock_calls[index][1]
self.assertEqual(set(nodes), set(args[0]))
self.assertEqual(hook_path, args[1])
@@ -355,16 +350,6 @@
if environment is not None:
self.assertEqual(environment, args[3])
- def assertSingleHooksCall(self, nodes, hook_path, phase,
- environment=None):
- """Asserts a single call to C{rpc.call_hooks_runner}
-
- @see L{assertHooksCall} for parameter description.
-
- """
- self.assertHooksCall(nodes, hook_path, phase,
- environment=environment, count=1)
-
def CopyOpCode(self, opcode, **kwargs):
"""Creates a copy of the given opcode and applies modifications to it
diff --git a/test/py/cmdlib/testsupport/rpc_runner_mock.py b/test/py/cmdlib/testsupport/rpc_runner_mock.py
index 9658963..d19c39b 100644
--- a/test/py/cmdlib/testsupport/rpc_runner_mock.py
+++ b/test/py/cmdlib/testsupport/rpc_runner_mock.py
@@ -39,11 +39,23 @@
from cmdlib.testsupport.util import patchModule
+# We don't need arguments other than nodes in this mock.
+def MockHooksExecutionFn(nodes, _hpath, _phase, _env):
+ """Helper function that generate rpc results for call_hooks_runner mock
+
+ """
+ results = RpcResultsBuilder()
+ for node in nodes:
+ results.AddSuccessfulNode(node, data=None, get_node_id_fn=lambda nid: nid)
+ return results.Build()
+
+
def CreateRpcRunnerMock():
"""Creates a new L{mock.MagicMock} tailored for L{rpc.RpcRunner}
"""
ret = mock.MagicMock(spec=rpc.RpcRunner)
+ ret.call_hooks_runner.side_effect = MockHooksExecutionFn
return ret
@@ -106,7 +118,8 @@
else:
return node.uuid
- def CreateSuccessfulNodeResult(self, node, data=None):
+ def CreateSuccessfulNodeResult(self, node, data=None,
+ get_node_id_fn=None):
"""@see L{RpcResultsBuilder}
@param node: @see L{RpcResultsBuilder}.
@@ -116,7 +129,8 @@
"""
if data is None:
data = {}
- return rpc.RpcResult(data=(True, data), node=self._GetNodeId(node))
+ node_id = get_node_id_fn(node) if get_node_id_fn else self._GetNodeId(node)
+ return rpc.RpcResult(data=(True, data), node=node_id)
def CreateFailedNodeResult(self, node):
"""@see L{RpcResultsBuilder}
@@ -144,14 +158,15 @@
"""
return rpc.RpcResult(data=(False, error_msg), node=self._GetNodeId(node))
- def AddSuccessfulNode(self, node, data=None):
+ def AddSuccessfulNode(self, node, data=None, get_node_id_fn=None):
"""@see L{CreateSuccessfulNode}
@rtype: L{RpcResultsBuilder}
@return: self for chaining
"""
- self._results.append(self.CreateSuccessfulNodeResult(node, data))
+ self._results.append(self.CreateSuccessfulNodeResult(node, data,
+ get_node_id_fn))
return self
def AddFailedNode(self, node):
diff --git a/test/py/ganeti.backend_unittest.py b/test/py/ganeti.backend_unittest.py
index e737dad..4f8f4a7 100755
--- a/test/py/ganeti.backend_unittest.py
+++ b/test/py/ganeti.backend_unittest.py
@@ -678,15 +678,25 @@
shutil.rmtree(self.tmpdir)
def _Test(self, name, idx):
- self.assertEqual(backend._GetBlockDevSymlinkPath(name, idx,
+ self.assertEqual(backend._GetBlockDevSymlinkPath(name, idx=idx,
_dir=self.tmpdir),
("%s/%s%s%s" % (self.tmpdir, name,
constants.DISK_SEPARATOR, idx)))
- def test(self):
+ def testIndex(self):
for idx in range(100):
self._Test("inst1.example.com", idx)
+ def testUUID(self):
+ uuid = "6bcb6530-3695-47b6-9528-1ed7b5cfbf5c"
+ iname = "inst1.example.com"
+ dummy_idx = 6 # UUID should be preferred
+ expected = "%s/%s%s%s" % (self.tmpdir, iname,
+ constants.DISK_SEPARATOR, uuid)
+ link_name = backend._GetBlockDevSymlinkPath(iname, idx=dummy_idx,
+ uuid=uuid, _dir=self.tmpdir)
+ self.assertEqual(expected, link_name)
+
class TestGetInstanceList(unittest.TestCase):
diff --git a/test/py/ganeti.hooks_unittest.py b/test/py/ganeti.hooks_unittest.py
index ab7ddda..7476128 100755
--- a/test/py/ganeti.hooks_unittest.py
+++ b/test/py/ganeti.hooks_unittest.py
@@ -60,7 +60,8 @@
return {}
def BuildHooksNodes(self):
- return ["a"], ["a"]
+ return (["aaaaaaaa-dead-beef-dead-beefdeadbeef"],
+ ["aaaaaaaa-dead-beef-dead-beefdeadbeef"])
class TestHooksRunner(unittest.TestCase):
@@ -222,9 +223,9 @@
class TestHooksMaster(unittest.TestCase):
"""Testing case for HooksMaster"""
- def _call_false(*args):
- """Fake call_hooks_runner function which returns False."""
- return False
+ def _call_fail(*args):
+ """Fake call_hooks_runner which returns an empty result dictionary."""
+ return {}
@staticmethod
def _call_nodes_false(node_list, hpath, phase, env):
@@ -259,7 +260,7 @@
def testTotalFalse(self):
"""Test complete rpc failure"""
- hm = hooksmaster.HooksMaster.BuildFromLu(self._call_false, self.lu)
+ hm = hooksmaster.HooksMaster.BuildFromLu(self._call_fail, self.lu)
self.failUnlessRaises(errors.HooksFailure,
hm.RunPhase, constants.HOOKS_PHASE_PRE)
hm.RunPhase(constants.HOOKS_PHASE_POST)
@@ -299,7 +300,8 @@
return self.hook_env
def BuildHooksNodes(self):
- return (["a"], ["a"])
+ return (["aaaaaaaa-dead-beef-dead-beefdeadbeef"],
+ ["aaaaaaaa-dead-beef-dead-beefdeadbeef"])
class FakeNoHooksLU(cmdlib.NoHooksLU):
@@ -414,7 +416,7 @@
def testNoNodes(self):
self.lu.hook_env = {}
hm = hooksmaster.HooksMaster.BuildFromLu(self._HooksRpc, self.lu)
- hm.RunPhase(constants.HOOKS_PHASE_PRE, node_names=[])
+ hm.RunPhase(constants.HOOKS_PHASE_PRE, node_uuids=[])
self.assertRaises(IndexError, self._rpcs.pop)
def testSpecificNodes(self):
@@ -515,10 +517,11 @@
return {}
def BuildHooksNodes(self):
- return (["a"], ["a"])
+ return (["aaaaaaaa-dead-beef-dead-beefdeadbeef"],
+ ["aaaaaaaa-dead-beef-dead-beefdeadbeef"])
def PreparePostHookNodes(self, post_hook_node_uuids):
- return post_hook_node_uuids + ["b"]
+ return post_hook_node_uuids + ["bbbbbbbb-dead-beef-dead-beefdeadbeef"]
class TestHooksRunnerEnv(unittest.TestCase):
@@ -542,16 +545,104 @@
hm.RunPhase(constants.HOOKS_PHASE_PRE)
(node_list, hpath, phase, env) = self._rpcs.pop(0)
- self.assertEqual(node_list, set(["a"]))
+ self.assertEqual(node_list, set(["aaaaaaaa-dead-beef-dead-beefdeadbeef"]))
# Check post-phase hook
hm.RunPhase(constants.HOOKS_PHASE_POST)
(node_list, hpath, phase, env) = self._rpcs.pop(0)
- self.assertEqual(node_list, set(["a", "b"]))
+ self.assertEqual(node_list, set(["aaaaaaaa-dead-beef-dead-beefdeadbeef",
+ "bbbbbbbb-dead-beef-dead-beefdeadbeef"]))
self.assertRaises(IndexError, self._rpcs.pop)
+class TestGlobalHooks(unittest.TestCase):
+ """Testing case for global post hooks.
+
+ This test case exercises global hooks functionality that is not covered by
+ the corresponding QA tests.
+ """
+ def setUp(self):
+ """Initialize rpc mock calls archive and arguments for the hooksmaster.
+
+ """
+ self._rpcs = []
+
+ self.op = opcodes.OpTestDummy(result=False, messages=[], fail=False)
+ self.master_uuid = "aaaaaaaa-dead-beef-dead-beefdeadbeef"
+ self.other_uuid = "bbbbbbbb-dead-beef-dead-beefdeadbeef"
+ self.nodes = [self.master_uuid, self.other_uuid]
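+ # Hook nodes are passed as a (pre-phase nodes, post-phase nodes) pair.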
+ self.hooks_nodes = (frozenset([]), frozenset(self.nodes))
+ self.cluster_name = "mock_cluster_name"
+ self.master_name = "mock_master_name"
+ self.job_id = 1234
+ self.rpc_res_conv = hooksmaster.RpcResultsToHooksResults
+
+ def _HooksRpc(self, *args):
+ self._rpcs.append(args)
+ return FakeHooksRpcSuccess(*args)
+
+ def testGlobalHooks(self):
+ """Initializes hooksmaster and runs hooks with mocked rpc.
+
+ Checks the following statements:
+ - global hooks should be run on the master node even if the nodes list is
+ empty;
+ - global hooks should be run on the master node separately from
+ the other nodes;
+ - the environment variable IS_MASTER should be set to "master" when
+ executing on the master node;
+ - the environment variable IS_MASTER should be set to "not_master" when
+ executing on the other nodes;
+ - for post hooks, the *status* variable should be set correctly;
+ - the hooks path should be set to GLOBAL_HOOKS_DIR;
+ - the phase variable should be set correctly.
+ """
+ hm = hooksmaster.HooksMaster(self.op.OP_ID, hooks_path="test",
+ nodes=self.hooks_nodes,
+ hooks_execution_fn=self._HooksRpc,
+ hooks_results_adapt_fn=self.rpc_res_conv,
+ build_env_fn=lambda: {},
+ prepare_post_nodes_fn=None,
+ log_fn=None, htype=None,
+ cluster_name=self.cluster_name,
+ master_name=self.master_name,
+ master_uuid=self.master_uuid,
+ job_id=self.job_id)
+ # Run global pre hooks.
+ hm.RunPhase(constants.HOOKS_PHASE_PRE, is_global=True)
+
+ # Check the execution results on the master node.
+ (node_list, hpath, phase, env) = self._rpcs.pop(0)
+ self.assertEqual(node_list, set([self.master_uuid]),
+ "Pre hooks should have been run on master only")
+ self.assertEqual(hpath, constants.GLOBAL_HOOKS_DIR)
+ self.assertEqual(phase, constants.HOOKS_PHASE_PRE)
+ self.assertEqual(env["GANETI_IS_MASTER"], constants.GLOBAL_HOOKS_MASTER)
+
+ # Run global post hooks.
+ hm.RunPhase(constants.HOOKS_PHASE_POST, is_global=True,
+ post_status=constants.POST_HOOKS_STATUS_SUCCESS)
+
+ # Check the execution results on the master node.
+ (node_list, hpath, phase, env) = self._rpcs.pop(0)
+ self.assertEqual(node_list, set([self.master_uuid]),
+ "Post hooks should have been run on master separately")
+ self.assertEqual(hpath, constants.GLOBAL_HOOKS_DIR)
+ self.assertEqual(phase, constants.HOOKS_PHASE_POST)
+ self.assertEqual(env["GANETI_IS_MASTER"], constants.GLOBAL_HOOKS_MASTER)
+
+ # Check the execution results on the other nodes.
+ (node_list, hpath, phase, env) = self._rpcs.pop(0)
+ self.assertEqual(node_list, set([self.other_uuid]),
+ "Post hooks nodes set is not equal the passed set")
+ self.assertEqual(hpath, constants.GLOBAL_HOOKS_DIR)
+ self.assertEqual(phase, constants.HOOKS_PHASE_POST)
+ self.assertEqual(env["GANETI_IS_MASTER"], constants.GLOBAL_HOOKS_NOT_MASTER)
+
+ # Ensure that there were no more rpc mock executions.
+ self.assertRaises(IndexError, self._rpcs.pop)
+
if __name__ == "__main__":
testutils.GanetiTestProgram()
diff --git a/test/py/ganeti.http_unittest.py b/test/py/ganeti.http_unittest.py
index 753ddf3..7e6ba38 100755
--- a/test/py/ganeti.http_unittest.py
+++ b/test/py/ganeti.http_unittest.py
@@ -42,6 +42,7 @@
from ganeti import http
from ganeti import compat
+from ganeti.rapi.auth import users_file
import ganeti.http.server
import ganeti.http.client
@@ -122,12 +123,12 @@
class _FakeRequestAuth(http.auth.HttpServerRequestAuthentication):
- def __init__(self, realm, authreq, authenticate_fn):
+ def __init__(self, realm, authreq, authenticator):
http.auth.HttpServerRequestAuthentication.__init__(self)
self.realm = realm
self.authreq = authreq
- self.authenticate_fn = authenticate_fn
+ self.authenticator = authenticator
def AuthenticationRequired(self, req):
return self.authreq
@@ -135,9 +136,11 @@
def GetAuthRealm(self, req):
return self.realm
- def Authenticate(self, *args):
- if self.authenticate_fn:
- return self.authenticate_fn(*args)
+ def Authenticate(self, req):
+ handler_access = []
+ if self.authenticator:
+ return self.authenticator.ValidateRequest(
+ req, handler_access, self.GetAuthRealm(req))
raise NotImplementedError()
@@ -155,7 +158,7 @@
def _testVerifyBasicAuthPassword(self, realm, user, password, expected):
ra = _FakeRequestAuth(realm, False, None)
- return ra.VerifyBasicAuthPassword(None, user, password, expected)
+ return ra.VerifyBasicAuthPassword(user, password, expected, realm)
def testVerifyBasicAuthPassword(self):
tvbap = self._testVerifyBasicAuthPassword
@@ -204,9 +207,17 @@
self.password = password
self.called = False
- def __call__(self, req, user, password):
+ def ValidateRequest(self, req, handler_access, realm):
self.called = True
- return self.user == user and self.password == password
+
+ username, password = http.auth.HttpServerRequestAuthentication \
+ .ExtractUserPassword(req)
+ if username is None or password is None:
+ return False
+
+ return (self.user == username and
+ http.auth.HttpServerRequestAuthentication.VerifyBasicAuthPassword(
+ username, password, self.password, realm))
class TestHttpServerRequestAuthentication(unittest.TestCase):
@@ -217,26 +228,30 @@
def testNoRealm(self):
headers = { http.HTTP_AUTHORIZATION: "", }
req = http.server._HttpServerRequest("GET", "/", headers, None, None)
- ra = _FakeRequestAuth(None, False, None)
+ ac = _SimpleAuthenticator("foo", "bar")
+ ra = _FakeRequestAuth(None, False, ac)
self.assertRaises(AssertionError, ra.PreHandleRequest, req)
def testNoScheme(self):
headers = { http.HTTP_AUTHORIZATION: "", }
req = http.server._HttpServerRequest("GET", "/", headers, None, None)
- ra = _FakeRequestAuth("area1", False, None)
+ ac = _SimpleAuthenticator("foo", "bar")
+ ra = _FakeRequestAuth("area1", False, ac)
self.assertRaises(http.HttpUnauthorized, ra.PreHandleRequest, req)
def testUnknownScheme(self):
headers = { http.HTTP_AUTHORIZATION: "NewStyleAuth abc", }
req = http.server._HttpServerRequest("GET", "/", headers, None, None)
- ra = _FakeRequestAuth("area1", False, None)
+ ac = _SimpleAuthenticator("foo", "bar")
+ ra = _FakeRequestAuth("area1", False, ac)
self.assertRaises(http.HttpUnauthorized, ra.PreHandleRequest, req)
def testInvalidBase64(self):
headers = { http.HTTP_AUTHORIZATION: "Basic x_=_", }
req = http.server._HttpServerRequest("GET", "/", headers, None, None)
- ra = _FakeRequestAuth("area1", False, None)
- self.assertRaises(http.HttpUnauthorized, ra.PreHandleRequest, req)
+ ac = _SimpleAuthenticator("foo", "bar")
+ ra = _FakeRequestAuth("area1", False, ac)
+ self.assertRaises(http.HttpBadRequest, ra.PreHandleRequest, req)
def testAuthForPublicResource(self):
headers = {
@@ -268,11 +283,12 @@
http.HttpBadRequest: ["Basic"],
}
+ ac = _SimpleAuthenticator("foo", "bar")
for exc, headers in checks.items():
for i in headers:
headers = { http.HTTP_AUTHORIZATION: i, }
req = http.server._HttpServerRequest("GET", "/", headers, None, None)
- ra = _FakeRequestAuth("area1", False, None)
+ ra = _FakeRequestAuth("area1", False, ac)
self.assertRaises(exc, ra.PreHandleRequest, req)
def testBasicAuth(self):
@@ -307,7 +323,7 @@
class TestReadPasswordFile(unittest.TestCase):
def testSimple(self):
- users = http.auth.ParsePasswordFile("user1 password")
+ users = users_file.ParsePasswordFile("user1 password")
self.assertEqual(len(users), 1)
self.assertEqual(users["user1"].password, "password")
self.assertEqual(len(users["user1"].options), 0)
@@ -322,7 +338,7 @@
buf.write(" \t# Another comment\n")
buf.write("invalidline\n")
- users = http.auth.ParsePasswordFile(buf.getvalue())
+ users = users_file.ParsePasswordFile(buf.getvalue())
self.assertEqual(len(users), 2)
self.assertEqual(users["user1"].password, "password")
self.assertEqual(len(users["user1"].options), 0)
diff --git a/test/py/ganeti.hypervisor.hv_kvm_unittest.py b/test/py/ganeti.hypervisor.hv_kvm_unittest.py
index 16b1e0a..1226daf 100755
--- a/test/py/ganeti.hypervisor.hv_kvm_unittest.py
+++ b/test/py/ganeti.hypervisor.hv_kvm_unittest.py
@@ -37,6 +37,7 @@
import os
import struct
import re
+from contextlib import nested
from ganeti import serializer
from ganeti import constants
@@ -578,12 +579,12 @@
self.MockOut(mock.patch(kvm_class + '._CallMonitorCommand'))
self.cfg = ConfigMock()
- params = constants.HVC_DEFAULTS[constants.HT_KVM].copy()
- beparams = constants.BEC_DEFAULTS.copy()
+ self.params = constants.HVC_DEFAULTS[constants.HT_KVM].copy()
+ self.beparams = constants.BEC_DEFAULTS.copy()
self.instance = self.cfg.AddNewInstance(name='name.example.com',
hypervisor='kvm',
- hvparams=params,
- beparams=beparams)
+ hvparams=self.params,
+ beparams=self.beparams)
def testDirectoriesCreated(self):
hypervisor = hv_kvm.KVMHypervisor()
@@ -616,5 +617,92 @@
self.mocks['run_cmd'].side_effect = RunCmd
hypervisor.StartInstance(self.instance, [], False)
+
+class TestKvmCpuPinning(testutils.GanetiTestCase):
+ def setUp(self):
+ super(TestKvmCpuPinning, self).setUp()
+ kvm_class = 'ganeti.hypervisor.hv_kvm.KVMHypervisor'
+ self.MockOut('qmp', mock.patch('ganeti.hypervisor.hv_kvm.QmpConnection'))
+ self.MockOut('run_cmd', mock.patch('ganeti.utils.RunCmd'))
+ self.MockOut('ensure_dirs', mock.patch('ganeti.utils.EnsureDirs'))
+ self.MockOut('write_file', mock.patch('ganeti.utils.WriteFile'))
+ self.MockOut(mock.patch(kvm_class + '._InstancePidAlive',
+ return_value=(True, 1371, True)))
+ self.MockOut(mock.patch(kvm_class + '._GetVcpuThreadIds',
+ return_value=[1, 3, 5, 2, 4, 0 ]))
+ self.params = constants.HVC_DEFAULTS[constants.HT_KVM].copy()
+
+ def testCpuPinningDefault(self):
+ if hv_kvm.psutil is None:
+ # FIXME: switch to unittest.skip once python 2.6 is deprecated
+ print "skipped 'psutil Python package not found'"
+ return
+ mock_process = mock.MagicMock()
+ cpu_mask = self.params['cpu_mask']
+ worker_cpu_mask = self.params['worker_cpu_mask']
+ hypervisor = hv_kvm.KVMHypervisor()
+ with nested(mock.patch('psutil.Process', return_value=mock_process),
+ mock.patch('psutil.cpu_count', return_value=1237)):
+ hypervisor._ExecuteCpuAffinity('test_instance', cpu_mask, worker_cpu_mask)
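+ # Expect a single affinity call covering every CPU reported by psutil.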
+
+ self.assertEqual(mock_process.set_cpu_affinity.call_count, 1)
+ self.assertEqual(mock_process.set_cpu_affinity.call_args_list[0],
+ mock.call(range(0,1237)))
+
+ def testCpuPinningPerVcpu(self):
+ if hv_kvm.psutil is None:
+ # FIXME: switch to unittest.skip once python 2.6 is deprecated
+ print "skipped 'psutil Python package not found'"
+ return
+ mock_process = mock.MagicMock()
+ mock_process.set_cpu_affinity = mock.MagicMock()
+ mock_process.set_cpu_affinity().return_value = True
+ mock_process.get_children.return_value = []
+ mock_process.reset_mock()
+
+ cpu_mask = "1:2:4:5:10:15-17"
+ worker_cpu_mask = self.params['worker_cpu_mask']
+ hypervisor = hv_kvm.KVMHypervisor()
+
+ # Return the same mock object on every call instead of creating a new one
+ # each time.
+ def get_mock_process(unused_pid):
+ return mock_process
+
+ with nested(mock.patch('psutil.Process', get_mock_process),
+ mock.patch('psutil.cpu_count', return_value=1237)):
+ hypervisor._ExecuteCpuAffinity('test_instance', cpu_mask, worker_cpu_mask)
+ self.assertEqual(mock_process.set_cpu_affinity.call_count, 7)
+ self.assertEqual(mock_process.set_cpu_affinity.call_args_list[0],
+ mock.call(range(0,1237)))
+ self.assertEqual(mock_process.set_cpu_affinity.call_args_list[6],
+ mock.call([15, 16, 17]))
+
+ def testCpuPinningEntireInstance(self):
+ if hv_kvm.psutil is None:
+ # FIXME: switch to unittest.skip once python 2.6 is deprecated
+ print "skipped 'psutil Python package not found'"
+ return
+ mock_process = mock.MagicMock()
+ mock_process.set_cpu_affinity = mock.MagicMock()
+ mock_process.set_cpu_affinity().return_value = True
+ mock_process.get_children.return_value = []
+ mock_process.reset_mock()
+
+ cpu_mask = "4"
+ worker_cpu_mask = "5"
+ hypervisor = hv_kvm.KVMHypervisor()
+
+ def get_mock_process(unused_pid):
+ return mock_process
+
+ with mock.patch('psutil.Process', get_mock_process):
+ hypervisor._ExecuteCpuAffinity('test_instance', cpu_mask, worker_cpu_mask)
+ self.assertEqual(mock_process.set_cpu_affinity.call_count, 7)
+ self.assertEqual(mock_process.set_cpu_affinity.call_args_list[0],
+ mock.call([5]))
+ self.assertEqual(mock_process.set_cpu_affinity.call_args_list[1],
+ mock.call([4]))
+
if __name__ == "__main__":
testutils.GanetiTestProgram()
diff --git a/test/py/ganeti.server.rapi_unittest.py b/test/py/ganeti.server.rapi_unittest.py
index ee879bd..d3b5a4a 100755
--- a/test/py/ganeti.server.rapi_unittest.py
+++ b/test/py/ganeti.server.rapi_unittest.py
@@ -46,7 +46,8 @@
from ganeti import http
from ganeti import objects
-import ganeti.rapi.baserlib
+from ganeti.rapi.auth.basic_auth import BasicAuthenticator
+from ganeti.rapi.auth import users_file
import ganeti.rapi.testutils
import ganeti.rapi.rlib2
import ganeti.http.auth
@@ -62,7 +63,8 @@
def _Test(self, method, path, headers, reqbody,
user_fn=NotImplemented, luxi_client=NotImplemented,
reqauth=False):
- rm = rapi.testutils._RapiMock(user_fn, luxi_client, reqauth=reqauth)
+ rm = rapi.testutils._RapiMock(BasicAuthenticator(user_fn), luxi_client,
+ reqauth=reqauth)
(resp_code, resp_headers, resp_body) = \
rm.FetchResponse(path, method, http.ParseHeaders(StringIO(headers)),
@@ -105,7 +107,8 @@
self.assertTrue(data["message"].startswith("Method PUT is unsupported"))
def testPostInstancesNoAuth(self):
- (code, _, _) = self._Test(http.HTTP_POST, "/2/instances", "", None)
+ (code, _, _) = self._Test(http.HTTP_POST, "/2/instances", "", None,
+ reqauth=True)
self.assertEqual(code, http.HttpUnauthorized.code)
def testRequestWithUnsupportedMediaType(self):
@@ -134,7 +137,8 @@
"%s: %s" % (http.HTTP_AUTHORIZATION, "Unsupported scheme"),
])
- (code, _, _) = self._Test(http.HTTP_POST, "/2/instances", headers, "")
+ (code, _, _) = self._Test(http.HTTP_POST, "/2/instances", headers, "",
+ reqauth=True)
self.assertEqual(code, http.HttpUnauthorized.code)
def testIncompleteBasicAuth(self):
@@ -142,7 +146,8 @@
"%s: Basic" % http.HTTP_AUTHORIZATION,
])
- (code, _, data) = self._Test(http.HTTP_POST, "/2/instances", headers, "")
+ (code, _, data) = self._Test(http.HTTP_POST, "/2/instances", headers, "",
+ reqauth=True)
self.assertEqual(code, http.HttpBadRequest.code)
self.assertEqual(data["message"],
"Basic authentication requires credentials")
@@ -154,8 +159,9 @@
"%s: Basic %s" % (http.HTTP_AUTHORIZATION, auth),
])
- (code, _, data) = self._Test(http.HTTP_POST, "/2/instances", headers, "")
- self.assertEqual(code, http.HttpUnauthorized.code)
+ (code, _, data) = self._Test(http.HTTP_POST, "/2/instances", headers, "",
+ reqauth=True)
+ self.assertEqual(code, http.HttpBadRequest.code)
@staticmethod
def _MakeAuthHeaders(username, password, correct_password):
@@ -178,16 +184,14 @@
def _LookupUserNoWrite(name):
if name == username:
- return http.auth.PasswordFileUser(name, password, [])
+ return users_file.PasswordFileUser(name, password, [])
else:
return None
for access in [rapi.RAPI_ACCESS_WRITE, rapi.RAPI_ACCESS_READ]:
def _LookupUserWithWrite(name):
if name == username:
- return http.auth.PasswordFileUser(name, password, [
- access,
- ])
+ return users_file.PasswordFileUser(name, password, [access])
else:
return None
@@ -198,7 +202,7 @@
for method in rapi.baserlib._SUPPORTED_METHODS:
# No authorization
- (code, _, _) = self._Test(method, path, "", "")
+ (code, _, _) = self._Test(method, path, "", "", reqauth=True)
if method in (http.HTTP_DELETE, http.HTTP_POST):
self.assertEqual(code, http.HttpNotImplemented.code)
@@ -208,22 +212,22 @@
# Incorrect user
(code, _, _) = self._Test(method, path, header_fn(True), "",
- user_fn=self._LookupWrongUser)
+ user_fn=self._LookupWrongUser, reqauth=True)
self.assertEqual(code, http.HttpUnauthorized.code)
# User has no write access, but the password is correct
(code, _, _) = self._Test(method, path, header_fn(True), "",
- user_fn=_LookupUserNoWrite)
+ user_fn=_LookupUserNoWrite, reqauth=True)
self.assertEqual(code, http.HttpForbidden.code)
# Wrong password and no write access
(code, _, _) = self._Test(method, path, header_fn(False), "",
- user_fn=_LookupUserNoWrite)
+ user_fn=_LookupUserNoWrite, reqauth=True)
self.assertEqual(code, http.HttpUnauthorized.code)
# Wrong password with write access
(code, _, _) = self._Test(method, path, header_fn(False), "",
- user_fn=_LookupUserWithWrite)
+ user_fn=_LookupUserWithWrite, reqauth=True)
self.assertEqual(code, http.HttpUnauthorized.code)
# Prepare request information
@@ -241,7 +245,8 @@
# User has write access, password is correct
(code, _, data) = self._Test(method, reqpath, header_fn(True), body,
user_fn=_LookupUserWithWrite,
- luxi_client=_FakeLuxiClientForQuery)
+ luxi_client=_FakeLuxiClientForQuery,
+ reqauth=True)
self.assertEqual(code, http.HTTP_OK)
self.assertTrue(objects.QueryResponse.FromDict(data))
@@ -250,10 +255,14 @@
for method in rapi.baserlib._SUPPORTED_METHODS:
for reqauth in [False, True]:
+ if method == http.HTTP_GET and not reqauth:
+ # we don't have a mock client to test this case
+ continue
# No authorization
- (code, _, _) = self._Test(method, path, "", "", reqauth=reqauth)
+ (code, _, _) = self._Test(method, path, "", "",
+ user_fn=lambda _: None, reqauth=reqauth)
- if method == http.HTTP_GET or reqauth:
+ if method == http.HTTP_GET and reqauth:
self.assertEqual(code, http.HttpUnauthorized.code)
else:
self.assertEqual(code, http.HttpNotImplemented.code)
diff --git a/test/py/ganeti.storage.drbd_unittest.py b/test/py/ganeti.storage.drbd_unittest.py
index 902cc1b..830a3a0 100755
--- a/test/py/ganeti.storage.drbd_unittest.py
+++ b/test/py/ganeti.storage.drbd_unittest.py
@@ -50,6 +50,7 @@
"version: 8.0.12 (api:76/proto:86-91)",
"version: 8.2.7 (api:88/proto:0-100)",
"version: 8.3.7.49 (api:188/proto:13-191)",
+ "version: 8.4.7-1 (api:1/proto:86-101)",
]
result = [
{
@@ -83,6 +84,15 @@
"api": 188,
"proto": 13,
"proto2": "191",
+ },
+ {
+ "k_major": 8,
+ "k_minor": 4,
+ "k_point": 7,
+ "k_release": "1",
+ "api": 1,
+ "proto": 86,
+ "proto2": "101",
}
]
for d, r in zip(data, result):
diff --git a/test/py/ganeti.utils.retry_unittest.py b/test/py/ganeti.utils.retry_unittest.py
index 93638cd..8a53760 100755
--- a/test/py/ganeti.utils.retry_unittest.py
+++ b/test/py/ganeti.utils.retry_unittest.py
@@ -215,7 +215,7 @@
def testSuccessOnFirst(self):
test_fn = mock.Mock()
utils.RetryByNumberOfTimes(5, 0, Exception, test_fn)
- test_fn.assert_called_once()
+ test_fn.assert_called_once_with()
def testSuccessOnFirstWithArgs(self):
test_fn = mock.Mock()
diff --git a/test/py/mocks.py b/test/py/mocks.py
index b48125b..5c78918 100644
--- a/test/py/mocks.py
+++ b/test/py/mocks.py
@@ -58,7 +58,11 @@
return True
def GetNodeList(self):
- return ["a", "b", "c"]
+ return [
+ "01234567-89ab-cdef-fedc-aaaaaaaaaaaa",
+ "01234567-89ab-cdef-fedc-bbbbbbbbbbbb",
+ "01234567-89ab-cdef-fedc-cccccccccccc"
+ ]
def GetRsaHostKey(self):
return FAKE_CLUSTER_KEY
@@ -70,7 +74,7 @@
return "test.cluster"
def GetMasterNode(self):
- return "a"
+ return "01234567-89ab-cdef-fedc-aaaaaaaaaaaa"
def GetMasterNodeName(self):
return netutils.Hostname.GetSysName()
@@ -204,3 +208,6 @@
def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
# pylint: disable=W0613
return lu_result
+
+ def HooksAbortCallBack(self, phase, feedback_fn, exception):
+ pass
diff --git a/test/py/testutils/__init__.py b/test/py/testutils/__init__.py
index 27ca425..f2e63b5 100644
--- a/test/py/testutils/__init__.py
+++ b/test/py/testutils/__init__.py
@@ -37,6 +37,23 @@
import unittest
import logging
+# Unified patch_object for various versions of Python Mock.
+#
+# Different Python Mock versions expose object patching under different names:
+# recent versions use _patch_object, older ones used patch_object, and the
+# public mock.patch.object is used as a final fallback. This unifies the
+# different variations.
+import mock
+
+try:
+ # pylint: disable=W0212
+ _patcher = mock._patch_object
+except AttributeError:
+ # pylint: disable=E1101
+ try:
+ _patcher = mock.patch_object
+ except AttributeError:
+ _patcher = mock.patch.object
+
from ganeti import utils
@@ -235,20 +252,8 @@
def patch_object(*args, **kwargs):
- """Unified patch_object for various versions of Python Mock.
-
- Different Python Mock versions provide incompatible versions of patching an
- object. More recent versions use _patch_object, older ones used patch_object.
- This function unifies the different variations.
-
- """
- import mock
- try:
- # pylint: disable=W0212
- return mock._patch_object(*args, **kwargs)
- except AttributeError:
- # pylint: disable=E1101
- return mock.patch_object(*args, **kwargs)
+ """Unified patch_object for various versions of Python Mock."""
+ return _patcher(*args, **kwargs)
def UnifyValueType(data):