Skip to content

Commit 2bef63a

Browse files
authored
Add xapi-ssh-monitor script and service (#6661)
Original code by @LunfanZhang and @BengangY.
2 parents 234aef7 + cfee0d8 commit 2bef63a

File tree

3 files changed

+313
-0
lines changed

3 files changed

+313
-0
lines changed

scripts/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ install:
137137
mkdir -p $(DESTDIR)/etc/cron.d
138138
$(IDATA) xapi-tracing-log-trim.cron $(DESTDIR)/etc/cron.d/xapi-tracing-log-trim.cron
139139
mkdir -p $(DESTDIR)/opt/xensource/gpg
140+
$(IPROG) xapi-ssh-monitor $(DESTDIR)$(OPTDIR)/bin
141+
$(IDATA) xapi-ssh-monitor.service $(DESTDIR)/usr/lib/systemd/system/xapi-ssh-monitor.service
140142
# host-backup-restore
141143
$(IPROG) host-backup-restore/host-backup $(DESTDIR)$(LIBEXECDIR)
142144
$(IPROG) host-backup-restore/host-restore $(DESTDIR)$(LIBEXECDIR)

scripts/xapi-ssh-monitor

Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
#!/usr/bin/env python3
2+
3+
import time
4+
import subprocess
5+
import logging
6+
import os.path
7+
import signal
8+
import sys
9+
import re
10+
import XenAPI
11+
import threading
12+
from enum import Enum, auto
13+
from typing import Tuple, List, Optional, Dict, Any
14+
import traceback
15+
16+
# Configure logging
17+
log_format = '%(asctime)s - %(levelname)s - %(message)s'
18+
log_level = logging.INFO
19+
20+
logging.basicConfig(
21+
level=log_level,
22+
format=log_format,
23+
handlers=[
24+
logging.StreamHandler(),
25+
logging.FileHandler('/var/log/daemon.log')
26+
]
27+
)
28+
29+
logger = logging.getLogger(__name__)
30+
31+
# Constants
32+
class SshState(Enum):
    """State of the sshd unit as classified by the monitor."""

    # sshd is not running.
    DOWN = 1
    # sshd is running.
    ACTIVE = 2
    # The reported status was not one the monitor recognises.
    UNKNOWN = 3
36+
37+
# Pattern anchored at the start of the string: matches text that begins
# with the INSTALLATION_UUID key.
INSTALLATION_UUID_REGEX = re.compile("^INSTALLATION_UUID")


def match_host_id(s):
    """Return a match object when *s* begins with ``INSTALLATION_UUID``.

    Returns None otherwise, so the function can be used directly as a
    ``filter`` predicate over lines of text.
    """
    # The pattern is anchored with '^' (no MULTILINE flag), so matching at
    # position 0 is the same as searching from position 0.
    return INSTALLATION_UUID_REGEX.match(s)
41+
42+
class XapiMonitor:
    """Toggle SSH access according to the health of the local XAPI.

    Each monitoring cycle runs the XAPI health-check helper and queries the
    state of the ``sshd`` unit:

    * XAPI healthy and sshd active: ask XAPI to disable SSH on this host.
    * XAPI unhealthy and sshd not active: enable and start sshd directly
      (adding the firewalld ``ssh`` service first when ``firewall-cmd`` is
      installed).

    SIGTERM, SIGINT and SIGHUP all request a graceful stop of the loop.
    """

    XAPI_HEALTH_CHECK = '/opt/xensource/libexec/xapi-health-check'

    def __init__(self):
        """Read the local host UUID and install the signal handlers.

        Raises:
            RuntimeError: if the host UUID cannot be read from the inventory
                file (see get_localhost_uuid).
        """
        self.logger = logging.getLogger(__name__)
        self.running = True
        self.session = None
        self.localhost_uuid = self.get_localhost_uuid()
        # Event used to interrupt sleeps so shutdown is prompt
        self.exit_event = threading.Event()
        signal.signal(signal.SIGTERM, self._handle_signal)
        signal.signal(signal.SIGINT, self._handle_signal)
        signal.signal(signal.SIGHUP, self._handle_signal)

    def _handle_signal(self, signum, frame):
        """Handle termination signals by requesting a graceful exit."""
        signal_names = {
            signal.SIGTERM: "SIGTERM",
            signal.SIGINT: "SIGINT",
            signal.SIGHUP: "SIGHUP"
        }
        signal_name = signal_names.get(signum, f"Signal {signum}")
        self.logger.info(f"Received {signal_name}, preparing to exit...")
        self.running = False
        # Wake up any exit_event.wait() currently in progress
        self.exit_event.set()

    def _create_session(self) -> Optional[Any]:
        """Create a session with the local XAPI.

        Returns:
            The logged-in session, or None if creation or login failed (the
            failure is logged).
        """
        try:
            session = XenAPI.xapi_local()
            session.login_with_password("", "")
            return session
        except Exception as e:
            self.logger.error(f"Create XAPI session failed: {e}")
            return None

    def _logout_session(self) -> None:
        """Log out of the current XAPI session, if any, and forget it.

        ``self.session`` is always reset to None, even when the logout call
        itself fails, so a dead session is never reused or logged out twice.
        """
        session, self.session = self.session, None
        if not session:
            return
        try:
            session.logout()
            self.logger.debug("XAPI session logged out")
        except Exception as e:
            self.logger.warning(f"Error during session logout: {e}")

    @staticmethod
    def get_localhost_uuid(filename: str = '/etc/xensource-inventory') -> str:
        """Get the UUID of the local host from the inventory file.

        The first line starting with ``INSTALLATION_UUID`` that carries a
        non-empty single-quoted value wins; the text between the first pair
        of single quotes is returned.

        Args:
            filename: Path of the inventory file to read.

        Returns:
            The quoted value of the INSTALLATION_UUID entry.

        Raises:
            RuntimeError: if the file cannot be read (chained to the
                underlying error) or contains no usable INSTALLATION_UUID
                entry.
        """
        log = logging.getLogger(__name__)
        try:
            with open(filename, 'r') as f:
                for line in filter(match_host_id, f):
                    fields = line.split("'")
                    # Skip entries without a quoted, non-empty value instead
                    # of misreporting them as a failure to open the file.
                    if len(fields) > 1 and fields[1]:
                        return fields[1]
        except (OSError, UnicodeError) as e:
            error_msg = f"Unable to open inventory file [{filename}]: {e}"
            log.error(error_msg)
            raise RuntimeError(error_msg) from e

        # If we get here, we didn't find the UUID
        error_msg = f"Could not find INSTALLATION_UUID in {filename}"
        log.error(error_msg)
        raise RuntimeError(error_msg)

    def _run_command(self, command: List[str], timeout: int = 10) -> Tuple[int, str, str]:
        """Execute a command and return its results.

        Never raises: every failure is logged and reported through the
        return value.

        Args:
            command: Command to execute as list of strings
            timeout: Command execution timeout in seconds (default: 10)

        Returns:
            Tuple of (return_code, stdout, stderr). On timeout the tuple is
            (-1, "", "Timeout"); on any other failure to run the command it
            is (-1, "", <error text>).
        """
        self.logger.debug(f"Running command: {' '.join(command)}")
        try:
            # subprocess.run kills the child and reaps it before raising
            # TimeoutExpired, so no manual cleanup is needed here.
            completed = subprocess.run(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            self.logger.error(f"Command execution timeout after {timeout}s: {' '.join(command)}")
            return -1, "", "Timeout"
        except Exception as e:
            self.logger.error(f"Error executing command: {e}")
            return -1, "", str(e)

        self.logger.debug(f"Command returned: {completed.returncode}")
        return completed.returncode, completed.stdout, completed.stderr

    def _check_xapi_health(self) -> bool:
        """Check XAPI health status with an extended (120s) timeout.

        Returns:
            True if the health-check helper exited with status 0. A timeout
            or a failure to run the helper counts as unhealthy.
        """
        self.logger.debug("Performing XAPI health check")
        returncode, _, stderr = self._run_command([self.XAPI_HEALTH_CHECK], timeout=120)

        if returncode != 0:
            self.logger.warning(f"XAPI health check failed: {stderr}")

        return returncode == 0

    def _get_ssh_state(self) -> "SshState":
        """Get the SSH service status from ``systemctl is-active sshd``.

        Returns:
            SshState.ACTIVE for 'active'; SshState.DOWN for 'inactive',
            'failed' or 'unknown'; SshState.UNKNOWN for anything else
            (including no output at all).
        """
        _, stdout, stderr = self._run_command(['systemctl', 'is-active', 'sshd'])
        status = stdout.strip()

        if status == 'active':
            return SshState.ACTIVE
        if status in ('inactive', 'failed', 'unknown'):
            return SshState.DOWN

        self.logger.warning(f"Unexpected SSH status: {status}, stderr: {stderr}")
        return SshState.UNKNOWN

    def _control_ssh_service(self, enable: bool) -> bool:
        """Start or stop the SSH service directly, bypassing XAPI.

        Enabling adds the firewalld ``ssh`` service (only when
        /usr/bin/firewall-cmd exists), then enables and starts sshd.
        Disabling stops and disables sshd, then removes the firewalld
        service. Every step is attempted even if an earlier one failed.

        Args:
            enable: True to bring SSH up, False to take it down.

        Returns:
            bool: True if every step succeeded, False otherwise
        """
        action = "starting" if enable else "stopping"
        firewall_cmd = '/usr/bin/firewall-cmd'
        try:
            use_firewalld = os.path.exists(firewall_cmd)
            if enable:
                if use_firewalld:
                    ret0, _, stderr0 = self._run_command([firewall_cmd, '--add-service', 'ssh'])
                else:
                    ret0, stderr0 = 0, "n/a"
                ret1, _, stderr1 = self._run_command(['systemctl', 'enable', 'sshd'])
                ret2, _, stderr2 = self._run_command(['systemctl', 'start', 'sshd'])
            else:
                ret2, _, stderr2 = self._run_command(['systemctl', 'stop', 'sshd'])
                ret1, _, stderr1 = self._run_command(['systemctl', 'disable', 'sshd'])
                if use_firewalld:
                    ret0, _, stderr0 = self._run_command([firewall_cmd, '--remove-service', 'ssh'])
                else:
                    ret0, stderr0 = 0, "n/a"

            success = (ret0 == 0 and ret1 == 0 and ret2 == 0)
            if success:
                self.logger.info(f"SSH service {action} successful")
            else:
                # Built from adjacent literals so the record stays on one
                # line in the log file.
                self.logger.error(
                    f"SSH service {action} failed: "
                    f"enable/disable firewalld service stderr: {stderr0}, "
                    f"enable/disable sshd stderr: {stderr1}, "
                    f"start/stop sshd stderr: {stderr2}"
                )

            return success
        except Exception as e:
            self.logger.error(f"SSH service {action} failed with exception: {e}")
            self.logger.debug(traceback.format_exc())
            return False

    def _disable_ssh_via_api(self) -> bool:
        """Disable SSH via XAPI, trying at most 3 times.

        A fresh XAPI session is created between attempts. The wait between
        attempts is interruptible, so a shutdown request aborts the retries.

        Returns:
            True if XAPI accepted the request; False if no session could be
            created up front, all attempts failed, or shutdown was requested.
        """
        if not self.session:
            self.session = self._create_session()
            if not self.session:
                return False

        max_retries = 3
        retry_interval = 5

        for attempt in range(1, max_retries + 1):
            if not self.running:
                return False
            try:
                if not self.session:
                    # Re-login after the previous failure did not succeed;
                    # count it as a failed attempt with a clear reason.
                    raise RuntimeError("XAPI session could not be re-created")
                host = self.session.xenapi.host.get_by_uuid(self.localhost_uuid)
                self.session.xenapi.host.disable_ssh(host)
                self.logger.info("Successfully disabled SSH via XAPI")
                return True
            except Exception as e:
                self.logger.warning(f"Disable SSH via API failed ({attempt}/{max_retries}): {e}")

            if attempt == max_retries:
                break
            # Use interruptible sleep
            if self.exit_event.wait(retry_interval):
                return False
            self._logout_session()
            self.session = self._create_session()

        # Drop the session that just failed so the next monitoring cycle
        # starts from a fresh login instead of reusing it.
        self._logout_session()
        self.logger.error(f"Disable SSH via API failed, max retries reached ({max_retries})")
        return False

    def run(self):
        """Main monitoring loop.

        Runs until a termination signal is received. A normal cycle is
        followed by a 60 second wait; a cycle that raised is followed by a
        5 second wait after dropping the XAPI session.
        """
        self.logger.info("Starting XAPI and SSH service monitoring...")

        self.session = self._create_session()
        if not self.session:
            self.logger.warning("Initial session creation failed, will retry later")

        while self.running:
            interval = 60
            try:
                # Check XAPI health - always perform the check
                xapi_healthy = self._check_xapi_health()

                # Get current SSH state
                current_ssh_state = self._get_ssh_state()
                self.logger.debug(f"Current SSH state: {current_ssh_state}")

                if xapi_healthy:
                    if current_ssh_state == SshState.ACTIVE:
                        self.logger.info("XAPI healthy: Stopping SSH service")
                        if not self._disable_ssh_via_api():
                            self.logger.warning("Disable SSH via API failed, keeping SSH service running")
                else:
                    if current_ssh_state != SshState.ACTIVE:
                        self.logger.info("XAPI unhealthy: Starting SSH service")
                        self._control_ssh_service(True)
            except Exception as e:
                self.logger.error(f"Runtime error: {e}")
                self.logger.debug(traceback.format_exc())
                # Also resets self.session, so the next cycle logs in again
                self._logout_session()
                # Retry sooner after an error
                interval = 5

            # Use interruptible sleep between cycles
            if self.exit_event.wait(interval):
                break

        self._logout_session()

        self.logger.info("Monitoring service stopped")
281+
282+
def main():
    """Run the monitor and exit with 0 on a clean stop, 1 on a fatal error."""
    logger.info(f"SSH Control Service starting (PID: {os.getpid()})")

    try:
        XapiMonitor().run()
    except Exception as exc:
        # Anything escaping the monitor is fatal: log it with its traceback
        # and report failure through the exit status.
        logger.critical(f"Fatal error in main process: {exc}")
        logger.critical(traceback.format_exc())
        exit_code = 1
    else:
        logger.info("SSH Control Service exited normally")
        exit_code = 0

    sys.exit(exit_code)


if __name__ == '__main__':
    main()

scripts/xapi-ssh-monitor.service

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[Unit]
2+
Description=XAPI SSH monitor service
3+
After=network.target
4+
After=xapi.service
5+
OnFailure=sshd.service
6+
7+
[Service]
8+
Type=simple
9+
RemainAfterExit=true
10+
ExecStart=/opt/xensource/bin/xapi-ssh-monitor
11+
ExecStop=/bin/true
12+
13+
[Install]
14+
WantedBy=multi-user.target

0 commit comments

Comments
 (0)