Now when we have dropped solaris support. We can gladly use /proc and simplify parsing our code.
VSMAgent doesn't respond: > Traceback (most recent call last): > File "/opt/thinlinc/sbin/vsmagent", line 20, in <module> > VSMAgent ( sys . argv ) > File "/opt/thinlinc/modules/thinlinc/vsm/vsmagent.py", line 148, in __init__ > self . housekeeping ( ) > File "/opt/thinlinc/modules/thinlinc/vsm/vsmagent.py", line 180, in housekeeping > self . update_sshd_permitopen ( ) > File "/opt/thinlinc/modules/thinlinc/vsm/vsmagent.py", line 334, in >update_sshd_permitopen > II = get_tl_xvnc_processes ( ) > File "/opt/thinlinc/modules/thinlinc/ctccommon.py", line 461, in get_tl_xvnc_processes > Iii1I1111ii = os . readlink ( "/proc/%s/exe" % OooooOOoo0 ) > OSError: [Errno 2] No such file or directory: '/proc/2/exe'
(In reply to comment #6) > VSMAgent doesn't respond: > > > Traceback (most recent call last): > > File "/opt/thinlinc/sbin/vsmagent", line 20, in <module> > > VSMAgent ( sys . argv ) > > File "/opt/thinlinc/modules/thinlinc/vsm/vsmagent.py", line 148, in __init__ > > self . housekeeping ( ) > > File "/opt/thinlinc/modules/thinlinc/vsm/vsmagent.py", line 180, in housekeeping > > self . update_sshd_permitopen ( ) > > File "/opt/thinlinc/modules/thinlinc/vsm/vsmagent.py", line 334, in >update_sshd_permitopen > > II = get_tl_xvnc_processes ( ) > > File "/opt/thinlinc/modules/thinlinc/ctccommon.py", line 461, in get_tl_xvnc_processes > > Iii1I1111ii = os . readlink ( "/proc/%s/exe" % OooooOOoo0 ) > > OSError: [Errno 2] No such file or directory: '/proc/2/exe' Fixed in commit r32225
tlwebaccess fails when logging in: > 2017-02-23 11:02:49 INFO tlwebaccess[13776]: [::ffff:10.48.0.107] 'GET /websocket/tester/2 HTTP/1.1' 101 - > 2017-02-23 11:02:49 INFO tlwebaccess[13776]: [::ffff:10.48.0.107] Plain non-SSL (ws://) WebSocket connection > 2017-02-23 11:02:49 INFO tlwebaccess[13776]: [::ffff:10.48.0.107] Version hybi-13, base64: 'False' > 2017-02-23 11:02:49 INFO tlwebaccess[13776]: [::ffff:10.48.0.107] Path: '/websocket/tester/2' > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] ---------------------------------------- > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] Exception happened during processing of request from ('::ffff:10.48.0.107', 52231, 0, 0) > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] Traceback (most recent call last): > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/usr/lib64/python2.7/SocketServer.py", line 571, in process_request > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] self.finish_request(request, client_address) > 2017-02-23 11:02:49 INFO tlwebaccess[13778]: [::ffff:10.48.0.107] 'GET /app/images/handle_bg.svg HTTP/1.1' 200 - > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/opt/thinlinc/sbin/tlwebaccess", line 615, in finish_request > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] ForkingTLSServer . finish_request ( self , request , client_address ) > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/opt/thinlinc/modules/thinlinc/tlstunnel.py", line 81, in finish_request > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] self . TLSRequestHandlerClass ( request , client_address , self ) > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/opt/thinlinc/modules/thinlinc/websocket.py", line 102, in __init__ > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] SimpleHTTPRequestHandler.__init__(self, req, addr, server) > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/usr/lib64/python2.7/SocketServer.py", line 652, in __init__ > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] self.handle() > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/opt/thinlinc/sbin/tlwebaccess", line 79, in handle > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] thinlinc . websocket . WebSocketRequestHandler . handle ( self ) > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/opt/thinlinc/modules/thinlinc/websocket.py", line 543, in handle > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] SimpleHTTPRequestHandler.handle(self) > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/usr/lib64/python2.7/BaseHTTPServer.py", line 340, in handle > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] self.handle_one_request() > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/usr/lib64/python2.7/BaseHTTPServer.py", line 328, in handle_one_request > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] method() > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/opt/thinlinc/sbin/tlwebaccess", line 232, in do_GET > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] if not self . handle_websocket ( ) : > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/opt/thinlinc/sbin/tlwebaccess", line 272, in handle_websocket > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] return thinlinc . websocket . WebSocketRequestHandler . handle_websocket ( self ) > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/opt/thinlinc/modules/thinlinc/websocket.py", line 497, in handle_websocket > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] self.new_websocket_client() > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/opt/thinlinc/sbin/tlwebaccess", line 489, in new_websocket_client > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] o0oOoOO = thinlinc . ctccommon . get_tl_xvnc_processes ( ) > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] File "/opt/thinlinc/modules/thinlinc/ctccommon.py", line 495, in get_tl_xvnc_processes > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] result[int(pid)] = (uid, display, rfbport) > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] UnboundLocalError: local variable 'display' referenced before assignment > 2017-02-23 11:02:49 ERROR tlwebaccess[13776]: [::ffff:10.48.0.107] > ----------------------------------------
On startup of agent with latest jenkins build (build 553): > Traceback (most recent call last): > File "/opt/thinlinc/sbin/vsmagent", line 20, in <module> > VSMAgent ( sys . argv ) > File "/opt/thinlinc/modules/thinlinc/vsm/vsmagent.py", line 148, in __init__ > self . housekeeping ( ) > File "/opt/thinlinc/modules/thinlinc/vsm/vsmagent.py", line 180, in housekeeping > self . update_sshd_permitopen ( ) > File "/opt/thinlinc/modules/thinlinc/vsm/vsmagent.py", line 334, in update_sshd_permitopen > II = get_tl_xvnc_processes ( ) > File "/opt/thinlinc/modules/thinlinc/ctccommon.py", line 495, in get_tl_xvnc_processes > Oo0o [ int ( OooooOOoo0 ) ] = ( i11ii1iI , oooO , i1I1i111Ii ) > UnboundLocalError: local variable 'i1I1i111Ii' referenced before > assignment
Probably caused by a manually started Xvnc? $ ps aux | grep Xvnc samuel 8442 0.0 0.0 118520 948 pts/7 S+ 12:52 0:00 grep --color=auto Xvnc samuel 12535 0.0 0.8 155256 132576 pts/1 S+ jan23 15:35 /opt/thinlinc/libexec/Xvnc :50 -PasswordFile=/home/samuel/.vnc/passwd -ac -KeyboardMap=sv
(In reply to comment #11) > Probably caused by a manually started Xvnc? > > $ ps aux | grep Xvnc > samuel 8442 0.0 0.0 118520 948 pts/7 S+ 12:52 0:00 grep > --color=auto Xvnc > samuel 12535 0.0 0.8 155256 132576 pts/1 S+ jan23 15:35 > /opt/thinlinc/libexec/Xvnc :50 -PasswordFile=/home/samuel/.vnc/passwd -ac > -KeyboardMap=sv No it was a bug introduced in commit 32225, fixed in commit 32226
(In reply to comment #12) > No it was a bug introduced in commit 32225, fixed in commit 32226 It still happens with a build after commit 32226. If I terminate the non-thinlinc Xvnc process, vsmagent starts without tracebacks.
Tested jenkins build 554.
The load balancer changes will be tested on bug 6167, so only get_tl_xvnc_processes() will be tested here. I also grepped for remaining calls to "ps" and could not find anything in the source tree.
Tested: - tlwebaccess - tl-notify - permitopen All work well. I also tested removing the Xvnc binary to check that the "(deleted)" case was handler properly, and I found no issues. Finally I checked the code and couldn't see any new ways of fooling it or making it crash.
This change produces the following traceback reproducible on a low performance agent: 2017-04-04 15:17:34 INFO vsmagent.session: Verified connectivity to newly started Xvnc for cendio@lab.lkpg.cendio.se Traceback (most recent call last): File "/opt/thinlinc/sbin/vsmagent", line 20, in <module> VSMAgent ( sys . argv ) File "/opt/thinlinc/modules/thinlinc/vsm/vsmagent.py", line 167, in __init__ self . loop ( ) File "/opt/thinlinc/modules/thinlinc/vsm/async.py", line 423, in loop I11I11i1I = self . run_delayed_calls ( ) File "/opt/thinlinc/modules/thinlinc/vsm/async.py", line 381, in run_delayed_calls oOO0OOOo . func ( * oOO0OOOo . args , ** oOO0OOOo . kw ) File "/opt/thinlinc/modules/thinlinc/vsm/vsmagent.py", line 177, in housekeeping self . loadinfokeeper . fetch_dynamic_loadinfo ( ) File "/opt/thinlinc/modules/thinlinc/vsm/loadinfokeeper.py", line 161, in fetch_dynamic_loadinfo Ii1i11IIii1I [ O00 ( int ( O0o0Ooo ) ) ] = 1 File "/opt/thinlinc/modules/thinlinc/vsm/loadinfokeeper.py", line 178, in O00 O00Oo000ooO0 = o00OO00OoO . readlines ( ) IOError: [Errno 3] No such process Fix would be something like this: Index: loadinfokeeper.py =================================================================== --- loadinfokeeper.py (revision 32320) +++ loadinfokeeper.py (working copy) @@ -172,14 +172,13 @@ def _get_real_uid_for_pid(pid): try: f = open("/proc/%s/status" % pid) + + lines = f.readlines() + for line in lines: + if line.startswith("Uid:"): + fields = line.split() + return fields[1] except: - return -1 + pass - lines = f.readlines() - for line in lines: - if line.startswith("Uid:"): - fields = line.split() - return fields[1] - - # Should never be reached, return -1 as uid return -1
I was unable to get my system to a state where I could provoke this issue, so instead I inspected the code and added a unit test that could show that it was indeed fixed.
Upgrading from 4.7.0 on RHEL 5 loses sessions, and it seems to be caused by the code changed in this bug: > 2017-05-18 10:49:15 WARNING vsmagent.session: Broken session for user cendio, tl-session pid 4869 is not tl-session > [root@dhcp-253-137 tl-4.8.0-server]# ps fax| grep 4869 > 6401 pts/1 S+ 0:00 | \_ grep 4869 > 4869 ? S 0:00 tl-session: cendio
Wrong bug. Meant for bug 6142.