LiteSpeed Technologies
Download Download     Blog Blog     Wiki Wiki     Forum Forum     Store     Contact Contact    

Go Back   LiteSpeed Support Forums > LiteSpeed Web Server > Bug Reports > [Not Resolved] LSWS 4.1 - Spinlock

Reply
 
Thread Tools Display Modes
  #1  
Old 04-23-2011, 01:00 AM
MikeDVB MikeDVB is offline
Senior Member
 
Join Date: Jul 2009
Posts: 218
Default [Not Resolved] LSWS 4.1 - Spinlock

On LSWS 4.1 we're having LiteSpeed spinlock and stop serving requests. I ran an strace of the process while this was happening, but wasn't able to force a core dump before the problem somehow resolved itself.

Here's some of the strace (it's pages long of the same thing):
Code:
epoll_wait(7, {{EPOLLIN, {u32=0, u64=532575944704}}, {EPOLLIN, {u32=0, u64=627065225216}}, {EPOLLIN, {u32=0, u64=382252089344}}, {EPOLLIN, {u32=0, u64=1533303324672}}, {EPOLLIN, {u32=0, u64=399431958528}}, {EPOLLIN, {u32=0, u64=528280977408}}, {EPOLLIN, {u32=0, u64=365072220160}}, {EPOLLIN, {u32=0, u64=176093659136}}, {EPOLLIN, {u32=0, u64=691489734656}}, {EPOLLIN, {u32=0, u64=708669603840}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=81604378624}}, {EPOLLIN, {u32=0, u64=373662154752}}, {EPOLLIN, {u32=0, u64=571230650368}}, {EPOLLIN, {u32=0, u64=47244640256}}, {EPOLLIN, {u32=0, u64=201863462912}}, {EPOLLIN, {u32=0, u64=1490353651712}}, {EPOLLIN, {u32=0, u64=141733920768}}, {EPOLLIN, {u32=0, u64=657129996288}}, {EPOLLIN, {u32=0, u64=111669149696}}, {EPOLLIN, {u32=0, u64=287762808832}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=193273528320}}, {EPOLLIN, {u32=0, u64=545460846592}}, {EPOLLIN, {u32=0, u64=536870912000}}, {EPOLLIN, {u32=0, u64=107374182400}}, {EPOLLIN, {u32=0, u64=588410519552}}, {EPOLLIN, {u32=0, u64=639950127104}}, {EPOLLIN, {u32=0, u64=1267015352320}}, {EPOLLIN, {u32=0, u64=584115552256}}, {EPOLLIN, {u32=0, u64=579820584960}}, {EPOLLIN, {u32=0, u64=743029342208}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=554050781184}}, {EPOLLIN, {u32=0, u64=459561500672}}, {EPOLLIN, {u32=0, u64=1249835483136}}, {EPOLLIN, {u32=0, u64=115964116992}}, {EPOLLIN, {u32=0, u64=614180323328}}, {EPOLLIN, {u32=0, u64=296352743424}}, {EPOLLIN, {u32=0, u64=210453397504}}, {EPOLLIN, {u32=0, u64=1597727834112}}, {EPOLLIN, {u32=0, u64=236223201280}}, {EPOLLOUT, {u32=0, u64=9590661971968}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=700079669248}}, {EPOLLIN, {u32=0, u64=506806140928}}, {EPOLLIN, {u32=0, u64=442381631488}}, {EPOLLIN, {u32=0, u64=682899800064}}, {EPOLLIN, {u32=0, u64=674309865472}}, {EPOLLIN, {u32=0, u64=425201762304}}, {EPOLLIN, {u32=0, u64=330712481792}}, {EPOLLIN, {u32=0, u64=562640715776}}, {EPOLLIN, {u32=0, u64=98784247808}}, {EPOLLIN, {u32=0, u64=631360192512}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=691489734656}}, {EPOLLIN, {u32=0, u64=176093659136}}, {EPOLLIN, {u32=0, u64=365072220160}}, {EPOLLIN, {u32=0, u64=528280977408}}, {EPOLLIN, {u32=0, u64=399431958528}}, {EPOLLIN, {u32=0, u64=1533303324672}}, {EPOLLIN, {u32=0, u64=382252089344}}, {EPOLLIN, {u32=0, u64=627065225216}}, {EPOLLIN, {u32=0, u64=532575944704}}, {EPOLLIN, {u32=0, u64=279172874240}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=111669149696}}, {EPOLLIN, {u32=0, u64=657129996288}}, {EPOLLIN, {u32=0, u64=141733920768}}, {EPOLLIN, {u32=0, u64=1490353651712}}, {EPOLLIN, {u32=0, u64=201863462912}}, {EPOLLIN, {u32=0, u64=47244640256}}, {EPOLLIN, {u32=0, u64=571230650368}}, {EPOLLIN, {u32=0, u64=373662154752}}, {EPOLLIN, {u32=0, u64=81604378624}}, {EPOLLIN, {u32=0, u64=708669603840}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=579820584960}}, {EPOLLIN, {u32=0, u64=584115552256}}, {EPOLLIN, {u32=0, u64=1267015352320}}, {EPOLLIN, {u32=0, u64=639950127104}}, {EPOLLIN, {u32=0, u64=588410519552}}, {EPOLLIN, {u32=0, u64=107374182400}}, {EPOLLIN, {u32=0, u64=536870912000}}, {EPOLLIN, {u32=0, u64=545460846592}}, {EPOLLIN, {u32=0, u64=193273528320}}, {EPOLLIN, {u32=0, u64=287762808832}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=236223201280}}, {EPOLLIN, {u32=0, u64=1597727834112}}, {EPOLLIN, {u32=0, u64=210453397504}}, {EPOLLIN, {u32=0, u64=296352743424}}, {EPOLLIN, {u32=0, u64=614180323328}}, {EPOLLIN, {u32=0, u64=115964116992}}, {EPOLLIN, {u32=0, u64=1249835483136}}, {EPOLLIN, {u32=0, u64=459561500672}}, {EPOLLIN, {u32=0, u64=554050781184}}, {EPOLLIN, {u32=0, u64=743029342208}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=98784247808}}, {EPOLLIN, {u32=0, u64=562640715776}}, {EPOLLIN, {u32=0, u64=330712481792}}, {EPOLLIN, {u32=0, u64=425201762304}}, {EPOLLIN, {u32=0, u64=674309865472}}, {EPOLLIN, {u32=0, u64=682899800064}}, {EPOLLIN, {u32=0, u64=442381631488}}, {EPOLLIN, {u32=0, u64=506806140928}}, {EPOLLIN, {u32=0, u64=700079669248}}, {EPOLLOUT, {u32=0, u64=9590661971968}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=532575944704}}, {EPOLLIN, {u32=0, u64=627065225216}}, {EPOLLIN, {u32=0, u64=382252089344}}, {EPOLLIN, {u32=0, u64=1533303324672}}, {EPOLLIN, {u32=0, u64=399431958528}}, {EPOLLIN, {u32=0, u64=528280977408}}, {EPOLLIN, {u32=0, u64=365072220160}}, {EPOLLIN, {u32=0, u64=176093659136}}, {EPOLLIN, {u32=0, u64=691489734656}}, {EPOLLIN, {u32=0, u64=631360192512}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=81604378624}}, {EPOLLIN, {u32=0, u64=373662154752}}, {EPOLLIN, {u32=0, u64=571230650368}}, {EPOLLIN, {u32=0, u64=47244640256}}, {EPOLLIN, {u32=0, u64=201863462912}}, {EPOLLIN, {u32=0, u64=1490353651712}}, {EPOLLIN, {u32=0, u64=141733920768}}, {EPOLLIN, {u32=0, u64=657129996288}}, {EPOLLIN, {u32=0, u64=111669149696}}, {EPOLLIN, {u32=0, u64=279172874240}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=193273528320}}, {EPOLLIN, {u32=0, u64=545460846592}}, {EPOLLIN, {u32=0, u64=536870912000}}, {EPOLLIN, {u32=0, u64=107374182400}}, {EPOLLIN, {u32=0, u64=588410519552}}, {EPOLLIN, {u32=0, u64=639950127104}}, {EPOLLIN, {u32=0, u64=1267015352320}}, {EPOLLIN, {u32=0, u64=584115552256}}, {EPOLLIN, {u32=0, u64=579820584960}}, {EPOLLIN, {u32=0, u64=708669603840}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=554050781184}}, {EPOLLIN, {u32=0, u64=459561500672}}, {EPOLLIN, {u32=0, u64=1249835483136}}, {EPOLLIN, {u32=0, u64=115964116992}}, {EPOLLIN, {u32=0, u64=614180323328}}, {EPOLLIN, {u32=0, u64=296352743424}}, {EPOLLIN, {u32=0, u64=210453397504}}, {EPOLLIN, {u32=0, u64=1597727834112}}, {EPOLLIN, {u32=0, u64=236223201280}}, {EPOLLIN, {u32=0, u64=287762808832}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=700079669248}}, {EPOLLIN, {u32=0, u64=506806140928}}, {EPOLLIN, {u32=0, u64=442381631488}}, {EPOLLIN, {u32=0, u64=682899800064}}, {EPOLLIN, {u32=0, u64=674309865472}}, {EPOLLIN, {u32=0, u64=425201762304}}, {EPOLLIN, {u32=0, u64=330712481792}}, {EPOLLIN, {u32=0, u64=562640715776}}, {EPOLLIN, {u32=0, u64=98784247808}}, {EPOLLIN, {u32=0, u64=743029342208}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=691489734656}}, {EPOLLIN, {u32=0, u64=176093659136}}, {EPOLLIN, {u32=0, u64=365072220160}}, {EPOLLIN, {u32=0, u64=528280977408}}, {EPOLLIN, {u32=0, u64=399431958528}}, {EPOLLIN, {u32=0, u64=1533303324672}}, {EPOLLIN, {u32=0, u64=382252089344}}, {EPOLLIN, {u32=0, u64=627065225216}}, {EPOLLIN, {u32=0, u64=532575944704}}, {EPOLLOUT, {u32=0, u64=9590661971968}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=111669149696}}, {EPOLLIN, {u32=0, u64=657129996288}}, {EPOLLIN, {u32=0, u64=141733920768}}, {EPOLLIN, {u32=0, u64=1490353651712}}, {EPOLLIN, {u32=0, u64=201863462912}}, {EPOLLIN, {u32=0, u64=47244640256}}, {EPOLLIN, {u32=0, u64=571230650368}}, {EPOLLIN, {u32=0, u64=373662154752}}, {EPOLLIN, {u32=0, u64=81604378624}}, {EPOLLIN, {u32=0, u64=631360192512}}}, 10, 100) = 10
It's done this three times tonight, and I only had the presence of mind to strace the process the last time around, thanks to Tony at HawkHost. I fully expect it to happen again, at which point I'm going to force a core dump and get it to bug@.

This also happened on another server yesterday, but only once; it hasn't happened again on that server, and the hardware/software is identical.
__________________
Michael Denney - MDDHosting, LLC - Professional Hosting Solutions
LiteSpeed Powered - Shared, Reseller, Semi-Dedicated, and VPS
http://www.mddhosting.com/

Last edited by NiteWave; 05-08-2011 at 07:35 AM..
Reply With Quote
  #2  
Old 04-23-2011, 01:09 AM
MikeDVB MikeDVB is offline
Senior Member
 
Join Date: Jul 2009
Posts: 218
It happened again, and I forced a signal 11 core dump and have gotten it to bug@.

Looks to be an issue with epoll?

Code:
Reading symbols from /usr/local/lsws/bin/lshttpd...(no debugging symbols found)...done.

warning: core file may not match specified executable file.
[New Thread 7014]
[New Thread 7013]
Reading symbols from /lib64/libpthread.so.0...(no debugging symbols found)...done.
Loaded symbols for /lib64/libpthread.so.0
Reading symbols from /lib64/libm.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib64/libm.so.6
Reading symbols from /lib64/libc.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib64/libc.so.6
Reading symbols from /lib64/libcrypt.so.1...(no debugging symbols found)...done.
Loaded symbols for /lib64/libcrypt.so.1
Reading symbols from /lib64/libdl.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib64/libdl.so.2
Reading symbols from /lib64/ld-linux-x86-64.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib64/ld-linux-x86-64.so.2
Reading symbols from /lib64/libnss_files.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib64/libnss_files.so.2
Core was generated by `litespeed'.
Program terminated with signal 11, Segmentation fault.
#0  0x0000003e820d0f69 in syscall () from /lib64/libc.so.6
(gdb) bt
#0  0x0000003e820d0f69 in syscall () from /lib64/libc.so.6
#1  0x00000000004f679d in epoll::waitAndProcessEvents (this=0xa27cf10, iTimeoutMilliSec=100) at /httpd/edio/epoll.cpp:229
#2  0x000000000044f280 in EventDispatcher::run (this=0xa23e218) at /httpd/http/eventdispatcher.cpp:225
#3  0x000000000040fca5 in HttpServerImpl::start (this=0xa23e1f0) at /httpd/main/httpserver.cpp:475
#4  0x0000000000412cab in HttpServer::start (this=0x8772d0) at /httpd/main/httpserver.cpp:1849
#5  0x0000000000409eee in LshttpdMain::main (this=0xa23de30, argc=1, argv=0x7fff43247638) at /httpd/main/lshttpdmain.cpp:1761
#6  0x0000000000405a3f in main (argc=1, argv=0x7fff43247638) at /httpd/main.cpp:121
I've had to go to 4.1RC4 as this keeps happening on the release/debug version of 4.1.
__________________
Michael Denney - MDDHosting, LLC - Professional Hosting Solutions
LiteSpeed Powered - Shared, Reseller, Semi-Dedicated, and VPS
http://www.mddhosting.com/

Last edited by MikeDVB; 04-23-2011 at 01:20 AM..
Reply With Quote
  #3  
Old 04-23-2011, 01:59 AM
MikeDVB MikeDVB is offline
Senior Member
 
Join Date: Jul 2009
Posts: 218
I've been forced to roll back to 4.0.20 as 4.1RC4 and 4.1 are exhibiting this behavior on two separate systems.
__________________
Michael Denney - MDDHosting, LLC - Professional Hosting Solutions
LiteSpeed Powered - Shared, Reseller, Semi-Dedicated, and VPS
http://www.mddhosting.com/
Reply With Quote
  #4  
Old 04-24-2011, 10:01 PM
MikeDVB MikeDVB is offline
Senior Member
 
Join Date: Jul 2009
Posts: 218
George has gotten us a new debug build of the software, as he believes he's resolved this issue. Apparently it's due to incorrect accounting of concurrent connections for a vhost. We'll see; it shouldn't take long to find out whether this fixed the issue or not.
__________________
Michael Denney - MDDHosting, LLC - Professional Hosting Solutions
LiteSpeed Powered - Shared, Reseller, Semi-Dedicated, and VPS
http://www.mddhosting.com/
Reply With Quote
  #5  
Old 04-26-2011, 12:56 AM
MikeDVB MikeDVB is offline
Senior Member
 
Join Date: Jul 2009
Posts: 218
Enough time has passed since George got us the new build, which fixed the concurrent connection bug that was causing our spinlock, that I feel this can be marked resolved.
__________________
Michael Denney - MDDHosting, LLC - Professional Hosting Solutions
LiteSpeed Powered - Shared, Reseller, Semi-Dedicated, and VPS
http://www.mddhosting.com/
Reply With Quote
  #6  
Old 04-28-2011, 12:43 PM
MikeDVB MikeDVB is offline
Senior Member
 
Join Date: Jul 2009
Posts: 218
This isn't resolved; it just happened on another server.

Strace:
Code:
epoll_wait(8, {{EPOLLIN, {u32=0, u64=1082331758592}}, {EPOLLIN, {u32=0, u64=1099511627776}}, {EPOLLIN, {u32=0, u64=601295421440}}, {EPOLLIN, {u32=0, u64=146028888064}}, {EPOLLIN, {u32=0, u64=515396075520}}, {EPOLLIN, {u32=0, u64=455266533376}}, {EPOLLIN, {u32=0, u64=635655159808}}, {EPOLLIN, {u32=0, u64=566935683072}}, {EPOLLIN, {u32=0, u64=721554505728}}, {EPOLLIN, {u32=0, u64=910533066752}}}, 10, 100) = 10
epoll_wait(8, {{EPOLLIN, {u32=0, u64=68719476736}}, {EPOLLIN, {u32=0, u64=60129542144}}, {EPOLLIN, {u32=0, u64=51539607552}}, {EPOLLIN, {u32=0, u64=687194767360}}, {EPOLLIN, {u32=0, u64=987842478080}}, {EPOLLIN, {u32=0, u64=996432412672}}, {EPOLLIN, {u32=0, u64=678604832768}}, {EPOLLIN, {u32=0, u64=1120986464256}}, {EPOLLIN, {u32=0, u64=919123001344}}, {EPOLLIN, {u32=0, u64=695784701952}}}, 10, 100) = 10
epoll_wait(8, {{EPOLLIN, {u32=0, u64=644245094400}}, {EPOLLIN, {u32=0, u64=618475290624}}, {EPOLLIN, {u32=0, u64=609885356032}}, {EPOLLIN, {u32=0, u64=798863917056}}, {EPOLLIN, {u32=0, u64=532575944704}}, {EPOLLIN, {u32=0, u64=970662608896}}, {EPOLLIN, {u32=0, u64=773094113280}}, {EPOLLIN, {u32=0, u64=893353197568}}, {EPOLLIN, {u32=0, u64=292057776128}}, {EPOLLIN, {u32=0, u64=979252543488}}}, 10, 100) = 10
Backtrace:
Code:
[New Thread 23136]
[New Thread 23135]
Core was generated by `litespeed'.
Program terminated with signal 11, Segmentation fault.
#0  0x00000038ede0b150 in pthread_cond_timedwait@@GLIBC_2.3.2 ()
  from /lib64/libpthread.so.0
#0  0x00000038ede0b150 in pthread_cond_timedwait@@GLIBC_2.3.2 ()
  from /lib64/libpthread.so.0
#1  0x00000000004f77f7 in PThreadCond::wait (this=0x571fcb8, 
   pMutex=0x571fc90, lMilliSec=1000)
   at /home/gwang/release/httpd/httpd/thread/pthread/pthreadcond.cpp:33
#2  0x000000000041665b in PThreadWorkQueue<BlockOpReq>::get (this=0x571fc90, 
   pWork=0x42ec3db0, size=@0x42ec3dac, lMilliSec=1000)
   at /home/gwang/release/httpd/httpd/thread/pthread/pthreadworkqueue.h:147
#3  0x0000000000416563 in TWorkQueue<BlockOpReq, PThreadWorkQueue<BlockOpReq> >::get (this=0x571fc90, pWork=0x42ec3db0, size=@0x42ec3dac, lMilliSec=1000)
   at /home/gwang/release/httpd/httpd/thread/workqueue.h:37
#4  0x00000000004163da in WorkCrew<BlockOpReq, BlockOpDoWork, WorkQueue<BlockOpReq> >::TCrewMember<BlockOpReq, BlockOpDoWork, WorkQueue<BlockOpReq> >::run (
   this=0x5709030) at /home/gwang/release/httpd/httpd/thread/workcrew.h:307
#5  0x000000000041631c in PThreadImpl<WorkCrew<BlockOpReq, BlockOpDoWork, WorkQueue<BlockOpReq> >::TCrewMember<BlockOpReq, BlockOpDoWork, WorkQueue<BlockOpReq> > >::threadRoutine (arg=0x5709030)
   at /home/gwang/release/httpd/httpd/thread/pthread/pthreadimpl.h:98
#6  0x00000038ede0673d in start_thread () from /lib64/libpthread.so.0
#7  0x00000038ed6d44bd in clone () from /lib64/libc.so.6
__________________
Michael Denney - MDDHosting, LLC - Professional Hosting Solutions
LiteSpeed Powered - Shared, Reseller, Semi-Dedicated, and VPS
http://www.mddhosting.com/
Reply With Quote
Reply

Thread Tools
Display Modes

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is On
HTML code is Off

Forum Jump


All times are GMT -7. The time now is 11:43 AM.



- Archive - Top
© Copyright 2003-2011 LiteSpeed Technologies, Inc. All rights reserved. Privacy Policy.