|
|

04-23-2011, 01:00 AM
|
|
Senior Member
|
|
Join Date: Jul 2009
Posts: 218
|
|
[Not Resolved] LSWS 4.1 - Spinlock
On LSWS 4.1 we're seeing LiteSpeed spinlock and stop serving requests. I ran an strace on the process while this was happening, but the problem somehow resolved itself before I was able to force a core dump.
Here's some of the strace (it's pages long of the same thing):
Code:
epoll_wait(7, {{EPOLLIN, {u32=0, u64=532575944704}}, {EPOLLIN, {u32=0, u64=627065225216}}, {EPOLLIN, {u32=0, u64=382252089344}}, {EPOLLIN, {u32=0, u64=1533303324672}}, {EPOLLIN, {u32=0, u64=399431958528}}, {EPOLLIN, {u32=0, u64=528280977408}}, {EPOLLIN, {u32=0, u64=365072220160}}, {EPOLLIN, {u32=0, u64=176093659136}}, {EPOLLIN, {u32=0, u64=691489734656}}, {EPOLLIN, {u32=0, u64=708669603840}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=81604378624}}, {EPOLLIN, {u32=0, u64=373662154752}}, {EPOLLIN, {u32=0, u64=571230650368}}, {EPOLLIN, {u32=0, u64=47244640256}}, {EPOLLIN, {u32=0, u64=201863462912}}, {EPOLLIN, {u32=0, u64=1490353651712}}, {EPOLLIN, {u32=0, u64=141733920768}}, {EPOLLIN, {u32=0, u64=657129996288}}, {EPOLLIN, {u32=0, u64=111669149696}}, {EPOLLIN, {u32=0, u64=287762808832}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=193273528320}}, {EPOLLIN, {u32=0, u64=545460846592}}, {EPOLLIN, {u32=0, u64=536870912000}}, {EPOLLIN, {u32=0, u64=107374182400}}, {EPOLLIN, {u32=0, u64=588410519552}}, {EPOLLIN, {u32=0, u64=639950127104}}, {EPOLLIN, {u32=0, u64=1267015352320}}, {EPOLLIN, {u32=0, u64=584115552256}}, {EPOLLIN, {u32=0, u64=579820584960}}, {EPOLLIN, {u32=0, u64=743029342208}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=554050781184}}, {EPOLLIN, {u32=0, u64=459561500672}}, {EPOLLIN, {u32=0, u64=1249835483136}}, {EPOLLIN, {u32=0, u64=115964116992}}, {EPOLLIN, {u32=0, u64=614180323328}}, {EPOLLIN, {u32=0, u64=296352743424}}, {EPOLLIN, {u32=0, u64=210453397504}}, {EPOLLIN, {u32=0, u64=1597727834112}}, {EPOLLIN, {u32=0, u64=236223201280}}, {EPOLLOUT, {u32=0, u64=9590661971968}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=700079669248}}, {EPOLLIN, {u32=0, u64=506806140928}}, {EPOLLIN, {u32=0, u64=442381631488}}, {EPOLLIN, {u32=0, u64=682899800064}}, {EPOLLIN, {u32=0, u64=674309865472}}, {EPOLLIN, {u32=0, u64=425201762304}}, {EPOLLIN, {u32=0, u64=330712481792}}, {EPOLLIN, {u32=0, u64=562640715776}}, {EPOLLIN, {u32=0, u64=98784247808}}, {EPOLLIN, {u32=0, u64=631360192512}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=691489734656}}, {EPOLLIN, {u32=0, u64=176093659136}}, {EPOLLIN, {u32=0, u64=365072220160}}, {EPOLLIN, {u32=0, u64=528280977408}}, {EPOLLIN, {u32=0, u64=399431958528}}, {EPOLLIN, {u32=0, u64=1533303324672}}, {EPOLLIN, {u32=0, u64=382252089344}}, {EPOLLIN, {u32=0, u64=627065225216}}, {EPOLLIN, {u32=0, u64=532575944704}}, {EPOLLIN, {u32=0, u64=279172874240}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=111669149696}}, {EPOLLIN, {u32=0, u64=657129996288}}, {EPOLLIN, {u32=0, u64=141733920768}}, {EPOLLIN, {u32=0, u64=1490353651712}}, {EPOLLIN, {u32=0, u64=201863462912}}, {EPOLLIN, {u32=0, u64=47244640256}}, {EPOLLIN, {u32=0, u64=571230650368}}, {EPOLLIN, {u32=0, u64=373662154752}}, {EPOLLIN, {u32=0, u64=81604378624}}, {EPOLLIN, {u32=0, u64=708669603840}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=579820584960}}, {EPOLLIN, {u32=0, u64=584115552256}}, {EPOLLIN, {u32=0, u64=1267015352320}}, {EPOLLIN, {u32=0, u64=639950127104}}, {EPOLLIN, {u32=0, u64=588410519552}}, {EPOLLIN, {u32=0, u64=107374182400}}, {EPOLLIN, {u32=0, u64=536870912000}}, {EPOLLIN, {u32=0, u64=545460846592}}, {EPOLLIN, {u32=0, u64=193273528320}}, {EPOLLIN, {u32=0, u64=287762808832}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=236223201280}}, {EPOLLIN, {u32=0, u64=1597727834112}}, {EPOLLIN, {u32=0, u64=210453397504}}, {EPOLLIN, {u32=0, u64=296352743424}}, {EPOLLIN, {u32=0, u64=614180323328}}, {EPOLLIN, {u32=0, u64=115964116992}}, {EPOLLIN, {u32=0, u64=1249835483136}}, {EPOLLIN, {u32=0, u64=459561500672}}, {EPOLLIN, {u32=0, u64=554050781184}}, {EPOLLIN, {u32=0, u64=743029342208}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=98784247808}}, {EPOLLIN, {u32=0, u64=562640715776}}, {EPOLLIN, {u32=0, u64=330712481792}}, {EPOLLIN, {u32=0, u64=425201762304}}, {EPOLLIN, {u32=0, u64=674309865472}}, {EPOLLIN, {u32=0, u64=682899800064}}, {EPOLLIN, {u32=0, u64=442381631488}}, {EPOLLIN, {u32=0, u64=506806140928}}, {EPOLLIN, {u32=0, u64=700079669248}}, {EPOLLOUT, {u32=0, u64=9590661971968}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=532575944704}}, {EPOLLIN, {u32=0, u64=627065225216}}, {EPOLLIN, {u32=0, u64=382252089344}}, {EPOLLIN, {u32=0, u64=1533303324672}}, {EPOLLIN, {u32=0, u64=399431958528}}, {EPOLLIN, {u32=0, u64=528280977408}}, {EPOLLIN, {u32=0, u64=365072220160}}, {EPOLLIN, {u32=0, u64=176093659136}}, {EPOLLIN, {u32=0, u64=691489734656}}, {EPOLLIN, {u32=0, u64=631360192512}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=81604378624}}, {EPOLLIN, {u32=0, u64=373662154752}}, {EPOLLIN, {u32=0, u64=571230650368}}, {EPOLLIN, {u32=0, u64=47244640256}}, {EPOLLIN, {u32=0, u64=201863462912}}, {EPOLLIN, {u32=0, u64=1490353651712}}, {EPOLLIN, {u32=0, u64=141733920768}}, {EPOLLIN, {u32=0, u64=657129996288}}, {EPOLLIN, {u32=0, u64=111669149696}}, {EPOLLIN, {u32=0, u64=279172874240}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=193273528320}}, {EPOLLIN, {u32=0, u64=545460846592}}, {EPOLLIN, {u32=0, u64=536870912000}}, {EPOLLIN, {u32=0, u64=107374182400}}, {EPOLLIN, {u32=0, u64=588410519552}}, {EPOLLIN, {u32=0, u64=639950127104}}, {EPOLLIN, {u32=0, u64=1267015352320}}, {EPOLLIN, {u32=0, u64=584115552256}}, {EPOLLIN, {u32=0, u64=579820584960}}, {EPOLLIN, {u32=0, u64=708669603840}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=554050781184}}, {EPOLLIN, {u32=0, u64=459561500672}}, {EPOLLIN, {u32=0, u64=1249835483136}}, {EPOLLIN, {u32=0, u64=115964116992}}, {EPOLLIN, {u32=0, u64=614180323328}}, {EPOLLIN, {u32=0, u64=296352743424}}, {EPOLLIN, {u32=0, u64=210453397504}}, {EPOLLIN, {u32=0, u64=1597727834112}}, {EPOLLIN, {u32=0, u64=236223201280}}, {EPOLLIN, {u32=0, u64=287762808832}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=700079669248}}, {EPOLLIN, {u32=0, u64=506806140928}}, {EPOLLIN, {u32=0, u64=442381631488}}, {EPOLLIN, {u32=0, u64=682899800064}}, {EPOLLIN, {u32=0, u64=674309865472}}, {EPOLLIN, {u32=0, u64=425201762304}}, {EPOLLIN, {u32=0, u64=330712481792}}, {EPOLLIN, {u32=0, u64=562640715776}}, {EPOLLIN, {u32=0, u64=98784247808}}, {EPOLLIN, {u32=0, u64=743029342208}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=691489734656}}, {EPOLLIN, {u32=0, u64=176093659136}}, {EPOLLIN, {u32=0, u64=365072220160}}, {EPOLLIN, {u32=0, u64=528280977408}}, {EPOLLIN, {u32=0, u64=399431958528}}, {EPOLLIN, {u32=0, u64=1533303324672}}, {EPOLLIN, {u32=0, u64=382252089344}}, {EPOLLIN, {u32=0, u64=627065225216}}, {EPOLLIN, {u32=0, u64=532575944704}}, {EPOLLOUT, {u32=0, u64=9590661971968}}}, 10, 100) = 10
epoll_wait(7, {{EPOLLIN, {u32=0, u64=111669149696}}, {EPOLLIN, {u32=0, u64=657129996288}}, {EPOLLIN, {u32=0, u64=141733920768}}, {EPOLLIN, {u32=0, u64=1490353651712}}, {EPOLLIN, {u32=0, u64=201863462912}}, {EPOLLIN, {u32=0, u64=47244640256}}, {EPOLLIN, {u32=0, u64=571230650368}}, {EPOLLIN, {u32=0, u64=373662154752}}, {EPOLLIN, {u32=0, u64=81604378624}}, {EPOLLIN, {u32=0, u64=631360192512}}}, 10, 100) = 10
It's done this three times tonight, and I only had the presence of mind to strace the process the last time around, thanks to Tony at HawkHost. I fully expect it to happen again, at which point I'm going to force a core dump and get it to bug@.
I had this happen on another server yesterday as well, but it only happened once and hasn't happened on that server since. The hardware and software on both servers are identical.
__________________
█ Michael Denney - MDDHosting, LLC - Professional Hosting Solutions
█ LiteSpeed Powered - Shared, Reseller, Semi-Dedicated, and VPS
█ http://www.mddhosting.com/
Last edited by NiteWave; 05-08-2011 at 07:35 AM..
|

04-23-2011, 01:09 AM
|
|
Senior Member
|
|
Join Date: Jul 2009
Posts: 218
|
|
It happened again, and I forced a signal 11 core dump and have gotten it to bug@.
Looks to be an issue with epoll?
Code:
Reading symbols from /usr/local/lsws/bin/lshttpd...(no debugging symbols found)...done.
warning: core file may not match specified executable file.
[New Thread 7014]
[New Thread 7013]
Reading symbols from /lib64/libpthread.so.0...(no debugging symbols found)...done.
Loaded symbols for /lib64/libpthread.so.0
Reading symbols from /lib64/libm.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib64/libm.so.6
Reading symbols from /lib64/libc.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib64/libc.so.6
Reading symbols from /lib64/libcrypt.so.1...(no debugging symbols found)...done.
Loaded symbols for /lib64/libcrypt.so.1
Reading symbols from /lib64/libdl.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib64/libdl.so.2
Reading symbols from /lib64/ld-linux-x86-64.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib64/ld-linux-x86-64.so.2
Reading symbols from /lib64/libnss_files.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib64/libnss_files.so.2
Core was generated by `litespeed'.
Program terminated with signal 11, Segmentation fault.
#0 0x0000003e820d0f69 in syscall () from /lib64/libc.so.6
(gdb) bt
#0 0x0000003e820d0f69 in syscall () from /lib64/libc.so.6
#1 0x00000000004f679d in epoll::waitAndProcessEvents (this=0xa27cf10, iTimeoutMilliSec=100) at /httpd/edio/epoll.cpp:229
#2 0x000000000044f280 in EventDispatcher::run (this=0xa23e218) at /httpd/http/eventdispatcher.cpp:225
#3 0x000000000040fca5 in HttpServerImpl::start (this=0xa23e1f0) at /httpd/main/httpserver.cpp:475
#4 0x0000000000412cab in HttpServer::start (this=0x8772d0) at /httpd/main/httpserver.cpp:1849
#5 0x0000000000409eee in LshttpdMain::main (this=0xa23de30, argc=1, argv=0x7fff43247638) at /httpd/main/lshttpdmain.cpp:1761
#6 0x0000000000405a3f in main (argc=1, argv=0x7fff43247638) at /httpd/main.cpp:121
I've had to go to 4.1RC4 as this keeps happening on the release/debug version of 4.1.
__________________
█ Michael Denney - MDDHosting, LLC - Professional Hosting Solutions
█ LiteSpeed Powered - Shared, Reseller, Semi-Dedicated, and VPS
█ http://www.mddhosting.com/
Last edited by MikeDVB; 04-23-2011 at 01:20 AM..
|

04-23-2011, 01:59 AM
|
|
Senior Member
|
|
Join Date: Jul 2009
Posts: 218
|
|
|
I've been forced to roll back to 4.0.20 as 4.1RC4 and 4.1 are exhibiting this behavior on two separate systems.
__________________
█ Michael Denney - MDDHosting, LLC - Professional Hosting Solutions
█ LiteSpeed Powered - Shared, Reseller, Semi-Dedicated, and VPS
█ http://www.mddhosting.com/
|

04-24-2011, 10:01 PM
|
|
Senior Member
|
|
Join Date: Jul 2009
Posts: 218
|
|
|
George has gotten us a new debug build of the software, as he believes he has resolved this issue. Apparently it was due to incorrect accounting of concurrent connections for a vhost. We'll see; it shouldn't take long to find out whether this fixed the issue or not.
__________________
█ Michael Denney - MDDHosting, LLC - Professional Hosting Solutions
█ LiteSpeed Powered - Shared, Reseller, Semi-Dedicated, and VPS
█ http://www.mddhosting.com/
|

04-26-2011, 12:56 AM
|
|
Senior Member
|
|
Join Date: Jul 2009
Posts: 218
|
|
|
Enough time has passed since George got us the new build, which fixed the concurrent connection bug that was causing our spinlock, that I feel this can be marked resolved.
__________________
█ Michael Denney - MDDHosting, LLC - Professional Hosting Solutions
█ LiteSpeed Powered - Shared, Reseller, Semi-Dedicated, and VPS
█ http://www.mddhosting.com/
|

04-28-2011, 12:43 PM
|
|
Senior Member
|
|
Join Date: Jul 2009
Posts: 218
|
|
This isn't resolved; it just happened on another server.
Strace:
Code:
epoll_wait(8, {{EPOLLIN, {u32=0, u64=1082331758592}}, {EPOLLIN, {u32=0, u64=1099511627776}}, {EPOLLIN, {u32=0, u64=601295421440}}, {EPOLLIN, {u32=0, u64=146028888064}}, {EPOLLIN, {u32=0, u64=515396075520}}, {EPOLLIN, {u32=0, u64=455266533376}}, {EPOLLIN, {u32=0, u64=635655159808}}, {EPOLLIN, {u32=0, u64=566935683072}}, {EPOLLIN, {u32=0, u64=721554505728}}, {EPOLLIN, {u32=0, u64=910533066752}}}, 10, 100) = 10
epoll_wait(8, {{EPOLLIN, {u32=0, u64=68719476736}}, {EPOLLIN, {u32=0, u64=60129542144}}, {EPOLLIN, {u32=0, u64=51539607552}}, {EPOLLIN, {u32=0, u64=687194767360}}, {EPOLLIN, {u32=0, u64=987842478080}}, {EPOLLIN, {u32=0, u64=996432412672}}, {EPOLLIN, {u32=0, u64=678604832768}}, {EPOLLIN, {u32=0, u64=1120986464256}}, {EPOLLIN, {u32=0, u64=919123001344}}, {EPOLLIN, {u32=0, u64=695784701952}}}, 10, 100) = 10
epoll_wait(8, {{EPOLLIN, {u32=0, u64=644245094400}}, {EPOLLIN, {u32=0, u64=618475290624}}, {EPOLLIN, {u32=0, u64=609885356032}}, {EPOLLIN, {u32=0, u64=798863917056}}, {EPOLLIN, {u32=0, u64=532575944704}}, {EPOLLIN, {u32=0, u64=970662608896}}, {EPOLLIN, {u32=0, u64=773094113280}}, {EPOLLIN, {u32=0, u64=893353197568}}, {EPOLLIN, {u32=0, u64=292057776128}}, {EPOLLIN, {u32=0, u64=979252543488}}}, 10, 100) = 10
Backtrace:
Code:
[New Thread 23136]
[New Thread 23135]
Core was generated by `litespeed'.
Program terminated with signal 11, Segmentation fault.
#0 0x00000038ede0b150 in pthread_cond_timedwait@@GLIBC_2.3.2 ()
from /lib64/libpthread.so.0
#0 0x00000038ede0b150 in pthread_cond_timedwait@@GLIBC_2.3.2 ()
from /lib64/libpthread.so.0
#1 0x00000000004f77f7 in PThreadCond::wait (this=0x571fcb8,
pMutex=0x571fc90, lMilliSec=1000)
at /home/gwang/release/httpd/httpd/thread/pthread/pthreadcond.cpp:33
#2 0x000000000041665b in PThreadWorkQueue<BlockOpReq>::get (this=0x571fc90,
pWork=0x42ec3db0, size=@0x42ec3dac, lMilliSec=1000)
at /home/gwang/release/httpd/httpd/thread/pthread/pthreadworkqueue.h:147
#3 0x0000000000416563 in TWorkQueue<BlockOpReq, PThreadWorkQueue<BlockOpReq> >::get (this=0x571fc90, pWork=0x42ec3db0, size=@0x42ec3dac, lMilliSec=1000)
at /home/gwang/release/httpd/httpd/thread/workqueue.h:37
#4 0x00000000004163da in WorkCrew<BlockOpReq, BlockOpDoWork, WorkQueue<BlockOpReq> >::TCrewMember<BlockOpReq, BlockOpDoWork, WorkQueue<BlockOpReq> >::run (
this=0x5709030) at /home/gwang/release/httpd/httpd/thread/workcrew.h:307
#5 0x000000000041631c in PThreadImpl<WorkCrew<BlockOpReq, BlockOpDoWork, WorkQueue<BlockOpReq> >::TCrewMember<BlockOpReq, BlockOpDoWork, WorkQueue<BlockOpReq> > >::threadRoutine (arg=0x5709030)
at /home/gwang/release/httpd/httpd/thread/pthread/pthreadimpl.h:98
#6 0x00000038ede0673d in start_thread () from /lib64/libpthread.so.0
#7 0x00000038ed6d44bd in clone () from /lib64/libc.so.6
__________________
█ Michael Denney - MDDHosting, LLC - Professional Hosting Solutions
█ LiteSpeed Powered - Shared, Reseller, Semi-Dedicated, and VPS
█ http://www.mddhosting.com/
|
| Thread Tools |
|
|
| Display Modes |
Hybrid Mode
|
Posting Rules
|
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts
HTML code is Off
|
|
|
All times are GMT -7. The time now is 11:43 AM.
|
|