22 июня 2011 г. 5:49 пользователь Maxim Dounin
- для глобального error_log и для нужного хоста включал error_log на уровне debug
Новые coredump'ы и большое количество жалоб на "open socket left in connection" появляются в большом количестве при рестарте.
Помимо этого за прошедшую ночь появилось с десяток новых coredump'ов, но уже по сообщению "worker process N exited on signal 6 (core dumped)" и без упоминания сокетов.
root@server:/var/log/nginx# bzgrep 23887 error.log.0.bz2 | less
2011/06/21 19:28:35 [alert] 23887#0: open socket #4 left in connection 3
2011/06/21 19:28:35 [alert] 23887#0: open socket #57 left in connection 4
2011/06/21 19:28:35 [alert] 23887#0: open socket #35 left in connection 5
2011/06/21 19:28:35 [alert] 23887#0: open socket #41 left in connection 6
2011/06/21 19:28:35 [alert] 23887#0: open socket #45 left in connection 7
2011/06/21 19:28:35 [alert] 23887#0: open socket #46 left in connection 8
2011/06/21 19:28:35 [alert] 23887#0: open socket #49 left in connection 9
2011/06/21 19:28:35 [alert] 23887#0: open socket #50 left in connection 10
2011/06/21 19:28:35 [alert] 23887#0: open socket #51 left in connection 11
2011/06/21 19:28:35 [alert] 23887#0: open socket #53 left in connection 12
2011/06/21 19:28:35 [alert] 23887#0: open socket #112 left in connection 14
2011/06/21 19:28:35 [alert] 23887#0: open socket #58 left in connection 15
2011/06/21 19:28:35 [alert] 23887#0: open socket #59 left in connection 16
2011/06/21 19:28:35 [alert] 23887#0: open socket #60 left in connection 17
2011/06/21 19:28:35 [alert] 23887#0: open socket #61 left in connection 18
2011/06/21 19:28:35 [alert] 23887#0: open socket #84 left in connection 19
2011/06/21 19:28:35 [alert] 23887#0: open socket #65 left in connection 20
2011/06/21 19:28:35 [alert] 23887#0: open socket #74 left in connection 21
2011/06/21 19:28:35 [alert] 23887#0: open socket #75 left in connection 22
2011/06/21 19:28:35 [alert] 23887#0: open socket #76 left in connection 23
2011/06/21 19:28:35 [alert] 23887#0: open socket #77 left in connection 24
2011/06/21 19:28:35 [alert] 23887#0: open socket #80 left in connection 25
2011/06/21 19:28:35 [alert] 23887#0: open socket #81 left in connection 26
2011/06/21 19:28:35 [alert] 23887#0: open socket #12 left in connection 27
2011/06/21 19:28:35 [alert] 23887#0: open socket #95 left in connection 28
2011/06/21 19:28:35 [alert] 23887#0: open socket #86 left in connection 29
2011/06/21 19:28:35 [alert] 23887#0: open socket #87 left in connection 30
2011/06/21 19:28:35 [alert] 23887#0: open socket #62 left in connection 31
2011/06/21 19:28:35 [alert] 23887#0: open socket #90 left in connection 32
2011/06/21 19:28:35 [alert] 23887#0: open socket #91 left in connection 33
2011/06/21 19:28:35 [alert] 23887#0: open socket #31 left in connection 34
2011/06/21 19:28:35 [alert] 23887#0: open socket #89 left in connection 35
2011/06/21 19:28:35 [alert] 23887#0: open socket #97 left in connection 36
2011/06/21 19:28:35 [alert] 23887#0: open socket #99 left in connection 37
2011/06/21 19:28:35 [alert] 23887#0: open socket #100 left in connection 38
2011/06/21 19:28:35 [alert] 23887#0: open socket #114 left in connection 40
2011/06/21 19:28:35 [alert] 23887#0: open socket #92 left in connection 41
2011/06/21 19:28:35 [alert] 23887#0: open socket #83 left in connection 42
2011/06/21 19:28:35 [alert] 23887#0: open socket #107 left in connection 45
2011/06/21 19:28:35 [alert] 23887#0: open socket #108 left in connection 46
2011/06/21 19:28:35 [alert] 23887#0: open socket #109 left in connection 47
2011/06/21 19:28:35 [alert] 23887#0: open socket #110 left in connection 48
2011/06/21 19:28:35 [alert] 23887#0: open socket #115 left in connection 49
2011/06/21 19:28:35 [alert] 23887#0: open socket #116 left in connection 50
2011/06/21 19:28:35 [alert] 23887#0: open socket #117 left in connection 51
2011/06/21 19:28:35 [alert] 23887#0: open socket #118 left in connection 52
2011/06/21 19:28:35 [alert] 23887#0: open socket #119 left in connection 53
2011/06/21 19:28:35 [alert] 23887#0: open socket #120 left in connection 54
2011/06/21 19:28:35 [alert] 23887#0: open socket #121 left in connection 55
2011/06/21 19:28:35 [alert] 23887#0: open socket #122 left in connection 56
2011/06/21 19:28:35 [alert] 23887#0: open socket #125 left in connection 58
2011/06/21 19:28:35 [alert] 23887#0: open socket #126 left in connection 59
2011/06/21 19:28:35 [alert] 23887#0: open socket #128 left in connection 60
2011/06/21 19:28:35 [alert] 23887#0: open socket #129 left in connection 61
2011/06/21 19:28:35 [alert] 23887#0: aborting
2011/06/21 19:30:14 [alert] 21146#0: worker process 23887 exited on signal 6 (core dumped)
root@server:~# gdb -c /var/tmp/65534.nginx.23887.core /usr/local/sbin/nginx
GNU gdb 6.1.1 [FreeBSD]
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB. Type "show warranty" for details.
This GDB was configured as "amd64-marcel-freebsd"...
Core was generated by `nginx'.
Program terminated with signal 6, Aborted.
Reading symbols from /lib/libcrypt.so.5...done.
Loaded symbols for /lib/libcrypt.so.5
Reading symbols from /usr/local/lib/libpcre.so.0...done.
Loaded symbols for /usr/local/lib/libpcre.so.0
Reading symbols from /usr/local/lib/libssl.so.7...done.
Loaded symbols for /usr/local/lib/libssl.so.7
Reading symbols from /usr/local/lib/libcrypto.so.7...done.
Loaded symbols for /usr/local/lib/libcrypto.so.7
Reading symbols from /lib/libz.so.5...done.
Loaded symbols for /lib/libz.so.5
Reading symbols from /usr/local/lib/libGeoIP.so.5...done.
Loaded symbols for /usr/local/lib/libGeoIP.so.5
Reading symbols from /usr/local/lib/perl5/5.10.1/mach/CORE/libperl.so...done.
Loaded symbols for /usr/local/lib/perl5/5.10.1/mach/CORE/libperl.so
Reading symbols from /lib/libm.so.5...done.
Loaded symbols for /lib/libm.so.5
Reading symbols from /lib/libutil.so.8...done.
Loaded symbols for /lib/libutil.so.8
Reading symbols from /lib/libc.so.7...done.
Loaded symbols for /lib/libc.so.7
Reading symbols from /usr/local/lib/perl5/site_perl/5.10.1/mach/auto/nginx/nginx.so...done.
Loaded symbols for /usr/local/lib/perl5/site_perl/5.10.1/mach/auto/nginx/nginx.so
Reading symbols from /libexec/ld-elf.so.1...done.
Loaded symbols for /libexec/ld-elf.so.1
#0 0x00000008014f1fcc in kill () from /lib/libc.so.7
(gdb) qq 34
$1 = {len = 43, data = "" "/arhiv/newnew/dwlg520_revB3_drivers_318.zip"}
$2 = {len = 46, data = "" "GET /a52babab9bb8355b156358f434627bb0 HTTP/1.1\r\nAccept"}
discard_body 0
lingering_close 0
lingering_time 0
keepalive 1
$3 = 0x46d7a7 <ngx_http_upstream_rd_check_broken_connection>
$4 = 0x470f3b <ngx_http_upstream_process_downstream>
$5 = 0x4b79fa "reading upstream"
status 200
count 1
blocked 0
sent 33580
length -1
Cannot access memory at address 0x0
Оригинальный вывод (без blabla) чуть выше.
(gdb) set $c = &ngx_cycle->connections[34]
(gdb) p $c->log->connection
$6 = 3682
(gdb) p *$c
$7 = {data = "" read = 0x809804ee0, write = 0x809a04ee0, fd = 31, recv = 0x43914c <ngx_unix_recv>, send = 0x439b00 <ngx_unix_send>, recv_chain = 0x439470 <ngx_readv_chain>, send_chain = 0x43f52c <ngx_freebsd_sendfile_chain>,
listening = 0x801844800, sent = 33580, log = 0x801de5360, pool = 0x801de5300, sockaddr = 0x801de5350, socklen = 16, addr_text = {len = 12, data = "" "41.202.20.82166"}, ssl = 0x0, local_sockaddr = 0x801b6dc60,
buffer = 0x801de5420, queue = {prev = 0x0, next = 0x0}, number = 3682, requests = 1, buffered = 16, log_error = 2, single_connection = 0, unexpected_eof = 0, timedout = 0, error = 1, destroyed = 0, idle = 0, reusable = 0, close = 0,
sendfile = 0, sndlowat = 0, tcp_nodelay = 0, tcp_nopush = 0, aio_sendfile = 0, busy_sendfile = 0x0}
(gdb) set $r = (ngx_http_request_t *) $c->data
(gdb) p *$r
$8 = {signature = 1347703880, connection = 0x801e05a90, ctx = 0x801d2e410, main_conf = 0x801844df8, srv_conf = 0x8019c3120, loc_conf = 0x8019a6670, read_event_handler = 0x46d7a7 <ngx_http_upstream_rd_check_broken_connection>,
write_event_handler = 0x470f3b <ngx_http_upstream_process_downstream>, cache = 0x801d31120, upstream = 0x801d2fb88, upstream_states = 0x801d2f518, pool = 0x801d2e000, header_in = 0x801de5420, headers_in = {headers = {
last = 0x801884670, part = {elts = 0x801d2e7f0, nelts = 7, next = 0x0}, size = 48, nalloc = 20, pool = 0x801d2e000}, host = 0x801d2e8e0, connection = 0x0, if_modified_since = 0x0, if_unmodified_since = 0x0,
user_agent = 0x801d2e880, referer = 0x801d2e820, content_length = 0x0, content_type = 0x0, range = 0x0, if_range = 0x0, transfer_encoding = 0x0, expect = 0x0, accept_encoding = 0x801d2e8b0, via = 0x0, authorization = 0x0,
keep_alive = 0x0, x_forwarded_for = 0x0, x_real_ip = 0x0, user = {len = 0, data = "" passwd = {len = 0, data = "" cookies = {elts = 0x801d2ebb0, nelts = 0, size = 8, nalloc = 2, pool = 0x801d2e000}, server = {len = 22,
data = "" "dn-free.domain.comconnection"}, content_length_n = -1, keep_alive_n = -1, connection_type = 2, msie = 1, msie4 = 0, msie6 = 0, opera = 0, gecko = 0, chrome = 0, safari = 0, konqueror = 0}, headers_out = {
headers = {last = 0x8018847a8, part = {elts = 0x801d2e050, nelts = 4, next = 0x0}, size = 48, nalloc = 20, pool = 0x801d2e000}, status = 200, status_line = {len = 6, data = "" "200 OK"}, server = 0x0, date = 0x0,
content_length = 0x801d2e080, content_encoding = 0x0, location = 0x0, refresh = 0x0, last_modified = 0x801d2e0b0, content_range = 0x0, accept_ranges = 0x801d2e0e0, www_authenticate = 0x0, expires = 0x0, etag = 0x0,
override_charset = 0x0, content_type_len = 15, content_type = {len = 15, data = "" "application/zip"}, charset = {len = 0, data = "" content_type_lowcase = 0x801d31bee "application/zip",
content_type_hash = 8984756889729896930, cache_control = {elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, content_length_n = 33618230, date_time = 0, last_modified_time = 1299776106}, request_body = 0x801d2ef68,
lingering_time = 0, start_sec = 1308684468, start_msec = 765, method = 2, http_version = 1001, request_line = {len = 46, data = "" "GET /a52babab9bb8355b156358f434627bb0 HTTP/1.1\r\nAccept"}, uri = {len = 43,
data = "" "/arhiv/newnew/dwlg520_revB3_drivers_318.zip"}, args = {len = 0, data = "" exten = {len = 3, data = "" "zip"}, unparsed_uri = {len = 33,
data = "" "/a52babab9bb8355b156358f434627bb0 HTTP/1.1\r\nAccept"}, method_name = {len = 3, data = "" "GET /a52babab9bb8355b156358f434627bb0 HTTP/1.1\r\nAccept"}, http_protocol = {len = 8,
data = "" "HTTP/1.1\r\nAccept"}, out = 0x801ffc2d0, main = 0x801884600, parent = 0x0, postponed = 0x0, post_subrequest = 0x0, posted_requests = 0x0, virtual_names = 0x801b6dcb0, phase_handler = 12,
content_handler = 0x49db38 <ngx_http_proxy_handler>, access_code = 0, variables = 0x801d2e560, ncaptures = 4, captures = 0x801d2fb20, captures_data = 0x801d2f7fe "/get/arhiv/newnew/dwlg520_revB3_drivers_318.zip", limit_rate = 0,
header_size = 295, request_length = 347, err_status = 0, http_connection = 0x801de53b8, log_handler = 0x45b562 <ngx_http_log_error_handler>, cleanup = 0x801d31838, subrequests = 51, count = 1, blocked = 0, aio = 0, http_state = 2,
complex_uri = 0, quoted_uri = 0, plus_in_uri = 0, space_in_uri = 0, invalid_header = 0, add_uri_to_alias = 0, valid_location = 0, valid_unparsed_uri = 0, uri_changed = 0, uri_changes = 10, request_body_in_single_buf = 0,
request_body_in_file_only = 0, request_body_in_persistent_file = 0, request_body_in_clean_file = 0, request_body_file_group_access = 0, request_body_file_log_level = 5, subrequest_in_memory = 0, waited = 0, cached = 0,
gzip_tested = 0, gzip_ok = 0, gzip_vary = 0, proxy = 0, bypass_cache = 0, no_cache = 0, limit_zone_set = 1, limit_req_set = 1, pipeline = 0, plain_http = 0, chunked = 0, header_only = 0, keepalive = 1, lingering_close = 0,
discard_body = 0, internal = 1, error_page = 0, ignore_content_encoding = 0, filter_finalize = 0, post_action = 0, request_complete = 0, request_output = 1, header_sent = 1, expect_tested = 0, root_tested = 0, done = 0, logged = 0,
buffered = 4, main_filter_need_in_memory = 0, filter_need_in_memory = 0, filter_need_temporary = 0, allow_ranges = 0, stat_reading = 0, stat_writing = 1, state = 0, header_hash = 5215665306198305595, lowcase_index = 13,
lowcase_header = "accept-rangeshction", '\0' <repeats 12 times>,
header_name_start = 0x80da0412f "?8\200\222?\212\027|?eg?mw?$\217?\005\201F??)?j8dNmc\213\005\024?\222\235\b\235\223???S\a??bw\n7{?SG?P\212M??H?>5?\217?U\"?l????6\211?\177\020?\200\200\200?\016h?Ty??C*B$\234z\234<\216\f \237\031\225e??\020\025",
header_name_end = 0x80da04126 "\231\221\235\nB?L\020o?8\200\222?\212\027|?eg?mw?$\217?\005\201F??)?j8dNmc\213\005\024?\222\235\b\235\223???S\a??bw\n7{?SG?P\212M??H?>5?\217?U\"?l????6\211?\177\020?\200\200\200?\016h?Ty??C*B$\234z\234<\216\f \237\031\225e??\020\025",
header_start = 0x80da04128 "\235\nB?L\020o?8\200\222?\212\027|?eg?mw?$\217?\005\201F??)?j8dNmc\213\005\024?\222\235\b\235\223???S\a??bw\n7{?SG?P\212M??H?>5?\217?U\"?l????6\211?\177\020?\200\200\200?\016h?Ty??C*B$\234z\234<\216\f \237\031\225e??\020\025",
header_end = 0x80da0412f "?8\200\222?\212\027|?eg?mw?$\217?\005\201F??)?j8dNmc\213\005\024?\222\235\b\235\223???S\a??bw\n7{?SG?P\212M??H?>5?\217?U\"?l????6\211?\177\020?\200\200\200?\016h?Ty??C*B$\234z\234<\216\f \237\031\225e??\020\025", uri_start = 0x8099ac404 "/a52babab9bb8355b156358f434627bb0 HTTP/1.1\r\nAccept", uri_end = 0x8099ac425 " HTTP/1.1\r\nAccept", uri_ext = 0x0, args_start = 0x0,
request_start = 0x8099ac400 "GET /a52babab9bb8355b156358f434627bb0 HTTP/1.1\r\nAccept", request_end = 0x8099ac42e "\r\nAccept", method_end = 0x8099ac402 "T /a52babab9bb8355b156358f434627bb0 HTTP/1.1\r\nAccept", schema_start = 0x0,
schema_end = 0x0, host_start = 0x0, host_end = 0x0, port_start = 0x0, port_end = 0x0, http_minor = 1, http_major = 1}
Количество сокетов в статусе CLOSED не имеет значения? Или необходимо дождаться существенно больших значений?
Еще один момент - при reload'е не происходит падения, либо появления новых coredump'ов.
По крайней мере мои вчерашние и сегодняшние попытки спровоцировать падение через reload успехом не увенчались.