我们正在使用 Cassandra 2.2.7,在修复/引导/停用某些节点时,我们遇到了奇怪的流式传输问题。例如,我试图停用一个新引导的节点,然后立即在节点上收到以下错误:
ERROR [STREAM-OUT-/54.XX.XXX.XXX] 2016-08-04 17:42:44,051 StreamSession.java:532 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Streaming error occurred
java.io.IOException: Broken pipe
at sun.nio.ch.FileDispatcherImpl.write0(Native Method) ~[na:1.8.0_101]
at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47) ~[na:1.8.0_101]
at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93) ~[na:1.8.0_101]
at sun.nio.ch.IOUtil.write(IOUtil.java:51) ~[na:1.8.0_101]
at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:471) ~[na:1.8.0_101]
at org.apache.cassandra.io.util.BufferedDataOutputStreamPlus.doFlush(BufferedDataOutputStreamPlus.java:266) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.io.util.BufferedDataOutputStreamPlus.flush(BufferedDataOutputStreamPlus.java:274) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.ConnectionHandler$OutgoingMessageHandler.sendMessage(ConnectionHandler.java:390) [apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.ConnectionHandler$OutgoingMessageHandler.run(ConnectionHandler.java:361) [apache-cassandra-2.2.7.jar:2.2.7]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_101]
INFO [STREAM-OUT-/54.XX.XXX.XXX] 2016-08-04 17:42:44,102 StreamResultFuture.java:183 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Session with /54.XX.XXX.XXX is complete
WARN [STREAM-OUT-/54.XX.XXX.XXX] 2016-08-04 17:42:44,102 StreamResultFuture.java:210 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Stream failed
ERROR [STREAM-OUT-/54.XX.XXX.XXX] 2016-08-04 17:42:44,103 StreamSession.java:532 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Streaming error occurred
java.io.IOException: Broken pipe
at sun.nio.ch.FileDispatcherImpl.write0(Native Method) ~[na:1.8.0_101]
at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47) ~[na:1.8.0_101]
at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93) ~[na:1.8.0_101]
at sun.nio.ch.IOUtil.write(IOUtil.java:51) ~[na:1.8.0_101]
at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:471) ~[na:1.8.0_101]
at org.apache.cassandra.io.util.BufferedDataOutputStreamPlus.doFlush(BufferedDataOutputStreamPlus.java:266) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.io.util.BufferedDataOutputStreamPlus.flush(BufferedDataOutputStreamPlus.java:274) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.ConnectionHandler$OutgoingMessageHandler.sendMessage(ConnectionHandler.java:390) [apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.ConnectionHandler$OutgoingMessageHandler.run(ConnectionHandler.java:369) [apache-cassandra-2.2.7.jar:2.2.7]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_101]
ERROR [STREAM-IN-/54.XX.XXX.XXX] 2016-08-04 17:42:44,312 StreamSession.java:532 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Streaming error occurred
java.lang.RuntimeException: Outgoing stream handler has been closed
at org.apache.cassandra.streaming.ConnectionHandler.sendMessage(ConnectionHandler.java:143) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.StreamSession.receive(StreamSession.java:596) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.StreamSession.messageReceived(StreamSession.java:475) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.ConnectionHandler$IncomingMessageHandler.run(ConnectionHandler.java:293) ~[apache-cassandra-2.2.7.jar:2.2.7]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_101]
在其他节点上,我收到以下错误:
WARN [STREAM-IN-/163.XXX.XXX.XX] 2016-08-04 17:42:43,234 CompressedStreamReader.java:121 - [Stream d89c1640-5a6a-11e6-9152-91527bf14a77] Error while reading partition DecoratedKey(7971573768827100217, 39653161643838316130303031333963393436366330303863633031636637346565343435366233373663383633343039633963323039313161336333333034) from stream on ks='KEYSPACE_XXXXXXX' and table='TABLE_XXXXXXXX'.
ERROR [STREAM-IN-/163.XXX.XXX.XX] 2016-08-04 17:42:43,315 StreamSession.java:524 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Streaming error occurred
java.io.IOException: net.jpountz.lz4.LZ4Exception: Error decoding offset 33063 of input buffer
at org.apache.cassandra.io.compress.LZ4Compressor.uncompress(LZ4Compressor.java:92) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.compress.CompressedInputStream.decompress(CompressedInputStream.java:116) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.compress.CompressedInputStream.read(CompressedInputStream.java:94) ~[apache-cassandra-2.2.5.jar:2.2.5]
at java.io.DataInputStream.readUnsignedShort(DataInputStream.java:337) ~[na:1.8.0_77]
at org.apache.cassandra.utils.BytesReadTracker.readUnsignedShort(BytesReadTracker.java:140) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.utils.ByteBufferUtil.readShortLength(ByteBufferUtil.java:328) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.utils.ByteBufferUtil.readWithShortLength(ByteBufferUtil.java:338) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.db.composites.AbstractCType$Serializer.deserialize(AbstractCType.java:382) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.db.composites.AbstractCType$Serializer.deserialize(AbstractCType.java:366) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.db.OnDiskAtom$Serializer.deserializeFromSSTable(OnDiskAtom.java:75) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.db.AbstractCell$1.computeNext(AbstractCell.java:52) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.db.AbstractCell$1.computeNext(AbstractCell.java:46) ~[apache-cassandra-2.2.5.jar:2.2.5]
at com.google.common.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:143) ~[guava-16.0.jar:na]
at com.google.common.collect.AbstractIterator.hasNext(AbstractIterator.java:138) ~[guava-16.0.jar:na]
at org.apache.cassandra.io.sstable.format.big.BigTableWriter.appendFromStream(BigTableWriter.java:243) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.StreamReader.writeRow(StreamReader.java:187) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.compress.CompressedStreamReader.read(CompressedStreamReader.java:108) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.messages.IncomingFileMessage$1.deserialize(IncomingFileMessage.java:49) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.messages.IncomingFileMessage$1.deserialize(IncomingFileMessage.java:38) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.messages.StreamMessage.deserialize(StreamMessage.java:58) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.streaming.ConnectionHandler$IncomingMessageHandler.run(ConnectionHandler.java:261) ~[apache-cassandra-2.2.5.jar:2.2.5]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_77]
Caused by: net.jpountz.lz4.LZ4Exception: Error decoding offset 33063 of input buffer
at net.jpountz.lz4.LZ4JNIFastDecompressor.decompress(LZ4JNIFastDecompressor.java:39) ~[lz4-1.3.0.jar:na]
at org.apache.cassandra.io.compress.LZ4Compressor.uncompress(LZ4Compressor.java:87) ~[apache-cassandra-2.2.5.jar:2.2.5]
... 21 common frames omitted
INFO [STREAM-IN-/163.XXX.XXX.XX] 2016-08-04 17:42:43,317 StreamResultFuture.java:182 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Session with /163.XXX.XXX.XX is complete
WARN [STREAM-IN-/163.XXX.XXX.XX] 2016-08-04 17:42:43,317 StreamResultFuture.java:209 - [Stream #d89c1640-5a6a-11e6-9152-91527bf14a77] Stream failed
WARN [Thread-1988] 2016-08-04 17:42:43,319 CompressedInputStream.java:179 - Error while reading compressed input stream.
java.nio.channels.AsynchronousCloseException: null
at java.nio.channels.spi.AbstractInterruptibleChannel.end(AbstractInterruptibleChannel.java:205) ~[na:1.8.0_77]
at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:407) ~[na:1.8.0_77]
at sun.nio.ch.ChannelInputStream.read(ChannelInputStream.java:59) ~[na:1.8.0_77]
at sun.nio.ch.ChannelInputStream.read(ChannelInputStream.java:109) ~[na:1.8.0_77]
at sun.nio.ch.ChannelInputStream.read(ChannelInputStream.java:103) ~[na:1.8.0_77]
at org.apache.cassandra.streaming.compress.CompressedInputStream$Reader.runMayThrow(CompressedInputStream.java:169) ~[apache-cassandra-2.2.5.jar:2.2.5]
at org.apache.cassandra.utils.WrappedRunnable.run(WrappedRunnable.java:28) [apache-cassandra-2.2.5.jar:2.2.5]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_77]
有时候我更喜欢这个:
WARN [STREAM-IN-/163.XXX.XXX.XX] 2016-08-04 09:00:16,649 CompressedStreamReader.java:121 - [Stream b93dbb30-5a21-11e6-af4d-cf5ff5ded900] Error while reading partition DecoratedKey(-4093657774327370252, 37663734383563316637313338363739613462306435393036663431666566613963336237623534343663366131626238396562393764656262373662373835) from stream on ks='KEYSPACE_XXXXXX' and table='TABLE_XXXXXX'.
WARN [STREAM-IN-/163.XXX.XXX.XX] 2016-08-04 09:00:17,720 StreamSession.java:655 - [Stream #b93dbb30-5a21-11e6-af4d-cf5ff5ded900] Retrying for following error
java.lang.IllegalArgumentException: Not enough bytes. Offset: 39. Length: 13113. Buffer size: 54
at org.apache.cassandra.db.composites.AbstractCType.checkRemaining(AbstractCType.java:362) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.db.composites.AbstractCompoundCellNameType.fromByteBuffer(AbstractCompoundCellNameType.java:98) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.db.composites.AbstractCType$Serializer.deserialize(AbstractCType.java:382) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.db.composites.AbstractCType$Serializer.deserialize(AbstractCType.java:366) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.db.OnDiskAtom$Serializer.deserializeFromSSTable(OnDiskAtom.java:75) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.db.AbstractCell$1.computeNext(AbstractCell.java:52) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.db.AbstractCell$1.computeNext(AbstractCell.java:46) ~[apache-cassandra-2.2.7.jar:2.2.7]
at com.google.common.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:143) ~[guava-16.0.jar:na]
at com.google.common.collect.AbstractIterator.hasNext(AbstractIterator.java:138) ~[guava-16.0.jar:na]
at org.apache.cassandra.io.sstable.format.big.BigTableWriter.appendFromStream(BigTableWriter.java:243) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.StreamReader.writeRow(StreamReader.java:187) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.compress.CompressedStreamReader.read(CompressedStreamReader.java:108) ~[apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.messages.IncomingFileMessage$1.deserialize(IncomingFileMessage.java:49) [apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.messages.IncomingFileMessage$1.deserialize(IncomingFileMessage.java:38) [apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.messages.StreamMessage.deserialize(StreamMessage.java:58) [apache-cassandra-2.2.7.jar:2.2.7]
at org.apache.cassandra.streaming.ConnectionHandler$IncomingMessageHandler.run(ConnectionHandler.java:287) [apache-cassandra-2.2.7.jar:2.2.7]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_101]
可能是什么原因?我不认为这是网络问题,因为启动退役命令时,所有节点上都会立即出现此“字节数不足”错误,而我可以在节点之间“scp”数小时而没有任何问题。
即使修复也会因相同的错误而崩溃,所以我们陷入了“不稳定”的集群。
答案1
你真的应该看看 Cassandra 的 JIRA,它是奇怪的消息或行为的良好信息来源。最后一个 strace 似乎与这个追踪器例如