Kubernetes 1.18.x版本部署metrics-server组件时,采集不到数据

表现为:

  • kubectl top nodes、kubectl top pods 报错信息如下

    Error from server (ServiceUnavailable): the server is currently unable to handle the request (get nodes.metrics.k8s.io)
    Error from server (ServiceUnavailable): the server is currently unable to handle the request (get pods.metrics.k8s.io)

查看kube-apiserver的日志信息
kubectl -n kube-system logs -f kube-apiserver-master-1 --tail 10

E0826 04:25:10.111976       1 available_controller.go:420] v1beta1.metrics.k8s.io failed with: failing or missing response from https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: Get https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)
E0826 04:25:15.112635       1 available_controller.go:420] v1beta1.metrics.k8s.io failed with: failing or missing response from https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: Get https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)
W0826 04:25:37.762783       1 handler_proxy.go:102] no RequestInfo found in the context
E0826 04:25:37.762890       1 controller.go:114] loading OpenAPI spec for "v1beta1.metrics.k8s.io" failed with: failed to retrieve openAPI spec, http error: ResponseCode: 503, Body: service unavailable
, Header: map[Content-Type:[text/plain; charset=utf-8] X-Content-Type-Options:[nosniff]]
I0826 04:25:37.762915       1 controller.go:127] OpenAPI AggregationController: action for item v1beta1.metrics.k8s.io: Rate Limited Requeue.
E0826 04:25:41.763211       1 available_controller.go:420] v1beta1.metrics.k8s.io failed with: failing or missing response from https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: Get https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)
E0826 04:25:46.764318       1 available_controller.go:420] v1beta1.metrics.k8s.io failed with: failing or missing response from https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: Get https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)
E0826 04:26:11.763745       1 available_controller.go:420] v1beta1.metrics.k8s.io failed with: failing or missing response from https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: Get https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)
E0826 04:26:16.764339       1 available_controller.go:420] v1beta1.metrics.k8s.io failed with: failing or missing response from https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: Get https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)
E0826 04:26:41.763920       1 available_controller.go:420] v1beta1.metrics.k8s.io failed with: failing or missing response from https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: Get https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)
E0826 04:26:46.764574       1 available_controller.go:420] v1beta1.metrics.k8s.io failed with: failing or missing response from https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: Get https://10.108.243.54:443/apis/metrics.k8s.io/v1beta1: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)

这段日志看不出什么问题。

最终解决:kube-apiserver添加--enable-aggregator-routing=true启动参数,原因不明,因为在我另一个k8s 1.16.x集群中,并没有添加这项参数,工作正常。有知道原因的伙伴请不吝告知,在此谢过。

PS:网上有文章说如果master节点没有运行kube-proxy进程才需要加上这个启动参数,而我的集群中master节点是有运行kube-proxy的。

参考链接1:https://github.com/kubernetes-sigs/metrics-server/issues/448
参考链接2:https://blog.z0ukun.com/?p=1462

邮件内容支持附件、图片、文字

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time     : 2020/5/13 6:05 下午
# @Author   : 陈日志
# @Email    : [email protected]
# @File     : Mail.py
import os
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.image import MIMEImage
from email.header import Header


class Mail(object):
    """Compose and send an HTML e-mail with optional attachments and inline images.

    Typical use: construct with the SMTP server, call ``login()``, assign
    ``receives`` / ``title`` / ``content``, optionally call
    ``add_attachment()`` / ``add_image()``, then ``send()`` and ``logout()``.
    """

    def __init__(self, smtp_server, smtp_port=25, ssl=False):
        """Open the SMTP connection.

        :param smtp_server: host name or address of the SMTP server.
        :param smtp_port: server port; when ``ssl`` is true and the port is
            left at the default 25, port 465 is used instead.
        :param ssl: connect with ``smtplib.SMTP_SSL`` instead of ``smtplib.SMTP``.
        """
        self._message = MIMEMultipart()
        if ssl:
            self.smtp = smtplib.SMTP_SSL(smtp_server, 465 if smtp_port == 25 else smtp_port)
        else:
            self.smtp = smtplib.SMTP(smtp_server, smtp_port)
        self._receivers = None
        self._sender = None
        self._content = ''
        self._imageid = 0

    def _set_header(self, name, value):
        """Set header ``name`` to ``value``, replacing any earlier value."""
        # Message.__setitem__ appends a new header line, so assigning twice
        # would leave duplicates; remove the old one(s) first. Deleting a
        # header that is not present is a no-op.
        del self._message[name]
        self._message[name] = value

    def login(self, user, password):
        """Authenticate against the server and use ``user`` as the sender.

        :raises smtplib.SMTPAuthenticationError: if the login fails for any
            reason (the underlying error is printed first).
        """
        try:
            self.smtp.login(user, password)
        except Exception as e:
            print(e)
            raise smtplib.SMTPAuthenticationError(1, "login failed")
        self._set_header("From", user)
        self._sender = user

    @property
    def receives(self):
        """Recipient addresses as last assigned (list or tuple), or ``None``."""
        return self._receivers

    @receives.setter
    def receives(self, receivers):
        # A string is treated as a comma-separated address list; a list or
        # tuple is used as given. Any other type is ignored, as before.
        if isinstance(receivers, str):
            receivers = [addr.strip() for addr in receivers.split(',') if addr.strip()]
        elif not isinstance(receivers, (list, tuple)):
            return
        # The header value must be a string, never the list itself.
        self._set_header("To", ','.join(receivers))
        self._receivers = receivers

    @property
    def title(self):
        """The ``Subject`` header, or ``None`` if it has not been set."""
        return self._message.get("Subject")

    @title.setter
    def title(self, title):
        self._set_header("Subject", Header(title, 'utf-8'))

    @property
    def content(self):
        """The HTML body text accumulated so far."""
        # Return the body text so that the getter mirrors the setter
        # (and ``mail.content += "..."`` works).
        return self._content

    @content.setter
    def content(self, content):
        self._content = content

    @property
    def imageid(self):
        """Return the next inline-image sequence number (increments on every read)."""
        self._imageid += 1
        return self._imageid

    def append_content(self, content):
        """Append ``content`` to the HTML body."""
        self._content += content

    def add_attachment(self, file):
        """Attach the file at path ``file`` as a base64 ``application/octet-stream`` part."""
        with open(file, 'rb') as fp:
            att = MIMEText(fp.read(), 'base64', 'utf-8')
        # MIMEText has already written a Content-Type header; replace it
        # instead of adding a second, conflicting one.
        att.replace_header('Content-Type', 'application/octet-stream')
        # RFC 2231 triple so that non-ASCII file names survive.
        att.add_header('Content-Disposition', 'attachment', filename=('utf-8', '', os.path.basename(file)))
        self._message.attach(att)

    def add_image(self, img):
        """Attach the image at path ``img`` and reference it inline at the end of the body."""
        with open(img, 'rb') as fp:
            msg = MIMEImage(fp.read())
        imageid = 'image%s' % self.imageid
        # A Content-ID value is written in angle brackets; the matching
        # "cid:" URL in the HTML uses the bare id.
        msg.add_header('Content-ID', '<%s>' % imageid)
        self._content += '<br/><img src="cid:%s">' % imageid
        self._message.attach(msg)

    def send(self):
        """Attach the HTML body and send the message to the current recipients.

        Each call attaches the body again, so an instance is meant to be
        sent once.

        :raises smtplib.SMTPConnectError: if sending fails for any reason
            (the underlying error is printed first).
        """
        self._message.attach(MIMEText(self._content, 'html', 'utf-8'))
        try:
            self.smtp.sendmail(self._sender, self._receivers, self._message.as_string())
        except Exception as e:
            print(e)
            raise smtplib.SMTPConnectError(1, "Send mail error")

    def logout(self):
        """Close the SMTP session."""
        self.smtp.quit()

- 阅读剩余部分 -

使用 Haproxy、Nginx 等软件反向代理 HTTP 协议时,往往会设置 X-Forwarded-For 等头部让后端 Web server 能正确取到客户端真实 IP。如果做 4 层代理,那么就得借助 Linux 的特性 TProxy 来实现,本文讲解如何配置 Haproxy、Nginx 来实现透明代理。

Haproxy

网上很多文章都说到 Haproxy 要重新编译,编译参数加上USE_LINUX_TPROXY=1。其实这种说法已经过时了,以 RHEL 系 Linux 发行版为例,RHEL6、RHEL7、RHEL8 官方提供的 rpm 包默认就加上了这个编译参数,因此如对版本无特殊要求,使用官方提供的版本就可以了,使用 yum install haproxy 安装即可。

使用限制

  • 代理服务器与后端服务器IP地址必须在同一个网段
  • 后端服务器的默认网关要指向代理服务器的地址

代理服务器配置

haproxy

listen 80
    bind :80
    mode tcp
    source 0.0.0.0 usesrc clientip
    server server1 server1:8080 weight 1

关键配置 source 0.0.0.0 usesrc clientip

- 阅读剩余部分 -

升级限制

  • SELinux 必须是关闭状态
  • 必须是 CentOS 6 x86_64
  • 根分区剩余空间不小于5GB,/boot分区剩余空间不小于120MB
  • 必须能连外网

升级风险

  • 升级需要1-3小时不等,甚至更长时间,视硬盘的速度和系统已安装的软件包数量
  • 升级可能会导致 sshd 起不来,因此远程升级有较大的风险
  • 升级可能会导致网卡配不上IP,因此远程升级有较大的风险
  • 升级后有部分软件不能使用(如 grep、yum等),需要手动处理
  • 升级后需要手动解决依赖关系,此步骤通常需要1-2小时甚至更久,视系统安装的软件包数量
  • 此升级不可逆,如升级失败系统将无法正常启动。物理机升级有较大风险,升级前务必做好数据备份。虚拟机升级前务必做好快照

升级路径

CentOS 6.x --> CentOS 7.2 --> CentOS 7 最新版

升级步骤

- 阅读剩余部分 -

接上一篇用Python写一个UDP端口测试工具(一)

需求

最近有个运维需求,需要测试客户端的UDP端口与服务器的连通性。

需求也很简单:客户端往服务端发UDP包,服务端收到包后响应客户端,当客户端能收到服务端的响应则可断定端口是可达的。但是服务端需要测试的端口有很多,需要一款工具来实现。

思路

虽然nc等工具可以测试端口,但是面对多端口测试场景,就显得捉襟见肘了,因此就想到使用Python的socket编程来自己写一个工具来实现这个功能。

具体的思路如下:

  1. 起一个TCP线程,用于客户端、服务端之间协商需要测试的端口
  2. 起一个UDP线程,用于测试端口是否可达
  3. 由客户端指定需要测试哪些端口,用逗号分开端口号,端口范围使用“-”或“:”连接符指定

show you the code

服务端

#!/usr/bin/env python                                                                                                                                                                                                                                                         
#!/usr/libexec/platform-python
# encoding: utf-8
import os
import signal
import socket
import time
from threading import Thread, Event
import json
try:
    import queue
except ImportError:
    import Queue as queue


class TCPThread(Thread):
    """Server-side control channel.

    Listens on TCP port 4000. For every JSON request naming a UDP port it
    checks whether that port can be bound locally, answers the client, and,
    if the port is free, hands the request to the UDP thread through the
    queue and waits for the event before reading the next request.
    """

    def __init__(self, queue, event):
        """
        :param queue: queue used to pass accepted requests to the UDP thread.
        :param event: event the UDP thread sets when it is done with a port.
        """
        Thread.__init__(self)
        self.queue = queue
        self.event = event

    @staticmethod
    def _udp_port_free(port):
        """Return True if ``port`` is a valid port number that can be bound for UDP now."""
        # The value comes straight from client JSON, so check its type and
        # range before handing it to bind(). bool is an int subclass.
        if isinstance(port, bool) or not isinstance(port, int):
            return False
        if not 0 < port <= 65535:
            return False
        us = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)
        try:
            us.bind(('0.0.0.0', port))
        except socket.error:
            return False
        finally:
            # Always release the probe socket, also when bind() failed.
            us.close()
        return True

    def _serve(self, conn):
        """Handle requests on one client connection until the peer closes it."""
        while True:
            data_recv = conn.recv(65535)
            if data_recv == b'':
                break
            try:
                data_json = json.loads(data_recv.decode())
            except ValueError:
                # ValueError covers both malformed JSON and undecodable bytes.
                print("json encode error")
                continue
            # Expected request:
            # {
            #     "proto": "udpping",
            #     "port": number,
            #     "available": False,
            #     "ready": False
            # }
            if not (isinstance(data_json, dict) and "proto" in data_json and "port" in data_json):
                print("proto error")
                continue
            if self._udp_port_free(data_json['port']):
                data_json["available"] = True
                conn.sendall(json.dumps(data_json).encode())
                self.queue.put(data_json)
                self.event.wait()   # wait until the UDP thread is done with this port
                self.event.clear()  # re-arm the event for the next request
            else:
                print("port %s unavailable" % data_json['port'])
                # Answer explicitly instead of echoing the client's own value.
                data_json["available"] = False
                conn.sendall(json.dumps(data_json).encode())

    def run(self):
        sock = socket.socket(family=socket.AF_INET, type=socket.SOCK_STREAM)
        sock.bind(('0.0.0.0', 4000))
        sock.listen(1)
        while True:
            conn, addr = sock.accept()
            try:
                self._serve(conn)
            except socket.error as e:
                # A broken client connection must not stop the server loop.
                print(e)
            finally:
                conn.close()


class UDPThread(Thread):
    """Server-side echo worker.

    Takes requests from the queue, binds the requested UDP port, echoes the
    first datagram received within about three seconds back to its sender,
    then sets the event so the TCP thread can continue.
    """

    def __init__(self, queue, event):
        """
        :param queue: queue the TCP thread puts accepted requests on.
        :param event: event set after each request has been processed.
        """
        Thread.__init__(self)
        self.queue = queue
        self.event = event

    @staticmethod
    def _echo_once(port, wait=3):
        """Bind UDP ``port`` and echo one datagram, giving up after ``wait`` seconds."""
        sock = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)
        try:
            sock.bind(('0.0.0.0', port))
            deadline = time.time() + wait
            while True:
                remaining = deadline - time.time()
                if remaining <= 0:
                    return
                sock.settimeout(remaining)
                try:
                    data_recv, addr = sock.recvfrom(65535)
                    sock.sendto(data_recv, addr)
                    return
                except socket.timeout:
                    return
                except socket.error:
                    # Transient receive/send error: keep listening until the
                    # deadline, with a short pause so the loop cannot spin.
                    time.sleep(0.01)
        finally:
            # Release the port so that it can be tested again later.
            sock.close()

    def run(self):
        while True:
            try:
                data = self.queue.get(timeout=3)
            except queue.Empty:
                continue
            try:
                self._echo_once(data['port'])
            except Exception as e:
                # Thread boundary: report the problem and keep serving.
                print(e)
            finally:
                # Always wake the TCP thread, otherwise it would wait forever.
                self.event.set()


class Utils(object):
    """Namespace for process-level helper callbacks."""

    @staticmethod
    def signal_handler(signal, frame):
        """Signal callback: end the process immediately with exit status 0.

        Both arguments are the ones the ``signal`` module passes to a
        handler; neither is used.
        """
        os._exit(0)


if __name__ == '__main__':
    # Server entry point: start the TCP control thread and the UDP echo
    # thread, which talk to each other through a queue and an event.
    signal.signal(signal.SIGINT, Utils.signal_handler)
    q = queue.Queue()
    event = Event()
    tcpthread = TCPThread(q, event)
    udpthread = UDPThread(q, event)

    tcpthread.start()
    udpthread.start()

    # Python runs signal handlers in the main thread. Keep it in a timed
    # join loop so the SIGINT handler installed above gets a chance to run
    # while the workers are alive.
    while tcpthread.is_alive() or udpthread.is_alive():
        tcpthread.join(0.5)
        udpthread.join(0.5)

客户端

#!/usr/libexec/platform-python
# encoding: utf-8
import re
import signal
import socket
from threading import Thread, Event
import json
import string
import random
import os
import sys
try:
    import queue
except ImportError:
    import Queue as queue


class TCPThread(Thread):
    """Client-side control channel.

    Connects to the server over TCP and, for each port in ``testportlist``,
    asks the server whether that UDP port is available. Ports the server
    reports as available are passed to the UDP thread through the queue;
    the thread then waits for the event before moving on to the next port.
    """

    def __init__(self, queue, tevent):
        """
        :param queue: queue used to pass server answers to the UDP thread.
        :param tevent: event the UDP thread sets when a port has been probed.
        """
        Thread.__init__(self)
        self.queue = queue
        self.event = tevent
        self.server = '127.0.0.1'
        self.port = 4000
        self.proto_json = {
                "proto": "udpping",
                "server": "127.0.0.1",
                "port": 4000,
                "available": False,
            }
        self.testportlist = []

    def setserver(self, server, port):
        """Set the server address and the TCP control port."""
        self.server = server
        self.port = port
        self.proto_json['server'] = server

    def _setport(self, port):
        """Append the single port ``port`` (a string or int) to the test list."""
        try:
            number = int(port)
        except ValueError:
            print("%s not a number, ignore" % port)
            return
        # 65535 is the highest valid port and must be accepted.
        if 0 < number <= 65535:
            self.testportlist.append(number)
        else:
            print("%s is a illegal port, ignore" % port)

    def _setportrange(self, portrange):
        """Append every port of the inclusive range ``start-end`` or ``start:end``."""
        bounds = re.split(':|-', portrange)
        if len(bounds) != 2:
            print("%s is a illegal port range, ignore" % portrange)
            return
        _start, _end = bounds
        try:
            first = int(_start)
            last = int(_end)
        except ValueError:
            print("%s or %s not a number, ignore %s" % (_start, _end, portrange))
            return
        # A one-port range (first == last) is legal, and so is port 65535.
        if 0 < first <= last <= 65535:
            self.testportlist.extend(range(first, last + 1))
        else:
            print("%s is a illegal port range, ignore" % portrange)

    def settestports(self, port):
        """Parse a specification such as ``'5000,6000,7000-8000'`` into the test list.

        Items are separated by commas; an item containing exactly one ``-``
        or ``:`` is a range, anything else is treated as a single port.
        """
        for _slice in port.split(','):
            if len(re.split('-|:', _slice)) == 2:
                self._setportrange(_slice)
            else:
                self._setport(_slice)

    def run(self):
        sock = socket.socket(family=socket.AF_INET, type=socket.SOCK_STREAM)
        try:
            sock.connect((self.server, self.port))
            while self.testportlist:
                self.proto_json["port"] = self.testportlist.pop(0)
                # sendall() keeps writing until the whole request is out.
                sock.sendall(json.dumps(self.proto_json).encode())
                data_recv = sock.recv(65535)
                if not data_recv:
                    # An empty read means the server closed the connection.
                    print("connection closed by server")
                    break
                data_json = json.loads(data_recv.decode())
                if data_json.get("available"):
                    self.queue.put(data_json)
                    self.event.wait()   # wait for the UDP thread to finish this port
                    self.event.clear()  # re-arm the event for the next port
        finally:
            sock.close()


class UDPThread(Thread):
    """Client-side probe worker.

    For every request taken from the queue it sends a random payload to the
    server's UDP port and waits for the same payload to be echoed back,
    resending on each timeout. Prints a message when the port could not be
    reached and sets the event so the TCP thread can continue.
    """

    def __init__(self, queue, tevent):
        """
        :param queue: queue the TCP thread puts server answers on.
        :param tevent: event set after each port has been probed.
        """
        Thread.__init__(self)
        self.queue = queue
        self.event = tevent

    @staticmethod
    def _probe(server, port, attempts=3, timeout=0.5):
        """Return True if ``server:port`` echoes the payload before ``attempts`` failures."""
        payload = Utils.random_string(64).encode()
        sock = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)
        try:
            sock.settimeout(timeout)
            sock.sendto(payload, (server, port))
            failures = 0
            while failures < attempts:
                try:
                    data_recv, addr = sock.recvfrom(65535)
                except socket.timeout:
                    sock.sendto(payload, (server, port))
                    failures += 1
                    continue
                except socket.error:
                    # Count other socket errors too, so that a persistent
                    # error cannot keep this loop running forever.
                    failures += 1
                    continue
                if data_recv == payload and addr[1] == port:
                    return True
            return False
        finally:
            sock.close()

    def run(self):
        while True:
            try:
                data = self.queue.get(timeout=3)
            except queue.Empty:
                # Nothing queued for 3 seconds: treated as "the TCP thread has finished".
                break
            try:
                try:
                    reachable = self._probe(data["server"], data["port"])
                except socket.error as e:
                    print(e)
                    reachable = False
                if not reachable:
                    print("port %s unreachable" % data["port"])
            finally:
                # Always wake the TCP thread, otherwise it would wait forever.
                self.event.set()


class Utils(object):
    """Small stateless helpers used by the client."""

    @staticmethod
    def random_string(length):
        """Return ``length`` characters drawn at random from ASCII letters and digits."""
        alphabet = string.ascii_letters + string.digits
        picked = [random.choice(alphabet) for _ in range(length)]
        return ''.join(picked)

    @staticmethod
    def signal_handler(signal, frame):
        """Signal callback: end the process immediately with exit status 0."""
        os._exit(0)


def h():
    """Print the command-line usage text and one example invocation."""
    usage_lines = (
        """ usage:""",
        """   this_program <dest_ip> <dest_port> <test_ports>""",
        '',
        " examples:",
        "   ./udpclient.py 192.168.1.1 4000 '5000,6000,7000-8000'",
        '',
    )
    for text in usage_lines:
        print(text)


if __name__ == '__main__':
    # Client entry point: <dest_ip> <dest_port> <test_ports>.
    # All three arguments are read below, so require exactly three;
    # otherwise show the usage text and stop.
    if len(sys.argv) != 4:
        h()
        sys.exit()
    signal.signal(signal.SIGINT, Utils.signal_handler)

    server = socket.gethostbyname(sys.argv[1])
    port = int(sys.argv[2])
    testports = sys.argv[3]

    q = queue.Queue()
    event = Event()
    tcpthread = TCPThread(q, event)
    tcpthread.setserver(server, port)
    tcpthread.settestports(testports)
    udpthread = UDPThread(q, event)

    tcpthread.start()
    udpthread.start()
    # Python runs signal handlers in the main thread. Join with a timeout
    # so the SIGINT handler installed above gets a chance to run while the
    # workers are busy.
    while tcpthread.is_alive() or udpthread.is_alive():
        tcpthread.join(0.2)
        udpthread.join(0.2)

    print('')

已知问题

  1. 程序一旦执行,不能使用Ctrl+C来停止,因为使用了多线程,不接收Ctrl+C信号,有解决方法。又不是不能用,就这样吧
  2. 服务端缺少重发机制,当服务端发给客户端的回应包丢包,则显示这个端口不可达(客户端有重发机制,当客户端到服务端的包丢包,会重发3次)