Django for Cluster Info
# Image for the Django "Cluster Info" app: the Django server plus a weekly cron job.
FROM python:3.9-slim

# Install required packages. --no-install-recommends keeps cron from pulling
# in optional extras (such as a mail transport agent) that nothing here uses.
RUN apt-get update && \
    apt-get install -y --no-install-recommends cron vim && \
    rm -rf /var/lib/apt/lists/*

# Unbuffered stdout/stderr so Python output reaches the container log immediately.
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# --no-cache-dir: the pip download cache is useless inside an image layer.
RUN pip install --no-cache-dir django

COPY . /app/

EXPOSE 8000

# Add the crontab job: every Monday at 00:00, as root, with output appended to a log.
# NOTE(review): assumes data_script.py sits directly under /app after COPY — confirm
# against the build context.
RUN echo "0 0 * * MON root python /app/data_script.py >> /var/log/data_script.log 2>&1" >> /etc/crontab

# Start cron service, then create/apply migrations and serve on port 8000.
# NOTE(review): runserver is Django's development server — confirm this image is not
# meant for production traffic.
CMD ["sh", "-c", "service cron start && python manage.py makemigrations && python manage.py migrate && python manage.py runserver 0.0.0.0:8000"]
Steps followed: add ‘cluster_info’ to INSTALLED_APPS in settings.py → create the models → run the migrations → create the views → create a templates folder inside the ‘cluster_info’ app → add the .html file to it → create urls.py in the app and edit it → edit the base urls.py → add the data insertion script inside the ‘cluster_info’ app
# Install Django into the current Python environment.
pip install django
# Scaffold a new Django project named ClusterInfoApp.
django-admin startproject ClusterInfoApp
# Enter the project directory; the remaining command uses its manage.py.
cd ClusterInfoApp
# Create the cluster_info app inside the project.
python manage.py startapp cluster_info
The script to get the data in the required format
#!/bin/bash
#
# Collect hardware facts from every Kubernetes node over SSH and append one
# CSV line per node to server_data3.txt, in this column order:
#   hostname,ip,cores,total_mem,size(/),size(/run),size(/boot)
#
# Requires kubectl with a working context and non-interactive SSH access to
# each node under the name kubectl reports.
#
# NOTE: lines are appended, so running this twice leaves duplicate rows in
# the output file.
set -euo pipefail

output_file="server_data3.txt"

# Ask kubectl for bare node names. This replaces the `grep -v ^NAME | awk`
# filter and the nodes.txt scratch file, which would overwrite and then
# delete any unrelated nodes.txt in the working directory.
if ! node_list=$(kubectl get nodes --no-headers -o custom-columns=NAME:.metadata.name); then
  echo "error: could not list nodes with kubectl" >&2
  exit 1
fi
mapfile -t server_vms <<<"$node_list"

for hostname in "${server_vms[@]}"; do
  # An empty node list still yields one empty array element; skip it.
  [[ -n "$hostname" ]] || continue

  # The remote program is single-quoted so nothing expands locally; the \$N
  # inside the double-quoted awk programs reaches the remote awk as $N.
  # -n stops ssh from consuming this script's stdin.
  if ! data=$(ssh -n "$hostname" '
    hostname
    hostname -I | awk "{print \$1}"
    nproc
    free -h | awk "/Mem:/ {print \$2}"
    df -h / | awk "NR==2{print \$2}"
    df -h /run | awk "NR==2{print \$2}"
    df -h /boot | awk "NR==2{print \$2}"
  '); then
    # Skip unreachable nodes instead of writing an empty/partial CSV line.
    echo "warning: ssh to $hostname failed; skipping" >&2
    continue
  fi

  # Command substitution already dropped the trailing newline, so turning
  # the remaining newlines into commas gives the CSV record directly.
  data_cleaned=${data//$'\n'/,}
  printf '%s\n' "$data_cleaned" >> "$output_file"
done
## The output data should look like this
master,192.168.219.246,4,7.6Gi,56G,777M,2.0G
worker,192.168.219.247,4,7.6Gi,56G,777M,2.0G
The current path looks like
.
├── ClusterInfoApp
│ ├── cluster_info
│ │ ├── admin.py
│ │ ├── apps.py
│ │ ├── data_script.py
│ │ ├── __init__.py
│ │ ├── migrations
│ │ │ ├── 0001_initial.py
│ │ │ ├── __init__.py
│ │ │ └── __pycache__
│ │ │ ├── 0001_initial.cpython-310.pyc
│ │ │ └── __init__.cpython-310.pyc
│ │ ├── models.py
│ │ ├── __pycache__
│ │ │ ├── admin.cpython-310.pyc
│ │ │ ├── apps.cpython-310.pyc
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── models.cpython-310.pyc
│ │ │ ├── urls.cpython-310.pyc
│ │ │ └── views.cpython-310.pyc
│ │ ├── templates
│ │ │ └── cluster_info.html
│ │ ├── tests.py
│ │ ├── urls.py
│ │ └── views.py
│ ├── ClusterInfoApp
│ │ ├── asgi.py
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── settings.cpython-310.pyc
│ │ │ ├── urls.cpython-310.pyc
│ │ │ └── wsgi.cpython-310.pyc
│ │ ├── settings.py
│ │ ├── urls.py
│ │ └── wsgi.py
│ ├── db.sqlite3
│ └── manage.py
├── server_data3.txt
└── test.sh
Script to push the data from the .txt file into the DB
"""Reload the NodeData table from a comma-separated node inventory file.

Each valid line has exactly seven fields:
    hostname,ip_address,total_cores,total_mem,total_disk_os,total_disk_app,total_disk_boot

The file is parsed completely before the database is touched, and the
delete + insert runs in a single transaction. A missing file or a bad line
therefore can no longer leave the table empty or half-filled.
"""
import os
import sys

import django

# Put the project root (the directory two levels above this file) on the
# import path so the settings module and the cluster_info app resolve.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(BASE_DIR)
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ClusterInfoApp.settings')
django.setup()

# These imports need configured settings, so they must follow django.setup().
from django.db import transaction  # noqa: E402
from cluster_info.models import NodeData  # noqa: E402

# Default input file; set NODE_DATA_FILE in the environment to override it.
DATA_FILE = os.environ.get('NODE_DATA_FILE', '/home/bastion/test/server_data3.txt')
EXPECTED_FIELD_COUNT = 7

# Step 1: parse everything first. If the file is missing, open() raises here,
# before any existing rows have been deleted.
parsed_nodes = []
with open(DATA_FILE, 'r') as file:
    for line in file:
        if not line.strip():
            # Blank lines carry no record; skip them quietly.
            continue
        fields = line.strip().split(',')
        print(f"Fields: {fields}")
        if len(fields) != EXPECTED_FIELD_COUNT:
            print(f"Invalid data format: {line}")
            continue
        try:
            total_cores = int(fields[2])
        except ValueError:
            # A non-numeric core count is reported like any other malformed line.
            print(f"Invalid data format: {line}")
            continue
        parsed_nodes.append(
            NodeData(
                hostname=fields[0],
                ip_address=fields[1],
                total_cores=total_cores,
                total_mem=fields[3],
                total_disk_os=fields[4],
                total_disk_app=fields[5],
                total_disk_boot=fields[6],
            )
        )

# Step 2: swap the table contents atomically — either the old rows stay or
# all of the new rows are in place.
with transaction.atomic():
    NodeData.objects.all().delete()
    for node_data in parsed_nodes:
        node_data.save()
models.py
from django.db import models
class NodeData(models.Model):
    """Hardware facts for one cluster node, one database row per node.

    Memory and disk sizes are kept as short text values rather than numbers;
    only the core count is numeric.
    """

    # Node name and address, both stored as free text.
    hostname = models.CharField(max_length=100)
    ip_address = models.CharField(max_length=100)
    # Number of CPU cores.
    total_cores = models.IntegerField()
    # Size values stored as text (at most 20 characters each).
    total_mem = models.CharField(max_length=20)
    total_disk_os = models.CharField(max_length=20)
    total_disk_app = models.CharField(max_length=20)
    total_disk_boot = models.CharField(max_length=20)

    class Meta:
        # Bind the model to the cluster_info app explicitly.
        app_label = 'cluster_info'

    def __str__(self):
        """Represent the node by its hostname."""
        return self.hostname
urls.py (inside ‘cluster_info’)
from django.urls import path
from . import views
# Routes of the cluster_info app. The empty pattern maps the app's root
# (relative to wherever this URLconf is included) to the cluster_info view.
urlpatterns = [
    path('', views.cluster_info, name='cluster_info'),
]
views.py
from django.shortcuts import render
from .models import NodeData
def cluster_info(request):
    """Render cluster_info.html with every NodeData row exposed as ``nodes``."""
    context = {'nodes': NodeData.objects.all()}
    return render(request, 'cluster_info.html', context)
templates/cluster_info.html
<!DOCTYPE html>
{# Lists every entry of the `nodes` context variable as one table row. #}
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Cluster Information</title>
    <style>
        /* Apply styles to the table */
        table {
            width: 100%;
            border-collapse: collapse;
            border: 1px solid #ddd;
        }
        th, td {
            padding: 8px;
            text-align: left;
            border-bottom: 1px solid #ddd;
        }
        th {
            background-color: #f2f2f2;
        }
        /* Add hover effect to table rows */
        tr:hover {
            background-color: #f5f5f5;
        }
    </style>
</head>
<body>
    <h1>Cluster Information</h1>
    <table border="1">
        <thead>
            <tr>
                <th>Hostname</th>
                <th>IP Address</th>
                <th>Total Cores</th>
                <th>Total Memory</th>
                <th>Total Disk OS</th>
                <th>Total Disk App</th>
                <th>Total Disk Boot</th>
            </tr>
        </thead>
        <tbody>
            {% for node in nodes %}
            <tr>
                <td>{{ node.hostname }}</td>
                <td>{{ node.ip_address }}</td>
                <td>{{ node.total_cores }}</td>
                <td>{{ node.total_mem }}</td>
                <td>{{ node.total_disk_os }}</td>
                <td>{{ node.total_disk_app }}</td>
                <td>{{ node.total_disk_boot }}</td>
            </tr>
            {% empty %}
            {# Shown instead of a bare header row when there is nothing to list. #}
            <tr>
                <td colspan="7">No node data available.</td>
            </tr>
            {% endfor %}
        </tbody>
    </table>
</body>
</html>
urls.py (main)
from django.contrib import admin
from django.urls import path, include
# Project-level routes: the Django admin under admin/, and everything under
# cluster_info/ delegated to the cluster_info app's own URLconf.
urlpatterns = [
    path('admin/', admin.site.urls),
    path('cluster_info/', include('cluster_info.urls')),
]
To get Nodes info using K8s API
from kubernetes import client, config
import subprocess
def get_disk_total_space(path):
    """Return the human-readable total size of the filesystem holding ``path``.

    The value is the "Size" column of ``df -hP`` (for example ``"56G"``).
    ``df`` runs on the machine executing this script, so the figure describes
    the local host.

    Args:
        path: Filesystem path to inspect.

    Returns:
        The size string, or ``"-"`` when df is unavailable, fails (for example
        because the path does not exist) or prints nothing usable.
    """
    try:
        # -P forces one line per filesystem, so a long device name cannot wrap
        # and shift the columns.
        result = subprocess.run(["df", "-hP", path], capture_output=True, text=True)
    except OSError:
        return "-"
    if result.returncode != 0:
        return "-"

    lines = result.stdout.strip().splitlines()
    # Line 0 is the header; without a data line there is nothing to report.
    if len(lines) < 2:
        return "-"

    columns = lines[-1].split()
    if len(columns) >= 2:
        return columns[1]
    return "-"
def convert_kib_to_gib(kib_value_with_unit):
    """Convert a Kubernetes memory quantity string to GiB, e.g. ``"7.59Gi"``.

    Node capacity is usually reported in ``Ki`` (``"7955872Ki"``), but the
    quantity format also allows other binary suffixes (Mi, Gi, ...), decimal
    suffixes (k, M, G, ...) and plain byte counts. All of these are accepted
    here instead of raising ``ValueError`` on anything that is not ``Ki``.

    Args:
        kib_value_with_unit: Quantity string such as ``"7955872Ki"`` or ``"8Gi"``.

    Returns:
        The value in GiB with two decimals and a ``Gi`` suffix.

    Raises:
        ValueError: If the numeric part cannot be parsed.
    """
    # Suffix -> size of one unit in bytes.
    multipliers = {
        'Ki': 1024, 'Mi': 1024 ** 2, 'Gi': 1024 ** 3,
        'Ti': 1024 ** 4, 'Pi': 1024 ** 5, 'Ei': 1024 ** 6,
        'k': 1000, 'M': 1000 ** 2, 'G': 1000 ** 3,
        'T': 1000 ** 4, 'P': 1000 ** 5, 'E': 1000 ** 6,
    }
    text = str(kib_value_with_unit).strip()

    number, factor = text, 1  # no suffix: the quantity is a byte count
    # Try two-letter suffixes first so "Gi" is not mistaken for something else.
    for suffix in sorted(multipliers, key=len, reverse=True):
        if text.endswith(suffix):
            number, factor = text[:-len(suffix)], multipliers[suffix]
            break

    gib_value = float(number) * factor / (1024 ** 3)
    return f"{gib_value:.2f}Gi"
def get_node_info(api_instance):
    """Build one CSV line per node returned by ``api_instance.list_node()``.

    Column order: name, InternalIP (or "N/A"), CPU capacity, memory in GiB,
    then the sizes reported for "/", "/app" and "/var/lib/containerd".

    NOTE(review): the three disk sizes come from get_disk_total_space, which
    runs df on the machine executing this script, so every node gets the same
    disk figures — confirm whether per-node values were intended.

    Args:
        api_instance: Object exposing ``list_node()``, e.g. a CoreV1Api client.

    Returns:
        List of comma-separated strings, one per node.
    """
    records = []
    for node in api_instance.list_node().items:
        node_name = node.metadata.name
        internal_ips = (
            entry.address
            for entry in node.status.addresses
            if entry.type == "InternalIP"
        )
        ip_address = next(internal_ips, "N/A")
        capacity = node.status.capacity
        columns = [
            node_name,
            ip_address,
            capacity['cpu'],
            convert_kib_to_gib(capacity['memory']),
            get_disk_total_space("/"),
            get_disk_total_space("/app"),
            get_disk_total_space("/var/lib/containerd"),
        ]
        records.append(",".join(str(column) for column in columns))
    return records
def main():
    """Query the cluster from the local kubeconfig and write node_info.txt."""
    config.load_kube_config()  # Loads your kubeconfig file
    core_api = client.CoreV1Api()
    report = "\n".join(get_node_info(core_api))
    with open("node_info.txt", "w") as out_file:
        out_file.write(report)


if __name__ == '__main__':
    main()
## output
$ cat node_info.txt
master,192.168.219.246,4,7.59Gi,56G,-,56G
worker,192.168.219.247,4,7.59Gi,56G,-,56G
Another way is
from kubernetes import client, config
import subprocess
def get_disk_total_space(path):
    """Describe the total size of the filesystem holding ``path``.

    The size is the "Size" column of ``df -hP``. ``df`` runs on the machine
    executing this script, so the figure describes the local host.

    Args:
        path: Filesystem path to inspect.

    Returns:
        ``"Total: <size>"`` on success, otherwise
        ``"Disk space information unavailable"`` (df missing, df failed — for
        example because the path does not exist — or no usable output).
    """
    unavailable = "Disk space information unavailable"
    try:
        # -P forces one line per filesystem, so a long device name cannot wrap
        # and shift the columns.
        result = subprocess.run(["df", "-hP", path], capture_output=True, text=True)
    except OSError:
        return unavailable
    if result.returncode != 0:
        return unavailable

    lines = result.stdout.strip().splitlines()
    # Line 0 is the header; without a data line there is nothing to report.
    if len(lines) < 2:
        return unavailable

    columns = lines[-1].split()
    if len(columns) >= 2:
        return f"Total: {columns[1]}"
    return unavailable
def get_node_info(api_instance):
    """Build a multi-line, human-readable report entry for every node.

    Each entry lists the node name, its raw CPU and memory capacity strings,
    and the results of get_disk_total_space for "/", "/app" and
    "/var/lib/containerd".

    NOTE(review): get_disk_total_space runs df on the machine executing this
    script, so the disk lines are identical for every node — confirm whether
    per-node values were intended.

    Args:
        api_instance: Object exposing ``list_node()``, e.g. a CoreV1Api client.

    Returns:
        List of report strings, one per node, each ending in a newline.
    """
    entries = []
    for node in api_instance.list_node().items:
        node_name = node.metadata.name
        capacity = node.status.capacity
        cpu = capacity['cpu']
        memory = capacity['memory']
        root_size = get_disk_total_space("/")
        app_size = get_disk_total_space("/app")
        containerd_size = get_disk_total_space("/var/lib/containerd")
        entry = (
            f"Node: {node_name}\n"
            f"CPU Capacity: {cpu}\n"
            f"Memory Capacity: {memory}\n"
            f"Disk Usage - /: {root_size}\n"
            f"Disk Usage - /app: {app_size}\n"
            f"Disk Usage - /var/lib/containerd: {containerd_size}\n"
        )
        entries.append(entry)
    return entries
def main():
    """Load the local kubeconfig, collect the node report and save it to node_info.txt."""
    config.load_kube_config()  # Loads your kubeconfig file
    api = client.CoreV1Api()
    entries = get_node_info(api)
    text = "\n".join(entries)
    with open("node_info.txt", "w") as handle:
        handle.write(text)


if __name__ == '__main__':
    main()
## output
Node: master
CPU Capacity: 4
Memory Capacity: 7955872Ki
Disk Usage - /: Total: 56G
Disk Usage - /app: Disk space information unavailable
Disk Usage - /var/lib/containerd: Total: 56G
Node: worker
CPU Capacity: 4
Memory Capacity: 7955872Ki
Disk Usage - /: Total: 56G
Disk Usage - /app: Disk space information unavailable
Disk Usage - /var/lib/containerd: Total: 56G
Backing up the server_data.txt file from within the script
#!/bin/bash
#
# Collect hardware facts from every Kubernetes node over SSH into a
# timestamped CSV file, then copy that file into ./backup.
#
# CSV columns: hostname,ip,cores,total_mem,size(/),size(/run),size(/boot)
#
# Requires kubectl with a working context and non-interactive SSH access to
# each node under the name kubectl reports.
set -euo pipefail

# The original line mixed quote types ("%Y%m%d%H%M%S'), which is a syntax error.
timestamp=$(date +'%Y%m%d%H%M%S')
output_file="server_data_$timestamp.txt"
backup_dir="backup"
backup_file="$backup_dir/server_data_backup_$timestamp.txt"

# Ask kubectl for bare node names. This replaces the `grep -v ^NAME | awk`
# filter and the nodes.txt scratch file, which would overwrite and then
# delete any unrelated nodes.txt in the working directory.
if ! node_list=$(kubectl get nodes --no-headers -o custom-columns=NAME:.metadata.name); then
  echo "error: could not list nodes with kubectl" >&2
  exit 1
fi
mapfile -t server_vms <<<"$node_list"

for hostname in "${server_vms[@]}"; do
  # An empty node list still yields one empty array element; skip it.
  [[ -n "$hostname" ]] || continue

  # The remote program is single-quoted so nothing expands locally; the \$N
  # inside the double-quoted awk programs reaches the remote awk as $N.
  # -n stops ssh from consuming this script's stdin.
  if ! data=$(ssh -n "$hostname" '
    hostname
    hostname -I | awk "{print \$1}"
    nproc
    free -h | awk "/Mem:/ {print \$2}"
    df -h / | awk "NR==2{print \$2}"
    df -h /run | awk "NR==2{print \$2}"
    df -h /boot | awk "NR==2{print \$2}"
  '); then
    # Skip unreachable nodes instead of writing an empty/partial CSV line.
    echo "warning: ssh to $hostname failed; skipping" >&2
    continue
  fi

  # Command substitution already dropped the trailing newline, so turning
  # the remaining newlines into commas gives the CSV record directly.
  data_cleaned=${data//$'\n'/,}
  printf '%s\n' "$data_cleaned" >> "$output_file"
done

# Nothing was collected (no nodes, or every ssh failed): nothing to back up.
if [[ ! -f "$output_file" ]]; then
  echo "error: no node data collected; $output_file was not created" >&2
  exit 1
fi

# Create a backup directory if it doesn't exist
mkdir -p "$backup_dir"
# Copy the current data to the backup file
cp -- "$output_file" "$backup_file"
New Dockerfile
# Image for the Django "Cluster Info" app: the Django server plus a weekly cron job.
FROM python:3.9-slim

# Install required packages. --no-install-recommends keeps cron from pulling
# in optional extras (such as a mail transport agent) that nothing here uses.
RUN apt-get update && \
    apt-get install -y --no-install-recommends cron vim && \
    rm -rf /var/lib/apt/lists/*

# key=value is the current ENV form; the space-separated form is legacy.
# Unbuffered stdout/stderr so Python output reaches the container log immediately.
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# --no-cache-dir: the pip download cache is useless inside an image layer.
RUN pip install --no-cache-dir django

COPY . /app/

EXPOSE 8000

# Add the crontab job: every Monday at 00:00, as root, with output appended to a log.
# NOTE(review): assumes data_script.py sits directly under /app after COPY — confirm
# against the build context.
RUN echo "0 0 * * MON root python /app/data_script.py >> /var/log/data_script.log 2>&1" >> /etc/crontab

# Start cron service, then create/apply migrations and serve on port 8000.
# NOTE(review): runserver is Django's development server — confirm this image is not
# meant for production traffic.
CMD ["sh", "-c", "service cron start && python manage.py makemigrations && python manage.py migrate && python manage.py runserver 0.0.0.0:8000"]
<!DOCTYPE html>
{# Lists every entry of `nodes` as a table row, with client-side hostname/IP filters. #}
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Cluster Information</title>
    <style>
        /* Apply styles to the table */
        table {
            width: 100%;
            border-collapse: collapse;
            border: 1px solid #ddd;
        }
        th, td {
            padding: 8px;
            text-align: left;
            border-bottom: 1px solid #ddd;
        }
        th {
            background-color: #f2f2f2;
        }
        /* Add hover effect to table rows */
        tr:hover {
            background-color: #f5f5f5;
        }
    </style>
</head>
<body>
    <h1>Cluster Information</h1>
    <!-- aria-label gives each filter an accessible name; a placeholder alone is not one. -->
    <input type="text" id="hostnameFilter" placeholder="Filter by Hostname" aria-label="Filter by Hostname">
    <input type="text" id="ipAddressFilter" placeholder="Filter by IP Address" aria-label="Filter by IP Address">
    <!-- Add similar input fields for other columns you want to filter -->
    <table border="1">
        <thead>
            <tr>
                <th>Hostname</th>
                <th>IP Address</th>
                <th>Total Cores</th>
                <th>Total Memory</th>
                <th>Total Disk OS</th>
                <th>Total Disk App</th>
                <th>Total Disk Boot</th>
            </tr>
        </thead>
        <tbody>
            {% for node in nodes %}
            {# The "row" id prefix is what the filter script uses to find data rows. #}
            <tr id="row{{ forloop.counter }}">
                <td>{{ node.hostname }}</td>
                <td>{{ node.ip_address }}</td>
                <td>{{ node.total_cores }}</td>
                <td>{{ node.total_mem }}</td>
                <td>{{ node.total_disk_os }}</td>
                <td>{{ node.total_disk_app }}</td>
                <td>{{ node.total_disk_boot }}</td>
            </tr>
            {% empty %}
            {# No id here, so the filter script never hides this message. #}
            <tr>
                <td colspan="7">No node data available.</td>
            </tr>
            {% endfor %}
        </tbody>
    </table>
    <script>
        document.addEventListener("DOMContentLoaded", function () {
            const rows = document.querySelectorAll("tr[id^='row']");
            // Look the inputs up once instead of on every keystroke.
            const hostnameInput = document.getElementById("hostnameFilter");
            const ipAddressInput = document.getElementById("ipAddressFilter");

            // Function to apply filtering: a row stays visible only when it
            // matches every filter (case-insensitive substring match).
            function applyFilters() {
                const hostnameFilter = hostnameInput.value.toLowerCase();
                const ipAddressFilter = ipAddressInput.value.toLowerCase();
                rows.forEach(function (row) {
                    const hostname = row.querySelector("td:nth-child(1)").textContent.toLowerCase();
                    const ipAddress = row.querySelector("td:nth-child(2)").textContent.toLowerCase();
                    // Add similar variables for other columns you want to filter
                    const matches = hostname.includes(hostnameFilter) && ipAddress.includes(ipAddressFilter);
                    row.style.display = matches ? "" : "none";
                });
            }

            // Add event listeners to input fields
            hostnameInput.addEventListener("input", applyFilters);
            ipAddressInput.addEventListener("input", applyFilters);
            // Add similar event listeners for other input fields

            // Initial filtering
            applyFilters();
        });
    </script>
</body>
</html>
개선
- add etcd node collection in the python script
- backup node data file periodically
- get the Image ready → get python3.9-slim image, get full image with vim and crontab enabled
- Cluster Information table to have some kind of filter functionality
- Multi Cluster Info? → maybe if possible
Using Flask now
Updated Shell script that gets the data from the nodes and also gets the cluster name from the kube context
#!/bin/bash
#
# Collect hardware facts from every Kubernetes node over SSH, prefix each
# record with the cluster name taken from the current kubectl context, and
# keep a dated copy of the result in ./backup.
#
# CSV columns:
#   cluster,hostname,ip,cores,total_mem,size(/),size(/run),size(/boot)
#
# Requires kubectl with a working context and non-interactive SSH access to
# each node under the name kubectl reports.
#
# NOTE: the file name only carries the date and lines are appended, so a
# second run on the same day adds duplicate rows.
set -euo pipefail

timestamp=$(date +'%Y%m%d')
output_file="server_data_$timestamp.txt"
backup_dir="backup"
backup_file="$backup_dir/server_data_backup_$timestamp.txt"

# Ask kubectl for bare node names. This replaces the `grep -v ^NAME | awk`
# filter and the nodes.txt scratch file, which would overwrite and then
# delete any unrelated nodes.txt in the working directory.
if ! node_list=$(kubectl get nodes --no-headers -o custom-columns=NAME:.metadata.name); then
  echo "error: could not list nodes with kubectl" >&2
  exit 1
fi
mapfile -t server_vms <<<"$node_list"

# Fetch the full cluster name from kubectl context
if ! full_clustername=$(kubectl config current-context); then
  echo "error: could not read the current kubectl context" >&2
  exit 1
fi
# Keep only what follows the first "@" (user@cluster -> cluster); a context
# name without "@" is used unchanged.
clustername=${full_clustername#*@}

for hostname in "${server_vms[@]}"; do
  # An empty node list still yields one empty array element; skip it.
  [[ -n "$hostname" ]] || continue

  # The remote program is single-quoted so nothing expands locally; the \$N
  # inside the double-quoted awk programs reaches the remote awk as $N.
  # -n stops ssh from consuming this script's stdin.
  if ! data=$(ssh -n "$hostname" '
    hostname
    hostname -I | awk "{print \$1}"
    nproc
    free -h | awk "/Mem:/ {print \$2}"
    df -h / | awk "NR==2{print \$2}"
    df -h /run | awk "NR==2{print \$2}"
    df -h /boot | awk "NR==2{print \$2}"
  '); then
    # Skip unreachable nodes instead of writing an empty/partial CSV line.
    echo "warning: ssh to $hostname failed; skipping" >&2
    continue
  fi

  # Command substitution already dropped the trailing newline, so turning
  # the remaining newlines into commas gives the CSV record directly.
  data_cleaned=${data//$'\n'/,}
  printf '%s\n' "$clustername,$data_cleaned" >> "$output_file"
done

# Nothing was collected (no nodes, or every ssh failed): nothing to back up.
if [[ ! -f "$output_file" ]]; then
  echo "error: no node data collected; $output_file was not created" >&2
  exit 1
fi

mkdir -p "$backup_dir"
cp -- "$output_file" "$backup_file"
## The output
cluster.local,master,192.168.219.246,4,7.6Gi,56G,777M,2.0G
cluster.local,worker,192.168.219.247,4,7.6Gi,56G,777M,2.0G
The tree for the flask project looks like this
├── flask_app
│ ├── app.py
│ ├── data.txt
│ └── templates
│ └── template.html
The app.py looks like this
from flask import Flask, render_template
import csv
app = Flask(__name__)
def read_data():
    """Load node records from ``data.txt`` in the current working directory.

    Each CSV row with at least eight columns becomes a dict keyed by
    clustername, hostname, ip_address, total_cores, total_mem, total_disk_os,
    total_disk_app and total_disk_boot; extra columns are ignored and shorter
    rows are skipped.

    Returns:
        A list of dicts, one per usable row. An empty list is returned when
        data.txt does not exist, so the page renders an empty table instead
        of failing while the file is being replaced.
    """
    field_names = (
        'clustername',
        'hostname',
        'ip_address',
        'total_cores',
        'total_mem',
        'total_disk_os',
        'total_disk_app',
        'total_disk_boot',
    )
    data = []
    try:
        # newline='' is how the csv module expects its input file to be opened.
        with open('data.txt', 'r', newline='') as file:
            for row in csv.reader(file):
                if len(row) >= len(field_names):
                    # zip stops after the eight known columns.
                    data.append(dict(zip(field_names, row)))
    except FileNotFoundError:
        return []
    return data
@app.route('/')
def display_data():
    """Serve the root URL: render template.html with the rows from read_data()."""
    return render_template('template.html', data=read_data())
# When run directly, start Flask's built-in server on all interfaces,
# port 8080, with debug mode off.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080, debug=False)
The template looks like this
<!DOCTYPE html>
{# Lists every entry of `data` as a table row, with a client-side cluster-name filter. #}
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Cluster Information</title>
    <style>
        body {
            font-family: Arial, sans-serif;
        }
        table {
            border-collapse: collapse;
            width: 80%;
            margin: auto;
            margin-top: 30px;
        }
        th, td {
            border: 1px solid #dddddd;
            text-align: left;
            padding: 8px;
        }
        th {
            background-color: #f2f2f2;
        }
        tr:nth-child(even) {
            background-color: #f2f2f2;
        }
        tr:hover {
            background-color: #d3d3d3;
        }
        .center-header {
            text-align: center;
            margin: 20px 0;
        }
    </style>
    <script>
        // Runs after the DOM is parsed, so the table and the input already exist
        // even though this script sits in <head>.
        document.addEventListener("DOMContentLoaded", function () {
            const rows = document.querySelectorAll("tr[id^='row']");
            // Look the input up once instead of on every keystroke.
            const clusterNameInput = document.getElementById("clusterNameFilter");

            // Function to apply filtering (case-insensitive substring match).
            function applyFilters() {
                const clusterNameFilter = clusterNameInput.value.toLowerCase();
                rows.forEach(function (row) {
                    // Adjust the index according to your new data structure
                    const clusterName = row.querySelector("td:nth-child(1)").textContent.toLowerCase();
                    row.style.display = clusterName.includes(clusterNameFilter) ? "" : "none";
                });
            }

            // Add event listener to input field
            clusterNameInput.addEventListener("input", applyFilters);

            // Initial filtering
            applyFilters();
        });
    </script>
</head>
<body>
    <div class="center-header">
        <h1>Cluster Information</h1>
        <!-- aria-label gives the filter an accessible name; a placeholder alone is not one. -->
        <input type="text" id="clusterNameFilter" placeholder="Cluster name Filter" aria-label="Cluster name Filter">
    </div>
    <table>
        <thead>
            <tr>
                <th>Cluster Name</th>
                <th>Hostname</th>
                <th>IP Address</th>
                <th>Total Cores</th>
                <th>Total Memory</th>
                <th>Total Disk OS</th>
                <th>Total Disk App</th>
                {# NOTE(review): this column is filled from total_disk_boot — confirm the heading matches the collected value. #}
                <th>Total Disk Containerd/ETCD</th>
            </tr>
        </thead>
        <tbody>
            {% for node in data %}
            {# The "row" id prefix is what the filter script uses to find data rows. #}
            <tr id="row{{ loop.index }}">
                <td>{{ node.clustername }}</td>
                <td>{{ node.hostname }}</td>
                <td>{{ node.ip_address }}</td>
                <td>{{ node.total_cores }}</td>
                <td>{{ node.total_mem }}</td>
                <td>{{ node.total_disk_os }}</td>
                <td>{{ node.total_disk_app }}</td>
                <td>{{ node.total_disk_boot }}</td>
            </tr>
            {% else %}
            {# Rendered only when `data` is empty; it has no id, so the filter never hides it. #}
            <tr>
                <td colspan="8">No node data available.</td>
            </tr>
            {% endfor %}
        </tbody>
    </table>
</body>
</html>
The script that pulls data from github repo
#!/bin/bash
#
# Rebuild /root/flask_app/data.txt from the data-*.txt files of a GitHub
# repository, keeping a dated backup of the previous data.txt.
#
# The new file is assembled next to the live one and moved into place only
# after the clone succeeded, so a failed pull no longer wipes data.txt and
# readers never see a missing or half-written file.
set -euo pipefail

timestamp=$(date +'%Y%m%d')
backup_dir="/root/flask_app/Backup"
local_path="/root/flask_app/pull"
# Set the GitHub repository URL
github_repo="https://github.com/Waji-97/data-test.git"
# Set the path to the final compiled data file
final_data_file="/root/flask_app/data.txt"
# Staging file in the same directory, so the final mv is a plain rename.
staging_file="${final_data_file}.new.$$"

# Remove the cloned repository and any leftover staging file on every exit path.
cleanup() {
  rm -rf -- "${local_path:?}"
  rm -f -- "$staging_file"
}
trap cleanup EXIT

mkdir -p "$backup_dir"

# Clone the GitHub repository containing the data files. Start from a clean
# target: leftovers from an interrupted run would make git clone refuse it.
rm -rf -- "${local_path:?}"
if ! git clone "$github_repo" "$local_path"; then
  echo "error: git clone of $github_repo failed; keeping existing data.txt" >&2
  exit 1
fi

# Set the path to the directory containing the data.txt files
data_directory="$local_path"

# Create an empty file for the compiled data
: > "$staging_file"

# Loop through each data.txt file and append its contents to the staged file.
# The -f test also covers the no-match case, where the glob stays literal.
for data_file in "$data_directory"/data-*.txt; do
  if [[ -f "$data_file" ]]; then
    cat -- "$data_file" >> "$staging_file"
  fi
done

# Back up the previous data file; on the very first run there is none yet.
if [[ -f "$final_data_file" ]]; then
  cp -- "$final_data_file" "$backup_dir/data_$timestamp.txt"
fi

# Swap the new data in.
mv -f -- "$staging_file" "$final_data_file"

## Remove backup files older than a week (not tested)
find "$backup_dir" -type f -name "data_*.txt" -ctime +7 -exec rm -- {} +