2024-09-13 16:58:34 +08:00

234 lines
6.9 KiB
Python

import os
import threading
import socket
import logging
import psutil
import GPUtil
import platform
from flask import Flask, jsonify, request, send_from_directory
from flask_cors import CORS
from tensorboard import program
from PytorchBoot.utils.project_util import ProjectUtil
from PytorchBoot.stereotype import get_all_component_classes, get_all_component_comments
from PytorchBoot.application import application_class
from PytorchBoot.status import status_manager
from PytorchBoot.utils.log_util import Log
from PytorchBoot.utils.timer_util import Timer
# Flask application object; its static folder is set to "../client", which is
# where the index/file routes below serve files from.
app = Flask(__name__, static_folder="../client")
# Only WARNING and above are emitted by the Flask app logger.
app.logger.setLevel("WARNING")
# Disable the "werkzeug" logger entirely.
logging.getLogger("werkzeug").disabled = True
# Apply flask_cors to the whole app with its default settings.
CORS(app)
# The project root is the process's working directory at import time.
root_path = os.getcwd()
# NOTE(review): presumably scanning imports the project's modules so that
# components/applications register themselves -- confirm in ProjectUtil.
ProjectUtil.scan_project(root_path)
# Config information discovered under the project root; returned as-is by
# the /project/structure endpoint.
configs = ProjectUtil.scan_configs(root_path)
# Maps a TensorBoard log directory to the URL of the instance started for it.
running_tensorboard = {}
@app.route("/")
def serve_index():
    """Serve ``index.html`` from the application's static folder."""
    index_page = "index.html"
    return send_from_directory(app.static_folder, index_page)
@app.route("/<path:path>")
def serve_file(path):
    """Serve a file from the application's static folder.

    Args:
        path: The remainder of the URL captured by the ``<path:path>`` route
            converter, used as the file name inside the static folder.
    """
    static_root = app.static_folder
    return send_from_directory(static_root, path)
@app.route("/test", methods=["POST"])
def hello_world():
    """Return a fixed greeting as a JSON object with a ``message`` key."""
    greeting = {"message": "Hello, World!"}
    return jsonify(greeting)
@app.route("/project/structure", methods=["POST"])
def project_structure():
    """Describe the scanned project as JSON.

    Returns:
        A JSON object with these keys:

        * ``components``: for each stereotype, a mapping of registered key to
          the class name registered under it.
        * ``comments``: whatever ``get_all_component_comments()`` returns.
        * ``applications``: mapping of application name to class name.
        * ``configs``: the module-level ``configs`` value.
        * ``root_path``: the module-level project root path.
    """
    components = {
        stereotype: {key: cls.__name__ for key, cls in registered.items()}
        for stereotype, registered in get_all_component_classes().items()
    }
    comments = get_all_component_comments()
    applications = {name: cls.__name__ for name, cls in application_class.items()}
    payload = {
        "components": components,
        "comments": comments,
        "applications": applications,
        "configs": configs,
        "root_path": root_path,
    }
    return jsonify(payload)
@app.route("/project/run_app", methods=["POST"])
def run_application():
    """Start a registered application in a background thread.

    Reads ``app_name`` from the JSON request body and looks the class up in
    ``application_class``. The class must expose a ``start`` attribute, which
    is called on the class itself (no instance is created here).

    Returns:
        A JSON object with ``message`` and ``status`` keys. ``status`` is
        ``"error"`` when no application is registered under ``app_name`` or
        the class has no ``start`` attribute, and ``"success"`` once the
        background thread has been started (not when the application ends).
    """
    data = request.json
    app_name = data.get("app_name")
    app_cls = application_class.get(app_name)
    if app_cls is None:
        # NOTE(review): the meaning of the second positional argument (True)
        # to Log.error is not visible in this file -- confirm it cannot abort
        # the request before the JSON error below is returned.
        Log.error(
            f"No class annotated with @PytorchBootApplication found with the name '{app_name}'.",
            True,
        )
        return jsonify(
            {
                "message": f"No application found with the name '{app_name}'",
                "status": "error",
            }
        )
    if not hasattr(app_cls, "start"):
        Log.error(
            "The class annotated with @PytorchBootApplication should have a 'start' method.",
            True,
        )
        return jsonify(
            {"message": "The class should have a 'start' method", "status": "error"}
        )

    def run_in_background():
        """Run the application and keep the status manager consistent."""
        Log.info(f"Application '{app_cls.__name__}' started.")
        timer = Timer("Application")
        timer.start()
        status_manager.run_app(app_name, app_cls)
        try:
            app_cls.start()
        finally:
            # Deregister the app and stop the timer even when start() raises;
            # otherwise a crashed application stays listed as running. The
            # exception itself still propagates out of the thread unchanged.
            status_manager.end_app(app_name)
            timer.stop()
        Log.info(timer.get_elasped_time_str(Timer.HOURS))
        Log.success("Application finished.")

    threading.Thread(target=run_in_background).start()
    return jsonify(
        {"message": f"Application '{app_name}' is running now.", "status": "success"}
    )
@app.route("/project/get_status", methods=["POST"])
def get_status():
    """Report application status together with host resource usage.

    CPU usage is sampled with ``psutil.cpu_percent(interval=1)``, which blocks
    for the sampling interval, so each request takes at least about a second.

    Returns:
        A JSON object with the status manager's current status, last status,
        logs, progress and running apps, plus ``cpu`` (model and usage
        percent), ``memory`` (used and total, in GiB rounded to three
        decimals) and ``gpus`` (name, memory used and memory total for each
        GPU reported by GPUtil).
    """
    bytes_per_gib = 1024**3

    cpu = {
        "model": platform.processor(),
        "usage_percent": psutil.cpu_percent(interval=1),
    }

    mem = psutil.virtual_memory()
    memory = {
        "used": round(mem.used / bytes_per_gib, 3),
        "total": round(mem.total / bytes_per_gib, 3),
    }

    gpu_summaries = [
        {
            "name": device.name,
            "memory_used": device.memoryUsed,
            "memory_total": device.memoryTotal,
        }
        for device in GPUtil.getGPUs()
    ]

    payload = {
        "curr_status": status_manager.get_status(),
        "last_status": status_manager.get_last_status(),
        "logs": status_manager.get_log(),
        "progress": status_manager.get_progress(),
        "running_apps": status_manager.get_running_apps(),
        "cpu": cpu,
        "memory": memory,
        "gpus": gpu_summaries,
    }
    return jsonify(payload)
@app.route("/project/set_status", methods=["POST"])
def set_status():
    """Update status and/or progress values from the JSON request body.

    The body may carry a ``status`` object (``app_name``, ``runner_name``,
    ``key``, ``value``) and/or a ``progress`` object (``app_name``,
    ``runner_name``, ``key``, ``curr_value``, ``max_value``). Each one that is
    present and truthy is forwarded to the status manager as keyword
    arguments.

    Returns:
        A JSON object ``{"status": "success"}``.
    """
    payload = request.json
    status = payload.get("status")
    progress = payload.get("progress")

    if status:
        status_fields = ("app_name", "runner_name", "key", "value")
        status_manager.set_status(**{name: status[name] for name in status_fields})

    if progress:
        progress_fields = ("app_name", "runner_name", "key", "curr_value", "max_value")
        status_manager.set_progress(
            **{name: progress[name] for name in progress_fields}
        )

    return jsonify(status="success")
@app.route("/project/add_log", methods=["POST"])
def add_log():
    """Forward a log entry from the JSON request body to ``Log.log``.

    The body must contain a ``log`` object with ``message`` and ``log_type``
    entries.

    Returns:
        A JSON object ``{"status": "success"}``.
    """
    entry = request.json.get("log")
    message, log_type = entry["message"], entry["log_type"]
    Log.log(message, log_type)
    return jsonify(status="success")
def find_free_port(start_port):
    """Find a free port starting from start_port.

    A port counts as free when a TCP connection attempt to it on
    ``localhost`` fails, i.e. nothing is accepting connections there.

    Args:
        start_port: First port number to probe; later ports are tried in
            ascending order.

    Returns:
        The first probed port on which the connection attempt failed.

    Raises:
        RuntimeError: If every port from ``start_port`` up to 65535 accepted
            a connection (or ``start_port`` is already above 65535).
    """
    max_port = 65535  # highest valid TCP port; connect_ex rejects larger values
    for port in range(start_port, max_port + 1):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            # connect_ex returns 0 on a successful connect, meaning the port
            # is in use; any non-zero error code is treated as "free".
            if sock.connect_ex(("localhost", port)) != 0:
                return port
    raise RuntimeError(f"No free port found in range {start_port}-{max_port}")
def start_tensorboard(log_dir, port):
    """Configure and launch a TensorBoard instance.

    Used by ``run_tensorboard`` as the target of a background thread.

    Args:
        log_dir: Directory passed to TensorBoard as ``--logdir``.
        port: Port passed to TensorBoard as ``--port`` (converted to a string).
    """
    # The leading None fills the first argv slot ahead of the real flags
    # (presumably the program-name position -- confirm against TensorBoard).
    cli_args = [None, "--logdir", log_dir, "--port", str(port)]
    board = program.TensorBoard()
    board.configure(argv=cli_args)
    board.launch()
@app.route("/tensorboard/run", methods=["POST"])
def run_tensorboard():
    """Start TensorBoard for a log directory, unless one is already recorded.

    Reads ``log_dir`` from the JSON request body. If a TensorBoard URL is
    already recorded for that directory, it is returned with a ``"warning"``
    status. Otherwise a free port is searched for starting at 10000,
    TensorBoard is started in a background thread, and the resulting URL is
    recorded in ``running_tensorboard``.

    Returns:
        A JSON object with ``message`` and ``status`` keys, plus ``url`` when
        a TensorBoard instance is (or already was) running. ``status`` is
        ``"error"`` when ``log_dir`` is missing, not a string, or not an
        existing directory, or when the thread could not be started.
    """
    data = request.json
    log_dir = data.get("log_dir")
    # A missing or non-string log_dir would make os.path.isdir raise TypeError
    # (or treat an integer as a file descriptor), so reject it up front with
    # the same error response used for a nonexistent directory.
    if not isinstance(log_dir, str):
        return jsonify({"message": "Log directory does not exist", "status": "error"})
    if log_dir in running_tensorboard:
        return jsonify(
            {
                "message": f"TensorBoard ({running_tensorboard[log_dir]}) is already running for <{log_dir}>",
                "url": running_tensorboard[log_dir],
                "status": "warning",
            }
        )
    if not os.path.isdir(log_dir):
        return jsonify({"message": "Log directory does not exist", "status": "error"})
    port = find_free_port(10000)
    try:
        # Only failures to create/start the thread are caught here; errors
        # raised inside start_tensorboard happen on the other thread and are
        # not visible to this handler.
        tb_thread = threading.Thread(target=start_tensorboard, args=(log_dir, port))
        tb_thread.start()
    except Exception as e:
        return jsonify(
            {"message": f"Error starting TensorBoard: {str(e)}", "status": "error"}
        )
    url = f"http://localhost:{port}"
    running_tensorboard[log_dir] = url
    return jsonify(
        {"url": url, "message": f"TensorBoard is running at {url}", "status": "success"}
    )
@app.route("/tensorboard/dirs", methods=["POST"])
def get_tensorboard_dirs():
    """List every directory named ``tensorboard`` under the project root.

    Returns:
        A JSON object whose ``tensorboard_dirs`` key holds the joined paths,
        in the order ``os.walk`` visits them.
    """
    matches = [
        os.path.join(parent, child)
        for parent, children, _ in os.walk(root_path)
        for child in children
        if child == "tensorboard"
    ]
    return jsonify({"tensorboard_dirs": matches})
@app.route("/tensorboard/running_tensorboards", methods=["POST"])
def get_running_tensorboards():
    """Return the recorded log-directory-to-URL mapping of TensorBoards.

    Returns:
        A JSON object whose ``running_tensorboards`` key holds the
        module-level ``running_tensorboard`` dictionary.
    """
    payload = {"running_tensorboards": running_tensorboard}
    return jsonify(payload)