From 965467d12924e7b1504c9054479802288bf6c969 Mon Sep 17 00:00:00 2001 From: Antonio De Lucreziis Date: Thu, 4 May 2023 02:15:28 +0200 Subject: [PATCH] feat: added basic scaffolding for host executor --- backend/database/{store.go => database.go} | 4 +- backend/database/mem.go | 13 +- backend/executor/exec.go | 17 ++- backend/executor/hostrunner.go | 72 +++++++++++ backend/executor/mock.go | 49 ++++---- backend/model/model.go | 2 +- backend/routes/api.go | 132 ++++++++++++++++++++- 7 files changed, 247 insertions(+), 42 deletions(-) rename backend/database/{store.go => database.go} (85%) create mode 100644 backend/executor/hostrunner.go diff --git a/backend/database/store.go b/backend/database/database.go similarity index 85% rename from backend/database/store.go rename to backend/database/database.go index b592858..534fd36 100644 --- a/backend/database/store.go +++ b/backend/database/database.go @@ -7,8 +7,8 @@ import ( ) type Database interface { - GetNodeStatus(hostname string) (*model.Node, error) - GetJobStatus(id int) (*model.Job, error) + GetNode(hostname string) (*model.Node, error) + GetJob(id string) (*model.Job, error) AllNodes() ([]*model.Node, error) AllJobs() ([]*model.Job, error) diff --git a/backend/database/mem.go b/backend/database/mem.go index 18da34b..88a4447 100644 --- a/backend/database/mem.go +++ b/backend/database/mem.go @@ -5,6 +5,7 @@ import ( "git.phc.dm.unipi.it/phc/cluster-dashboard/backend/executor" "git.phc.dm.unipi.it/phc/cluster-dashboard/backend/model" + "golang.org/x/exp/maps" ) // simpleDB è una implementazione di [database.Database] che tiene giusto una cache in memoria e @@ -17,10 +18,10 @@ type simpleDB struct { lastUpdate *time.Time // Nodes is a map from hostname to node info - nodes map[string]model.Node + nodes map[string]*model.Node // Jobs is a map from job id to job info - jobs map[int]model.Job + jobs map[string]*model.Job // The following are maps from hostname to a list of sampled temperatures, used memory, used
storage space and network upload and download rate. @@ -35,20 +36,20 @@ func NewSimpleDatabase(ex executor.Service) Database { return &simpleDB{Executor: ex} } -func (s *simpleDB) GetNodeStatus(hostname string) (*model.Node, error) { +func (s *simpleDB) GetNode(hostname string) (*model.Node, error) { panic("todo") } -func (s *simpleDB) GetJobStatus(id int) (*model.Job, error) { +func (s *simpleDB) GetJob(id string) (*model.Job, error) { panic("todo") } func (s *simpleDB) AllNodes() ([]*model.Node, error) { - panic("todo") + return maps.Values(s.nodes), nil } func (s *simpleDB) AllJobs() ([]*model.Job, error) { - panic("todo") + return maps.Values(s.jobs), nil } func (s *simpleDB) QueryTemperatureSamples(from, to time.Time) ([]model.Sample[float64], error) { diff --git a/backend/executor/exec.go b/backend/executor/exec.go index 356ae98..334f549 100644 --- a/backend/executor/exec.go +++ b/backend/executor/exec.go @@ -1,16 +1,15 @@ package executor -import "time" - // Service is a service that handles executing commands on the main host and does a first processing of the raw data it gets from the system type Service interface { - SlurmQueue() []string - SlurmJobs() []string + SlurmQueue() ([]string, error) + SlurmJobs() ([]string, error) - NodeUptime(hostname string) time.Time + NodeUptime(hostname string) (string, error) - Temperature(hostname string) float64 - MemoryUsage(hostname string) int64 - StorageUsage(hostname string) int64 - NetworkUploadDownload(hostname string) (int64, int64) + Temperature(hostname string) (string, error) + MemoryUsage(hostname string) (string, error) + StorageUsage(hostname string) (string, error) + NetworkUpload(hostname string) (string, error) + NetworkDownload(hostname string) (string, error) } diff --git a/backend/executor/hostrunner.go b/backend/executor/hostrunner.go new file mode 100644 index 0000000..5c15e66 --- /dev/null +++ b/backend/executor/hostrunner.go @@ -0,0 +1,72 @@ +package executor + +import ( + "bytes" + "fmt" + 
"os/exec" + "strings" +) + +type HostRunner struct{} + +func (HostRunner) SlurmQueue() ([]string, error) { + panic("todo") +} + +func (HostRunner) SlurmJobs() ([]string, error) { + panic("todo") +} + +func (HostRunner) NodeUptime(hostname string) (string, error) { + panic("todo") +} + +func (HostRunner) Temperature(hostname string) (string, error) { + panic("todo") +} + +func (HostRunner) MemoryUsage(hostname string) (string, error) { + panic("todo") +} + +func (HostRunner) StorageUsage(hostname string) (string, error) { + panic("todo") +} + +func (HostRunner) NetworkUpload(hostname string) (string, error) { + panic("todo") +} + +func (HostRunner) NetworkDownload(hostname string) (string, error) { + panic("todo") +} + +// +// Internals +// + +// run a shell command and returns its output as string +func run(command string) (string, error) { + buf := &bytes.Buffer{} + + // ctx := context.Background() + // ctx, cancel := context.WithTimeout(ctx, 1*time.Second) + // defer cancel() + // cmd := exec.CommandContext(ctx, "sh", "-c", command) + + cmd := exec.Command("sh", "-c", command) + cmd.Stdout = buf + + // BUG: Al momento se questo comando ci mette troppo tutto si blocca, quindi sarebbe meglio + // usare un context per dare un timeout al comando + if err := cmd.Run(); err != nil { + return "", err + } + + return buf.String(), nil +} + +// run a shell command on a given remote via ssh and returns its output as string, does a bit of escaping +func runOnRemote(destination, command string) (string, error) { + return run(fmt.Sprintf(`ssh %s '%s'`, destination, strings.ReplaceAll(command, `'`, `\'`))) +} diff --git a/backend/executor/mock.go b/backend/executor/mock.go index 0cb0a2e..4bc9427 100644 --- a/backend/executor/mock.go +++ b/backend/executor/mock.go @@ -1,43 +1,46 @@ package executor -import "time" - var _ Service = &Mock{} type Mock struct { - SlurmQueueFunc func() []string - SlurmJobsFunc func() []string - NodeUptimeFunc func(hostname string) time.Time - 
TemperatureFunc func(hostname string) float64 - MemoryUsageFunc func(hostname string) int64 - StorageUsageFunc func(hostname string) int64 - NetworkUploadDownloadFunc func(hostname string) (int64, int64) + SlurmQueueFunc func() ([]string, error) + SlurmJobsFunc func() ([]string, error) + NodeUptimeFunc func(hostname string) (string, error) + TemperatureFunc func(hostname string) (string, error) + MemoryUsageFunc func(hostname string) (string, error) + StorageUsageFunc func(hostname string) (string, error) + NetworkUploadFunc func(hostname string) (string, error) + NetworkDownloadFunc func(hostname string) (string, error) +} + +func (m Mock) SlurmQueue() ([]string, error) { + return m.SlurmQueueFunc() } -func (ex *Mock) SlurmQueue() []string { - return ex.SlurmQueueFunc() +func (m Mock) SlurmJobs() ([]string, error) { + return m.SlurmJobsFunc() } -func (ex *Mock) SlurmJobs() []string { - return ex.SlurmJobsFunc() +func (m Mock) NodeUptime(hostname string) (string, error) { + return m.NodeUptimeFunc(hostname) } -func (ex *Mock) NodeUptime(hostname string) time.Time { - return ex.NodeUptimeFunc(hostname) +func (m Mock) Temperature(hostname string) (string, error) { + return m.TemperatureFunc(hostname) } -func (ex *Mock) Temperature(hostname string) float64 { - return ex.TemperatureFunc(hostname) +func (m Mock) MemoryUsage(hostname string) (string, error) { + return m.MemoryUsageFunc(hostname) } -func (ex *Mock) MemoryUsage(hostname string) int64 { - return ex.MemoryUsageFunc(hostname) +func (m Mock) StorageUsage(hostname string) (string, error) { + return m.StorageUsageFunc(hostname) } -func (ex *Mock) StorageUsage(hostname string) int64 { - return ex.StorageUsageFunc(hostname) +func (m Mock) NetworkUpload(hostname string) (string, error) { + return m.NetworkUploadFunc(hostname) } -func (ex *Mock) NetworkUploadDownload(hostname string) (int64, int64) { - return ex.NetworkUploadDownloadFunc(hostname) +func (m Mock) NetworkDownload(hostname string) (string, error) { + 
return m.NetworkDownloadFunc(hostname) } diff --git a/backend/model/model.go b/backend/model/model.go index ae68ac2..a21dee6 100644 --- a/backend/model/model.go +++ b/backend/model/model.go @@ -10,7 +10,7 @@ type Numeric = constraints.Ordered // Job contiene le informazioni su un lavoro di slurm (il nostro gestore di code di lavori) type Job struct { - Id int `json:"id"` + Id string `json:"id"` Name string `json:"name"` Status string `json:"status"` diff --git a/backend/routes/api.go b/backend/routes/api.go index 0db67a0..4c1837e 100644 --- a/backend/routes/api.go +++ b/backend/routes/api.go @@ -1,9 +1,139 @@ package routes -import "github.com/gofiber/fiber/v2" +import ( + "fmt" + "time" + + "github.com/gofiber/fiber/v2" +) func (r *Router) Api(api fiber.Router) { api.Get("/status", func(c *fiber.Ctx) error { return c.JSON("ok") }) + + // GetNodeStatus + api.Get("/node/:hostname", func(c *fiber.Ctx) error { + hostname := c.Params("hostname") + if hostname == "" { + return fmt.Errorf(`must provide hostname`) + } + + node, err := r.Database.GetNode(hostname) + if err != nil { + return err + } + + return c.JSON(node) + }) + + // GetJobStatus + api.Get("/job/:id", func(c *fiber.Ctx) error { + id := c.Params("id") + if id == "" { + return fmt.Errorf(`must provide id`) + } + + job, err := r.Database.GetJob(id) + if err != nil { + return err + } + + return c.JSON(job) + }) + + // AllNodes + api.Get("/nodes", func(c *fiber.Ctx) error { + nodes, err := r.Database.AllNodes() + if err != nil { + return err + } + + return c.JSON(nodes) + }) + + // AllJobs + api.Get("/jobs", func(c *fiber.Ctx) error { + jobs, err := r.Database.AllJobs() + if err != nil { + return err + } + + return c.JSON(jobs) + }) + + // QueryTemperatureSamples + api.Get("/stats/temperature", QueryTimeRangeMiddleware(func(c *fiber.Ctx, from, to time.Time) error { + samples, err := r.Database.QueryTemperatureSamples(from, to) + if err != nil { + return err + } + + return c.JSON(samples) + })) + + // 
QueryMemorySamples + api.Get("/stats/memory", QueryTimeRangeMiddleware(func(c *fiber.Ctx, from, to time.Time) error { + samples, err := r.Database.QueryMemorySamples(from, to) + if err != nil { + return err + } + + return c.JSON(samples) + })) + + // QueryStorageSamples + api.Get("/stats/storage", QueryTimeRangeMiddleware(func(c *fiber.Ctx, from, to time.Time) error { + samples, err := r.Database.QueryStorageSamples(from, to) + if err != nil { + return err + } + + return c.JSON(samples) + })) + + // QueryNetworkUploadSamples + api.Get("/stats/network-upload", QueryTimeRangeMiddleware(func(c *fiber.Ctx, from, to time.Time) error { + samples, err := r.Database.QueryNetworkUploadSamples(from, to) + if err != nil { + return err + } + + return c.JSON(samples) + })) + + // QueryNetworkDownloadSamples + api.Get("/stats/network-download", QueryTimeRangeMiddleware(func(c *fiber.Ctx, from, to time.Time) error { + samples, err := r.Database.QueryNetworkDownloadSamples(from, to) + if err != nil { + return err + } + + return c.JSON(samples) + })) +} + +func QueryTimeRangeMiddleware(queryHandler func(c *fiber.Ctx, from, to time.Time) error) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + // by default just show 1 day of data + from, err := parseTimeParam(c.Query("from"), time.Now().Add(-24*time.Hour)) + if err != nil { + return err + } + + to, err := parseTimeParam(c.Query("to"), time.Now()) + if err != nil { + return err + } + + return queryHandler(c, from, to) + } +} + +func parseTimeParam(s string, defaultValue time.Time) (time.Time, error) { + if s == "" { + return defaultValue, nil + } + + return time.Parse(time.RFC3339, s) }