Updated EllieBot.Coordinator

This commit is contained in:
Toastie 2024-03-23 14:34:09 +13:00
parent 960f6f093c
commit c69f7951a7
Signed by: toastie_t0ast
GPG key ID: 27F3B6855AFD40A4
18 changed files with 993 additions and 2 deletions

View file

@ -17,7 +17,11 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "EllieBot", "src\EllieBot\El
EndProject EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ayu", "ayu", "{872A4C63-833C-4AE0-91AB-3CE348D3E6F8}" Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ayu", "ayu", "{872A4C63-833C-4AE0-91AB-3CE348D3E6F8}"
EndProject EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ayu.Discord.Voice", "src\ayu\Ayu.Discord.Voice\Ayu.Discord.Voice.csproj", "{5AD2EFFB-7774-49B2-A791-3BAC4DAEE067}" Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ayu.Discord.Voice", "src\ayu\Ayu.Discord.Voice\Ayu.Discord.Voice.csproj", "{5AD2EFFB-7774-49B2-A791-3BAC4DAEE067}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "EllieBot.Tests", "src\EllieBot.Tests\EllieBot.Tests.csproj", "{179DF3B3-AD32-4335-8231-9818338DF3A2}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "EllieBot.Coordinator", "src\EllieBot.Coordinator\EllieBot.Coordinator.csproj", "{A631DDF0-3AD1-4CB9-8458-314B1320868A}"
EndProject EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
@ -33,6 +37,14 @@ Global
{5AD2EFFB-7774-49B2-A791-3BAC4DAEE067}.Debug|Any CPU.Build.0 = Debug|Any CPU {5AD2EFFB-7774-49B2-A791-3BAC4DAEE067}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5AD2EFFB-7774-49B2-A791-3BAC4DAEE067}.Release|Any CPU.ActiveCfg = Release|Any CPU {5AD2EFFB-7774-49B2-A791-3BAC4DAEE067}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5AD2EFFB-7774-49B2-A791-3BAC4DAEE067}.Release|Any CPU.Build.0 = Release|Any CPU {5AD2EFFB-7774-49B2-A791-3BAC4DAEE067}.Release|Any CPU.Build.0 = Release|Any CPU
{179DF3B3-AD32-4335-8231-9818338DF3A2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{179DF3B3-AD32-4335-8231-9818338DF3A2}.Debug|Any CPU.Build.0 = Debug|Any CPU
{179DF3B3-AD32-4335-8231-9818338DF3A2}.Release|Any CPU.ActiveCfg = Release|Any CPU
{179DF3B3-AD32-4335-8231-9818338DF3A2}.Release|Any CPU.Build.0 = Release|Any CPU
{A631DDF0-3AD1-4CB9-8458-314B1320868A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A631DDF0-3AD1-4CB9-8458-314B1320868A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A631DDF0-3AD1-4CB9-8458-314B1320868A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A631DDF0-3AD1-4CB9-8458-314B1320868A}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE
@ -41,6 +53,8 @@ Global
{BCB21472-84D2-4B63-B5DD-31E6A3EC9791} = {B28FB883-9688-41EB-BF5A-945F4A4EB628} {BCB21472-84D2-4B63-B5DD-31E6A3EC9791} = {B28FB883-9688-41EB-BF5A-945F4A4EB628}
{872A4C63-833C-4AE0-91AB-3CE348D3E6F8} = {B28FB883-9688-41EB-BF5A-945F4A4EB628} {872A4C63-833C-4AE0-91AB-3CE348D3E6F8} = {B28FB883-9688-41EB-BF5A-945F4A4EB628}
{5AD2EFFB-7774-49B2-A791-3BAC4DAEE067} = {872A4C63-833C-4AE0-91AB-3CE348D3E6F8} {5AD2EFFB-7774-49B2-A791-3BAC4DAEE067} = {872A4C63-833C-4AE0-91AB-3CE348D3E6F8}
{179DF3B3-AD32-4335-8231-9818338DF3A2} = {B28FB883-9688-41EB-BF5A-945F4A4EB628}
{A631DDF0-3AD1-4CB9-8458-314B1320868A} = {B28FB883-9688-41EB-BF5A-945F4A4EB628}
EndGlobalSection EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {79F61C2C-CDBB-4361-A234-91A0B334CFE4} SolutionGuid = {79F61C2C-CDBB-4361-A234-91A0B334CFE4}

View file

@ -1,3 +1,4 @@
# List of things to do # List of things to do
- Finish the full system rewrite - Finish the full system rewrite
- Finish the EllieBot.Tests project

View file

@ -0,0 +1,47 @@
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
namespace EllieBot.Coordinator
{
/// <summary>
/// ASP.NET Core startup class of the coordinator: registers the gRPC services and the
/// <see cref="CoordinatorRunner"/> and maps the HTTP endpoints.
/// </summary>
public class CoordStartup
{
    // Plain-text answer served on "/" to callers that are not gRPC clients.
    private const string NotAGrpcClientMessage =
        "Communication with gRPC endpoints must be made through a gRPC client. To learn how to create a client, visit: https://go.microsoft.com/fwlink/?linkid=2086909";

    /// <summary>Application configuration handed in by the host.</summary>
    public IConfiguration Configuration { get; }

    public CoordStartup(IConfiguration config)
    {
        Configuration = config;
    }

    /// <summary>
    /// Registers gRPC and a single <see cref="CoordinatorRunner"/> instance that is exposed
    /// both under its own type and as the <see cref="IHostedService"/> run by the host.
    /// </summary>
    public void ConfigureServices(IServiceCollection services)
    {
        services.AddGrpc();

        // One runner instance, resolvable as itself (for CoordinatorService)...
        services.AddSingleton<CoordinatorRunner>();
        // ...and the very same instance registered as the hosted service.
        services.AddSingleton<IHostedService, CoordinatorRunner>(
            provider => provider.GetRequiredService<CoordinatorRunner>());
    }

    /// <summary>
    /// Builds the request pipeline: routing, the coordinator gRPC service and a plain-text
    /// fallback on "/".
    /// </summary>
    public void Configure(IApplicationBuilder app, IWebHostEnvironment env)
    {
        if (env.IsDevelopment())
        {
            app.UseDeveloperExceptionPage();
        }

        app.UseRouting();
        app.UseEndpoints(routes =>
        {
            routes.MapGrpcService<CoordinatorService>();
            routes.MapGet("/",
                async httpContext =>
                {
                    await httpContext.Response.WriteAsync(NotAGrpcClientMessage);
                });
        });
    }
}
}

View file

@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk.Web">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
</PropertyGroup>
<ItemGroup>
<Protobuf Include="Protos\coordinator.proto" GrpcServices="Server" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Grpc.AspNetCore" Version="2.47.0" />
<PackageReference Include="Serilog" Version="2.11.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="4.0.1" />
<PackageReference Include="Serilog.Sinks.File" Version="5.0.0" />
<PackageReference Include="YamlDotNet" Version="11.2.1" />
</ItemGroup>
</Project>

View file

@ -0,0 +1,43 @@
using System;
using System.Text;
using Serilog;
using Serilog.Events;
using Serilog.Sinks.SystemConsole.Themes;
namespace EllieBot.Services
{
/// <summary>
/// Configures the process-wide Serilog logger.
/// </summary>
public static class LogSetup
{
    // Console line layout; {LogSource} is filled from the value passed to SetupLogger.
    private const string ConsoleOutputTemplate =
        "[{Timestamp:HH:mm:ss} {Level:u3}] | #{LogSource} | {Message:lj}{NewLine}{Exception}";

    /// <summary>
    /// Creates the global logger writing to "coord.log" (rolling at 10,000,000 bytes) and to
    /// the console, then switches the console output encoding to UTF-8.
    /// </summary>
    /// <param name="source">Value attached to every log event as the <c>LogSource</c> property.</param>
    public static void SetupLogger(object source)
    {
        var loggerConfiguration = new LoggerConfiguration()
            .MinimumLevel.Override("Microsoft", LogEventLevel.Information)
            .MinimumLevel.Override("System", LogEventLevel.Information)
            .MinimumLevel.Override("Microsoft.AspNetCore", LogEventLevel.Warning)
            .Enrich.FromLogContext()
            .WriteTo.File("coord.log", LogEventLevel.Information,
                rollOnFileSizeLimit: true,
                fileSizeLimitBytes: 10_000_000)
            .WriteTo.Console(LogEventLevel.Information,
                theme: GetTheme(),
                outputTemplate: ConsoleOutputTemplate)
            .Enrich.WithProperty("LogSource", source);

        Log.Logger = loggerConfiguration.CreateLogger();
        Console.OutputEncoding = Encoding.UTF8;
    }

    /// <summary>
    /// Picks the console theme: ANSI colours in DEBUG builds and on Unix, no theme otherwise.
    /// </summary>
    private static ConsoleTheme GetTheme()
    {
#if DEBUG
        return AnsiConsoleTheme.Code;
#else
        if (Environment.OSVersion.Platform == PlatformID.Unix)
            return AnsiConsoleTheme.Code;

        return ConsoleTheme.None;
#endif
    }
}
}

View file

@ -0,0 +1,20 @@
using System;
using Microsoft.AspNetCore.Hosting;
using Microsoft.Extensions.Hosting;
using EllieBot.Coordinator;
using EllieBot.Services;
using Serilog;
// Additional configuration is required to successfully run gRPC on macOS.
// For instructions on how to configure Kestrel and gRPC clients on macOS, visit https://go.microsoft.com/fwlink/?linkid=2099682
// Entry point: set up logging first so that host start-up is logged, then run the web host.
LogSetup.SetupLogger("coord");
Log.Information("Starting coordinator... Pid: {ProcessId}", Environment.ProcessId);

var host = CreateHostBuilder(args).Build();
host.Run();

// Builds the default generic host with the coordinator's startup class.
static IHostBuilder CreateHostBuilder(string[] args)
{
    return Host.CreateDefaultBuilder(args)
        .ConfigureWebHostDefaults(webBuilder => webBuilder.UseStartup<CoordStartup>());
}

View file

@ -0,0 +1,13 @@
{
"profiles": {
"EllieBot.Coordinator": {
"commandName": "Project",
"dotnetRunMessages": "true",
"launchBrowser": false,
"applicationUrl": "http://localhost:3442;https://localhost:3443",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
}
}
}
}

View file

@ -0,0 +1,127 @@
syntax = "proto3";
import "google/protobuf/timestamp.proto";
option csharp_namespace = "EllieBot.Coordinator";
package elliebot;
// Control and status API of the shard coordinator.
service Coordinator {
// Sent by a shard to let the coordinator know that it is alive. Carries the shard's guild count and
// connection state; the reply tells the shard whether a graceful coordinator shutdown is imminent.
rpc Heartbeat(HeartbeatRequest) returns (HeartbeatReply);
// Schedules the shard with the given id for a restart.
rpc RestartShard(RestartShardRequest) returns (RestartShardReply);
// Stores the new total number of shards in the coordinator config.
rpc Reshard(ReshardRequest) returns (ReshardReply);
// Reloads the coordinator config. All shards are killed and re-initialized if the shard count changed.
rpc Reload(ReloadRequest) returns (ReloadReply);
// Gets the status of a single shard.
rpc GetStatus(GetStatusRequest) returns (GetStatusReply);
// Gets the status of all shards.
rpc GetAllStatuses(GetAllStatusesRequest) returns (GetAllStatusesReply);
// Restarts all shards. Queues them to be restarted at a normal rate. Setting Nuke to true will kill all shards right
// away
rpc RestartAllShards(RestartAllRequest) returns (RestartAllReply);
// Saves the coordinator state and stops the coordinator process. With graceful = true the shards are
// first notified through their heartbeat replies.
rpc Die(DieRequest) returns (DieReply);
// Replaces the coordinator config with the given YAML text and reloads it.
rpc SetConfigText(SetConfigTextRequest) returns (SetConfigTextReply);
// Returns the current coordinator config file as YAML text.
rpc GetConfigText(GetConfigTextRequest) returns (GetConfigTextReply);
}
// Connection state a shard reports in its heartbeat.
enum ConnState {
// Also the state the coordinator assigns to a shard it has just (re)started.
Disconnected = 0;
Connecting = 1;
Connected = 2;
}
// Periodic status report of a single shard.
message HeartbeatRequest {
int32 shardId = 1;
int32 guildCount = 2;
ConnState state = 3;
}
message HeartbeatReply {
// true once a graceful coordinator shutdown has been requested
bool gracefulImminent = 1;
}
message RestartShardRequest {
int32 shardId = 1;
// should it be queued for restart, set false to kill it and restart immediately with priority
bool queue = 2;
}
message RestartShardReply {
}
message ReshardRequest {
// new total number of shards
int32 shards = 1;
}
message ReshardReply {
}
message ReloadRequest {
}
message ReloadReply {
}
message GetStatusRequest {
int32 shardId = 1;
}
// Status of a single shard as tracked by the coordinator.
message GetStatusReply {
int32 shardId = 1;
ConnState state = 2;
int32 guildCount = 3;
// time of the last status update recorded for the shard
google.protobuf.Timestamp lastUpdate = 4;
bool scheduledForRestart = 5;
// start time of the shard process; the minimum date value when the process is missing or has exited
google.protobuf.Timestamp startedAt = 6;
}
message GetAllStatusesRequest {
}
message GetAllStatusesReply {
// one entry per shard
repeated GetStatusReply Statuses = 1;
}
message RestartAllRequest {
// true = kill all shards right away instead of only queueing them for restart
bool nuke = 1;
}
message RestartAllReply {
}
message DieRequest {
// true = announce the shutdown to the shards and wait before stopping
bool graceful = 1;
}
message DieReply {
}
message GetConfigTextRequest {
}
message GetConfigTextReply {
// content of the coordinator config file
string configYml = 1;
}
message SetConfigTextRequest {
// new content of the coordinator config file
string configYml = 1;
}
message SetConfigTextReply {
bool success = 1;
// error message when success is false, empty otherwise
string error = 2;
}

View file

@ -0,0 +1,11 @@
# Coordinator project
gRPC-based coordinator for running a sharded EllieBot. It controls the lifetime of the shard processes it creates and keeps track of their status.
### Supports
- Checking status
- Individual shard restarts
- Full shard restarts
- Graceful coordinator restarts (restart/update coordinator without killing shards)
- Kill/Stop

View file

@ -0,0 +1,457 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Hosting;
using Serilog;
using YamlDotNet.Serialization;
namespace EllieBot.Coordinator
{
/// <summary>
/// Background service that owns the shard processes. It starts one process per shard, records
/// the heartbeats the shards send, and restarts a shard when it is scheduled for restart, has
/// not reported for too long, keeps reporting a non-connected state, or its process is gone.
/// All access to the shard table and the config goes through a single lock.
/// </summary>
public sealed class CoordinatorRunner : BackgroundService
{
    private const string CONFIG_PATH = "coord.yml";
    private const string GRACEFUL_STATE_PATH = "graceful.json";
    private const string GRACEFUL_STATE_BACKUP_PATH = "graceful_old.json";

    // Id of the Ellie server. The shard it maps to ((id >> 22) % total shards) is checked
    // right after shard 0.
    private const long ELLIE_SERVER_ID = 1173494918812024863;

    // Pause at the end of every check iteration.
    private const int LOOP_DELAY_MS = 5000;

    // A shard reporting the same non-connected state more than this many times in a row is restarted.
    private const int MAX_SAME_STATE_REPORTS = 8;

    private readonly Serializer _serializer;
    private readonly Deserializer _deserializer;
    private readonly object _locker = new object();
    private readonly Random _rng;

    private Config _config;
    private ShardStatus[] _shardStatuses;
    private bool _gracefulImminent;

    /// <summary>
    /// Loads the config and either restores the shard table saved by a previous coordinator
    /// or creates a fresh one.
    /// </summary>
    /// <exception cref="InvalidDataException">The config file contains invalid values.</exception>
    public CoordinatorRunner()
    {
        _serializer = new();
        _deserializer = new();
        _config = LoadConfig();
        _rng = new Random();

        if (!TryRestoreOldState())
            InitAll();
    }

    /// <summary>Reads and validates the config file.</summary>
    private Config LoadConfig()
    {
        lock (_locker)
        {
            var config = _deserializer.Deserialize<Config>(File.ReadAllText(CONFIG_PATH));
            ValidateConfig(in config);
            return config;
        }
    }

    /// <summary>
    /// Rejects configs the check loop cannot work with: fewer than one shard (the loop divides
    /// by the shard count) or a negative re-check delay.
    /// </summary>
    /// <exception cref="InvalidDataException">The config is not usable.</exception>
    private static void ValidateConfig(in Config config)
    {
        if (config.TotalShards < 1)
            throw new InvalidDataException(
                $"{CONFIG_PATH}: TotalShards must be at least 1 (was {config.TotalShards})");

        if (config.RecheckIntervalMs < 0)
            throw new InvalidDataException(
                $"{CONFIG_PATH}: RecheckIntervalMs can't be negative (was {config.RecheckIntervalMs})");
    }

    /// <summary>Serializes the config and overwrites the config file with it.</summary>
    private void SaveConfig(in Config config)
    {
        lock (_locker)
        {
            var output = _serializer.Serialize(config);
            File.WriteAllText(CONFIG_PATH, output);
        }
    }

    /// <summary>
    /// Re-reads the config file. When the shard count changed, all shards are killed and the
    /// shard table is rebuilt for the new count.
    /// </summary>
    /// <exception cref="InvalidDataException">The config file contains invalid values; the current config is kept.</exception>
    public void ReloadConfig()
    {
        lock (_locker)
        {
            var newConfig = LoadConfig();
            var shardCountChanged = _config.TotalShards != newConfig.TotalShards;

            // Kill with the old table, then switch config, then build the new table.
            if (shardCountChanged)
                KillAll();

            _config = newConfig;

            if (shardCountChanged)
                InitAll();
        }
    }

    /// <summary>
    /// Main loop. Each iteration (re)starts at most one shard, waits the configured re-check
    /// delay if it did, and then always waits another 5 seconds. Returns when the host is stopping.
    /// </summary>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                bool hadAction;
                lock (_locker)
                {
                    hadAction = RestartFirstShardNeedingAttention(stoppingToken);
                }

                if (hadAction)
                {
                    await Task.Delay(_config.RecheckIntervalMs, stoppingToken).ConfigureAwait(false);
                }
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Regular shutdown, not an error.
                break;
            }
            catch (Exception ex)
            {
                Log.Error(ex, "Error in coordinator: {Message}", ex.Message);
            }

            try
            {
                await Task.Delay(LOOP_DELAY_MS, stoppingToken).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                break;
            }
        }
    }

    /// <summary>
    /// Walks the shards in check order and (re)starts the first one that needs it.
    /// Must be called while holding the lock.
    /// </summary>
    /// <returns>true when a shard was (re)started.</returns>
    private bool RestartFirstShardNeedingAttention(CancellationToken stoppingToken)
    {
        foreach (var shardId in GetShardCheckOrder())
        {
            if (stoppingToken.IsCancellationRequested)
                return false;

            if (!NeedsRestart(shardId, _shardStatuses[shardId]))
                continue;

            StartShard(shardId);
            return true;
        }

        return false;
    }

    /// <summary>
    /// Order in which shards are checked: shard 0, then the shard of the Ellie server, then
    /// every remaining shard in random order.
    /// </summary>
    private List<int> GetShardCheckOrder()
    {
        var totalShards = _config.TotalShards;

        return Enumerable.Range(0, 1)
            .Append((int)((ELLIE_SERVER_ID >> 22) % totalShards))
            .Concat(Enumerable.Range(1, totalShards - 1)
                .OrderBy(_ => _rng.Next()))
            .Distinct()
            .ToList();
    }

    /// <summary>
    /// Decides whether the shard has to be (re)started and logs the reason if so.
    /// </summary>
    private bool NeedsRestart(int shardId, ShardStatus status)
    {
        if (status.ShouldRestart)
        {
            Log.Warning("Shard {ShardId} is restarting (scheduled)...", shardId);
            return true;
        }

        if (DateTime.UtcNow - status.LastUpdate > TimeSpan.FromSeconds(_config.UnresponsiveSec))
        {
            Log.Warning("Shard {ShardId} is restarting (unresponsive)...", shardId);
            return true;
        }

        if (status.StateCounter > MAX_SAME_STATE_REPORTS && status.State != ConnState.Connected)
        {
            Log.Warning("Shard {ShardId} is restarting (stuck)...", shardId);
            return true;
        }

        try
        {
            if (status.Process is null or { HasExited: true })
            {
                Log.Warning("Shard {ShardId} is starting (process)...", shardId);
                return true;
            }
        }
        catch (InvalidOperationException)
        {
            // HasExited throws when the Process object has no usable process behind it.
            Log.Warning("Process for shard {ShardId} is bugged... ", shardId);
            return true;
        }

        return false;
    }

    /// <summary>
    /// Kills the shard's current process (if any), starts a new one and resets the shard's status.
    /// </summary>
    private void StartShard(int shardId)
    {
        var status = _shardStatuses[shardId];
        KillAndDispose(status.Process, shardId);

        var proc = StartShardProcess(shardId);
        _shardStatuses[shardId] = status with
        {
            Process = proc,
            LastUpdate = DateTime.UtcNow,
            State = ConnState.Disconnected,
            ShouldRestart = false,
            StateCounter = 0,
        };
    }

    /// <summary>
    /// Best-effort kill of a shard process together with its child processes, followed by
    /// disposal. Failures are only logged at debug level: the process may already be gone.
    /// </summary>
    private static void KillAndDispose(Process process, int shardId)
    {
        if (process is null)
            return;

        try
        {
            // The configured start command may only be a launcher (e.g. "dotnet run"),
            // so the whole process tree has to go.
            process.Kill(true);
        }
        catch (Exception ex)
        {
            Log.Debug(ex, "Unable to kill the process of shard {ShardId}", shardId);
        }

        try
        {
            process.Dispose();
        }
        catch (Exception ex)
        {
            Log.Debug(ex, "Unable to dispose the process of shard {ShardId}", shardId);
        }
    }

    /// <summary>
    /// Starts the shard process using the configured command. The argument template receives
    /// the shard id as {0} and the total shard count as {1}.
    /// </summary>
    private Process StartShardProcess(int shardId)
        => Process.Start(new ProcessStartInfo()
        {
            FileName = _config.ShardStartCommand,
            Arguments = string.Format(_config.ShardStartArgs,
                shardId,
                _config.TotalShards),
            EnvironmentVariables =
            {
                {"ELLIEBOT_IS_COORDINATED", "1"}
            }
        });

    /// <summary>
    /// Records a heartbeat of a shard.
    /// </summary>
    /// <param name="shardId">Id of the reporting shard.</param>
    /// <param name="guildCount">Guild count reported by the shard.</param>
    /// <param name="state">Connection state reported by the shard.</param>
    /// <returns>true when a graceful coordinator shutdown has been requested.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="shardId"/> is not a known shard.</exception>
    public bool Heartbeat(int shardId, int guildCount, ConnState state)
    {
        lock (_locker)
        {
            ThrowIfUnknownShard(shardId);

            var status = _shardStatuses[shardId];
            status = _shardStatuses[shardId] = status with
            {
                GuildCount = guildCount,
                State = state,
                LastUpdate = DateTime.UtcNow,
                // Counts how many times in a row the same state was reported.
                StateCounter = status.State == state
                    ? status.StateCounter + 1
                    : 1
            };

            if (status.StateCounter > 1 && status.State == ConnState.Disconnected)
            {
                Log.Warning("Shard {ShardId} is in DISCONNECTED state! ({StateCounter})",
                    status.ShardId,
                    status.StateCounter);
            }

            return _gracefulImminent;
        }
    }

    /// <summary>
    /// Writes a new total shard count to the config file. The running coordinator keeps its
    /// current config until <see cref="ReloadConfig"/> is called.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="totalShards"/> is less than 1.</exception>
    public void SetShardCount(int totalShards)
    {
        if (totalShards < 1)
            throw new ArgumentOutOfRangeException(nameof(totalShards),
                totalShards,
                "Shard count must be at least 1");

        lock (_locker)
        {
            SaveConfig(new Config(
                totalShards,
                _config.RecheckIntervalMs,
                _config.ShardStartCommand,
                _config.ShardStartArgs,
                _config.UnresponsiveSec));
        }
    }

    /// <summary>
    /// Schedules a shard for restart; the check loop performs the restart.
    /// </summary>
    /// <param name="shardId">Id of the shard to restart.</param>
    /// <param name="queue">
    /// Currently not used: the restart is always queued.
    /// NOTE(review): the proto describes queue = false as "kill and restart immediately" - confirm the intended behavior.
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="shardId"/> is not a known shard.</exception>
    public void RestartShard(int shardId, bool queue)
    {
        lock (_locker)
        {
            ThrowIfUnknownShard(shardId);

            _shardStatuses[shardId] = _shardStatuses[shardId] with
            {
                ShouldRestart = true,
                StateCounter = 0,
            };
        }
    }

    /// <summary>
    /// Schedules all shards for restart.
    /// </summary>
    /// <param name="nuke">true to kill all shard processes right away before queueing them.</param>
    public void RestartAll(bool nuke)
    {
        lock (_locker)
        {
            if (nuke)
            {
                KillAll();
            }

            QueueAll();
        }
    }

    /// <summary>
    /// Kills every shard that has a process (including the process' children) and marks those
    /// shards as disconnected and due for restart.
    /// </summary>
    private void KillAll()
    {
        lock (_locker)
        {
            for (var shardId = 0; shardId < _shardStatuses.Length; shardId++)
            {
                var status = _shardStatuses[shardId];
                if (status.Process is not Process p)
                    continue;

                KillAndDispose(p, shardId);
                _shardStatuses[shardId] = status with
                {
                    Process = null,
                    ShouldRestart = true,
                    LastUpdate = DateTime.UtcNow,
                    State = ConnState.Disconnected,
                    StateCounter = 0,
                };
            }
        }
    }

    /// <summary>
    /// Writes process id, connection state and guild count of every shard to the graceful
    /// state file so that a new coordinator can take the shards over.
    /// </summary>
    public void SaveState()
    {
        CoordState coordState;
        lock (_locker)
        {
            // Snapshot under the lock so that heartbeats and restarts can't interleave.
            coordState = new CoordState()
            {
                StatusObjects = _shardStatuses
                    .Select(x => new JsonStatusObject()
                    {
                        Pid = GetPidOrNull(x.Process),
                        ConnectionState = x.State,
                        GuildCount = x.GuildCount,
                    })
                    .ToList()
            };
        }

        var jsonState = JsonSerializer.Serialize(coordState, new JsonSerializerOptions()
        {
            WriteIndented = true,
        });
        File.WriteAllText(GRACEFUL_STATE_PATH, jsonState);
    }

    /// <summary>
    /// Id of the process, or null when there is no process or the Process object has no
    /// process associated with it (in which case reading the id throws).
    /// </summary>
    private static int? GetPidOrNull(Process process)
    {
        try
        {
            return process?.Id;
        }
        catch (InvalidOperationException)
        {
            return null;
        }
    }

    /// <summary>
    /// Tries to rebuild the shard table from the graceful state file. Whether or not it can be
    /// used, the file is moved to the backup path.
    /// </summary>
    /// <returns>
    /// false when there is no state file, it can't be read, or it doesn't match the configured
    /// shard count; true when the shard table was restored.
    /// </returns>
    private bool TryRestoreOldState()
    {
        lock (_locker)
        {
            if (!File.Exists(GRACEFUL_STATE_PATH))
                return false;

            Log.Information("Restoring old coordinator state...");

            CoordState savedState;
            try
            {
                savedState = JsonSerializer.Deserialize<CoordState>(File.ReadAllText(GRACEFUL_STATE_PATH));

                // A "null" document, a missing list or null entries can't be restored.
                if (savedState?.StatusObjects is null || savedState.StatusObjects.Any(x => x is null))
                    throw new InvalidDataException("Old state is null or incomplete");
            }
            catch (Exception ex)
            {
                Log.Error(ex, "Error deserializing old state: {Message}", ex.Message);
                File.Move(GRACEFUL_STATE_PATH, GRACEFUL_STATE_BACKUP_PATH, overwrite: true);
                return false;
            }

            if (savedState.StatusObjects.Count != _config.TotalShards)
            {
                Log.Error("Unable to restore old state because shard count doesn't match");
                File.Move(GRACEFUL_STATE_PATH, GRACEFUL_STATE_BACKUP_PATH, overwrite: true);
                return false;
            }

            _shardStatuses = new ShardStatus[_config.TotalShards];
            for (int shardId = 0; shardId < _shardStatuses.Length; shardId++)
            {
                var statusObj = savedState.StatusObjects[shardId];
                Process p = null;
                if (statusObj.Pid is { } pid)
                {
                    try
                    {
                        // NOTE(review): only the pid is matched; nothing verifies that the
                        // process found is still the shard that was saved.
                        p = Process.GetProcessById(pid);
                    }
                    catch (Exception ex)
                    {
                        Log.Warning(ex, "Process for shard {ShardId} is not runnning", shardId);
                    }
                }

                // Shards whose process could not be found are scheduled for restart.
                _shardStatuses[shardId] = new(
                    shardId,
                    DateTime.UtcNow,
                    statusObj.GuildCount,
                    statusObj.ConnectionState,
                    p is null,
                    p);
            }

            File.Move(GRACEFUL_STATE_PATH, GRACEFUL_STATE_BACKUP_PATH, overwrite: true);
            Log.Information("Old state restored!");
            return true;
        }
    }

    /// <summary>Creates a fresh shard table with one default status per configured shard.</summary>
    private void InitAll()
    {
        lock (_locker)
        {
            _shardStatuses = new ShardStatus[_config.TotalShards];
            for (var shardId = 0; shardId < _shardStatuses.Length; shardId++)
            {
                _shardStatuses[shardId] = new ShardStatus(shardId, DateTime.UtcNow);
            }
        }
    }

    /// <summary>Marks every shard as due for restart.</summary>
    private void QueueAll()
    {
        lock (_locker)
        {
            for (var shardId = 0; shardId < _shardStatuses.Length; shardId++)
            {
                _shardStatuses[shardId] = _shardStatuses[shardId] with
                {
                    ShouldRestart = true
                };
            }
        }
    }

    /// <summary>Returns the current status of a shard.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="shardId"/> is not a known shard.</exception>
    public ShardStatus GetShardStatus(int shardId)
    {
        lock (_locker)
        {
            ThrowIfUnknownShard(shardId);
            return _shardStatuses[shardId];
        }
    }

    /// <summary>Returns a copy of the status list of all shards.</summary>
    public List<ShardStatus> GetAllStatuses()
    {
        lock (_locker)
        {
            var toReturn = new List<ShardStatus>(_shardStatuses.Length);
            toReturn.AddRange(_shardStatuses);
            return toReturn;
        }
    }

    /// <summary>
    /// Flags an upcoming graceful shutdown; from now on every heartbeat reply reports it.
    /// </summary>
    public void PrepareGracefulShutdown()
    {
        lock (_locker)
        {
            _gracefulImminent = true;
        }
    }

    /// <summary>Returns the raw content of the config file.</summary>
    public string GetConfigText()
        => File.ReadAllText(CONFIG_PATH);

    /// <summary>
    /// Parses and validates the given YAML, writes it to the config file and reloads the config.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null, empty or whitespace.</exception>
    /// <exception cref="InvalidDataException">The config contains invalid values; nothing is written.</exception>
    public void SetConfigText(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
            throw new ArgumentNullException(nameof(text), "coord.yml can't be empty");

        var config = _deserializer.Deserialize<Config>(text);
        ValidateConfig(in config);

        // Save and reload as one step so that no other config change can slip in between.
        lock (_locker)
        {
            SaveConfig(in config);
            ReloadConfig();
        }
    }

    /// <summary>Throws when the id is negative or not smaller than the number of shards.</summary>
    private void ThrowIfUnknownShard(int shardId)
    {
        if (shardId < 0 || shardId >= _shardStatuses.Length)
            throw new ArgumentOutOfRangeException(nameof(shardId));
    }
}
}

View file

@ -0,0 +1,144 @@
using System;
using System.Threading.Tasks;
using Google.Protobuf.WellKnownTypes;
using Grpc.Core;
namespace EllieBot.Coordinator
{
/// <summary>
/// gRPC front-end of the coordinator. Every call is forwarded to the
/// <see cref="CoordinatorRunner"/> and its result is translated into the reply message.
/// </summary>
public sealed class CoordinatorService : Coordinator.CoordinatorBase
{
    private readonly CoordinatorRunner _runner;

    public CoordinatorService(CoordinatorRunner runner)
        => _runner = runner;

    /// <summary>Records a shard heartbeat and tells the shard whether a graceful shutdown is imminent.</summary>
    public override Task<HeartbeatReply> Heartbeat(HeartbeatRequest request, ServerCallContext context)
    {
        var gracefulImminent = _runner.Heartbeat(request.ShardId, request.GuildCount, request.State);

        return Task.FromResult(new HeartbeatReply()
        {
            GracefulImminent = gracefulImminent
        });
    }

    /// <summary>Stores the new total shard count.</summary>
    public override Task<ReshardReply> Reshard(ReshardRequest request, ServerCallContext context)
    {
        _runner.SetShardCount(request.Shards);
        return Task.FromResult(new ReshardReply());
    }

    /// <summary>Schedules a single shard for restart.</summary>
    public override Task<RestartShardReply> RestartShard(RestartShardRequest request, ServerCallContext context)
    {
        _runner.RestartShard(request.ShardId, request.Queue);
        return Task.FromResult(new RestartShardReply());
    }

    /// <summary>Reloads the coordinator config.</summary>
    public override Task<ReloadReply> Reload(ReloadRequest request, ServerCallContext context)
    {
        _runner.ReloadConfig();
        return Task.FromResult(new ReloadReply());
    }

    /// <summary>Returns the status of a single shard.</summary>
    public override Task<GetStatusReply> GetStatus(GetStatusRequest request, ServerCallContext context)
    {
        var status = _runner.GetShardStatus(request.ShardId);
        return Task.FromResult(StatusToStatusReply(status));
    }

    /// <summary>Returns the status of every shard.</summary>
    public override Task<GetAllStatusesReply> GetAllStatuses(GetAllStatusesRequest request,
        ServerCallContext context)
    {
        var reply = new GetAllStatusesReply();
        foreach (var status in _runner.GetAllStatuses())
            reply.Statuses.Add(StatusToStatusReply(status));

        return Task.FromResult(reply);
    }

    /// <summary>
    /// Converts a shard status into its reply message. The start time falls back to the
    /// minimum date when the process is missing, has exited or can't be queried.
    /// </summary>
    private static GetStatusReply StatusToStatusReply(ShardStatus status)
    {
        DateTime startTime;
        try
        {
            startTime = status.Process is null or { HasExited: true }
                ? DateTime.MinValue.ToUniversalTime()
                : status.Process.StartTime.ToUniversalTime();
        }
        catch
        {
            // Querying the process failed; report "not started".
            startTime = DateTime.MinValue.ToUniversalTime();
        }

        return new GetStatusReply()
        {
            State = status.State,
            GuildCount = status.GuildCount,
            ShardId = status.ShardId,
            LastUpdate = Timestamp.FromDateTime(status.LastUpdate),
            ScheduledForRestart = status.ShouldRestart,
            StartedAt = Timestamp.FromDateTime(startTime)
        };
    }

    /// <summary>Schedules all shards for restart, killing them first when Nuke is set.</summary>
    public override Task<RestartAllReply> RestartAllShards(RestartAllRequest request, ServerCallContext context)
    {
        _runner.RestartAll(request.Nuke);
        return Task.FromResult(new RestartAllReply());
    }

    /// <summary>
    /// Saves the coordinator state and exits the process. A graceful request first flags the
    /// shutdown for the shards' heartbeat replies and waits 10 seconds.
    /// </summary>
    public override async Task<DieReply> Die(DieRequest request, ServerCallContext context)
    {
        if (request.Graceful)
        {
            _runner.PrepareGracefulShutdown();
            await Task.Delay(10_000);
        }

        _runner.SaveState();

        // Exit from a detached task after a short delay - presumably so that this call can
        // still return its reply before the process goes away.
        _ = Task.Run(async () =>
        {
            await Task.Delay(250);
            Environment.Exit(0);
        });

        return new DieReply();
    }

    /// <summary>
    /// Replaces the coordinator config. Failures are reported through the reply instead of a
    /// failed call.
    /// </summary>
    public override Task<SetConfigTextReply> SetConfigText(SetConfigTextRequest request, ServerCallContext context)
    {
        var error = string.Empty;
        var success = true;
        try
        {
            _runner.SetConfigText(request.ConfigYml);
        }
        catch (Exception ex)
        {
            error = ex.Message;
            success = false;
        }

        // Build the reply once; there is no need to copy-construct it from a temporary message.
        return Task.FromResult(new SetConfigTextReply()
        {
            Success = success,
            Error = error
        });
    }

    /// <summary>Returns the content of the coordinator config file.</summary>
    public override Task<GetConfigTextReply> GetConfigText(GetConfigTextRequest request, ServerCallContext context)
    {
        var text = _runner.GetConfigText();

        return Task.FromResult(new GetConfigTextReply()
        {
            ConfigYml = text,
        });
    }
}
}

View file

@ -0,0 +1,21 @@
namespace EllieBot.Coordinator
{
/// <summary>
/// Coordinator settings as stored in coord.yml.
/// </summary>
public readonly struct Config
{
    /// <summary>Total number of shards.</summary>
    public int TotalShards { get; init; }

    /// <summary>Delay in milliseconds.</summary>
    public int RecheckIntervalMs { get; init; }

    /// <summary>Command used to start a shard.</summary>
    public string ShardStartCommand { get; init; }

    /// <summary>Argument template for the start command.</summary>
    public string ShardStartArgs { get; init; }

    /// <summary>Duration in seconds.</summary>
    public double UnresponsiveSec { get; init; }

    /// <summary>Creates a config with every setting given explicitly.</summary>
    public Config(int totalShards, int recheckIntervalMs, string shardStartCommand, string shardStartArgs, double unresponsiveSec)
        => (TotalShards, RecheckIntervalMs, ShardStartCommand, ShardStartArgs, UnresponsiveSec)
            = (totalShards, recheckIntervalMs, shardStartCommand, shardStartArgs, unresponsiveSec);
}
}

View file

@ -0,0 +1,9 @@
using System.Collections.Generic;
namespace EllieBot.Coordinator
{
/// <summary>
/// Coordinator state written to disk before a restart: one status object per shard.
/// </summary>
public class CoordState
{
    /// <summary>
    /// Saved shard statuses. Defaults to an empty list so that a state file without this
    /// property yields no entries instead of a null list.
    /// </summary>
    public List<JsonStatusObject> StatusObjects { get; init; } = new();
}
}

View file

@ -0,0 +1,9 @@
namespace EllieBot.Coordinator
{
/// <summary>
/// Per-shard entry of the saved coordinator state.
/// </summary>
public class JsonStatusObject
{
/// <summary>Id of the shard's process, or null when the shard had no process.</summary>
public int? Pid { get; init; }
/// <summary>Guild count of the shard at the time the state was saved.</summary>
public int GuildCount { get; init; }
/// <summary>Connection state of the shard at the time the state was saved.</summary>
public ConnState ConnectionState { get; init; }
}
}

View file

@ -0,0 +1,15 @@
using System;
using System.Diagnostics;
namespace EllieBot.Coordinator
{
/// <summary>
/// Immutable snapshot of what the coordinator knows about one shard.
/// </summary>
/// <param name="ShardId">Id of the shard.</param>
/// <param name="LastUpdate">Time of the last status update.</param>
/// <param name="GuildCount">Guild count last reported for the shard.</param>
/// <param name="State">Connection state last reported for the shard.</param>
/// <param name="ShouldRestart">Whether the shard is scheduled for a restart.</param>
/// <param name="Process">Process running the shard, or null when there is none.</param>
/// <param name="StateCounter">Counter kept alongside <paramref name="State"/>.</param>
public sealed record ShardStatus(
int ShardId,
DateTime LastUpdate,
int GuildCount = 0,
ConnState State = ConnState.Disconnected,
bool ShouldRestart = false,
Process Process = null,
int StateCounter = 0
);
}

View file

@ -0,0 +1,9 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft": "Warning",
"Microsoft.Hosting.Lifetime": "Information"
}
}
}

View file

@ -0,0 +1,20 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft": "Warning",
"Microsoft.Hosting.Lifetime": "Information"
}
},
"AllowedHosts": "*",
"Kestrel": {
"EndpointDefaults": {
"Protocols": "Http2"
},
"Endpoints": {
"Http": {
"Url": "http://localhost:3442"
}
}
}
}

View file

@ -0,0 +1,12 @@
# total number of shards
TotalShards: 3
# How long (in milliseconds) the coordinator waits after (re)starting a shard before it checks the shards again
RecheckIntervalMs: 5000
# Command to run the shard
ShardStartCommand: dotnet
# Arguments to run the shard
# {0} = shard id
# {1} = total number of shards
ShardStartArgs: run -p "..\EllieBot\EllieBot.csproj" --no-build -- {0} {1}
# Number of seconds without a status report after which a shard is forcefully restarted
UnresponsiveSec: 30