Innovenergy_trunk/csharp/App/S3Explorer/Program.cs

121 lines
4.3 KiB
C#

using InnovEnergy.App.Backend.S3;
using InnovEnergy.Lib.Time.Unix;
using InnovEnergy.Lib.Utils;
namespace S3Explorer;
/// <summary>
/// Command-line tool that fetches the per-timestamp CSV records of one installation
/// from its S3 bucket and prints them to the console as one semicolon-separated table.
/// </summary>
public static class Program
{
    private const String BucketSalt = "-3e5b3069-214a-43ee-8d85-57d72000c19d";

    // Name of the synthetic first column holding the requested timestamp of each row.
    private const String TimestampColumn = "timestamp";

    // We only upload data every second second so sampling more is impossible.
    // If this ever changes we might have to change this as well.
    private const Double MinSecondsBetweenDataPoints = 2;

    /// <summary>
    /// Entry point. Parses the command line, works out which timestamps to sample
    /// and prints the corresponding records.
    /// </summary>
    /// <param name="args">installation-id [from-unix-time] [to-unix-time] [nb-data-points], or -h / -s.</param>
    /// <returns>0 on success (including help and -s), 1 when an argument is invalid.</returns>
    public static async Task<Int32> Main(String[] args)
    {
        // Todo refactor S3Access into Lib

        // Sssssecret
        if (args.Contains("-s"))
        {
            await SnakeGameSs.PlaySnake();

            // Stop here: otherwise "-s" would be interpreted as an installation id
            // or as one of the numeric arguments further down.
            return 0;
        }

        // Help message
        if (args.Length < 1 || args.Contains("-h"))
        {
            Console.WriteLine("Usage: S3Explorer installation-id [from-unix-time] [to-unix-time] [nb-data-points]");
            Console.WriteLine("-h Shows this message.");
            Console.WriteLine("-s 🐍");
            return 0;
        }

        // Parsing Arguments
        var bucketName = args[0] + BucketSalt;
        var now        = UnixTime.Now;

        var defaultStart = (now - UnixTimeSpan.FromSeconds(20)).ToString();
        var defaultEnd   = now.ToString();

        if (!TryParseArg(args, 1, defaultStart, "from-unix-time", out var startTime) ||
            !TryParseArg(args, 2, defaultEnd,   "to-unix-time",   out var endTime)   ||
            !TryParseArg(args, 3, "10",         "nb-data-points", out var nDataPoints))
        {
            return 1;
        }

        if (nDataPoints < 1)
        {
            Console.Error.WriteLine($"Invalid nb-data-points '{nDataPoints}': must be at least 1.");
            return 1;
        }

        var timestampList = GetDataTimestamps(startTime, endTime, nDataPoints);

        await PrintFiles(bucketName, timestampList);

        // Success
        return 0;
    }

    /// <summary>
    /// Reads the argument at <paramref name="index"/> (or <paramref name="fallback"/> when it is absent)
    /// and parses it as an integer. Writes a message to stderr and returns false when parsing fails.
    /// </summary>
    private static Boolean TryParseArg(String[] args, Int32 index, String fallback, String name, out Int64 value)
    {
        var text = args.ElementAtOr(index, fallback);

        if (Int64.TryParse(text, out value))
        {
            return true;
        }

        Console.Error.WriteLine($"Invalid {name} '{text}': expected an integer.");
        return false;
    }

    /// <summary>
    /// Produces the timestamps to sample between <paramref name="startTime"/> and
    /// <paramref name="endTime"/> (both inclusive), spaced so that the interval is divided
    /// into <paramref name="nDataPoints"/> steps, but never closer together than
    /// <see cref="MinSecondsBetweenDataPoints"/>. Every timestamp is rounded to an even number.
    /// </summary>
    /// <returns>A lazily evaluated sequence; empty when endTime is before startTime.</returns>
    private static IEnumerable<Int64> GetDataTimestamps(Int64 startTime, Int64 endTime, Int64 nDataPoints)
    {
        var timeSpan = endTime - startTime;

        // Divide as Double: integer division would truncate the step (e.g. 25 / 10 -> 2)
        // and return more points than requested. A non-positive count falls back to the
        // densest possible sampling instead of dividing by zero.
        var timeBetweenDataPoints = nDataPoints > 0
            ? Math.Max(MinSecondsBetweenDataPoints, (Double)timeSpan / nDataPoints)
            : MinSecondsBetweenDataPoints;

        // Building the sequence of the timestamps we want to grab the files for.
        // Each sample is computed from its index rather than by repeated addition,
        // so floating point error does not accumulate over long ranges.
        for (var k = 0L; ; k++)
        {
            var sample = startTime + k * timeBetweenDataPoints;

            if (sample > endTime)
            {
                yield break;
            }

            // Rounding to even numbers only (we only save every second second)
            var rounded = (Int64)Math.Round(sample);
            yield return rounded / 2 * 2;
        }
    }

    /// <summary>
    /// Fetches the record for every timestamp in <paramref name="timestampList"/> and prints
    /// all of them as one table: a header line with "timestamp" first followed by the remaining
    /// keys in sorted order, then one line per timestamp. Keys missing from a record
    /// (or whole records for which no file text was returned) are printed as empty fields.
    /// </summary>
    private static async Task PrintFiles(String bucketName, IEnumerable<Int64> timestampList)
    {
        // Column name -> values, one entry per processed timestamp (null = no value).
        var columns = new Dictionary<String, List<String?>>
        {
            [TimestampColumn] = new()
        };

        var index = 0; // number of rows completed so far

        foreach (var timestamp in timestampList)
        {
            var csvFileText = await GetFileText(bucketName, timestamp);
            var record      = ParseRecord(csvFileText);

            columns[TimestampColumn].Add(timestamp.ToString());

            foreach (var (key, value) in record)
            {
                // if a key is not yet present in columns we need to backfill it with nulls
                if (!columns.TryGetValue(key, out var column))
                {
                    column       = Enumerable.Repeat<String?>(null, index).ToList();
                    columns[key] = column;
                }

                column.Add(value);
            }

            // Every column that received no value for this row is padded with null,
            // so that all columns stay aligned.
            foreach (var column in columns.Values.Where(c => c.Count <= index))
            {
                column.Add(null);
            }

            index++;
        }

        var headerKeys = columns
                        .Keys
                        .OrderBy(k => k)
                        .Where(k => k != TimestampColumn)
                        .Prepend(TimestampColumn)
                        .ToList();

        String.Join(';', headerKeys).WriteLine();

        Enumerable.Range(0, index)
                  .Select(i => headerKeys.Select(hk => columns[hk][i]).JoinWith(";"))
                  .ForEach(Console.WriteLine);
    }

    /// <summary>
    /// Turns the lines of one file ("key;value" per line) into a key -> value map.
    /// Returns an empty map when <paramref name="lines"/> is null.
    /// </summary>
    private static Dictionary<String, String> ParseRecord(IReadOnlyList<String>? lines)
    {
        var record = new Dictionary<String, String>();

        if (lines is null)
        {
            return record;
        }

        foreach (var line in lines)
        {
            var fields = line.Split(';');

            // Skip blank/malformed lines (no separator) instead of failing on fields[1].
            // A key named "timestamp" is skipped as well: that column is filled from the
            // requested timestamp and a second value would misalign the table.
            if (fields.Length < 2 || fields[0] == TimestampColumn)
            {
                continue;
            }

            // Indexer assignment: a repeated key overwrites the earlier value
            // rather than throwing as ToDictionary would.
            record[fields[0]] = fields[1];
        }

        return record;
    }

    /// <summary>
    /// Requests the lines of the file "{timestamp}.csv" in the given bucket via
    /// S3Access.Admin.GetFileLines. The result may be null, in which case the caller
    /// prints an empty row for that timestamp.
    /// </summary>
    private static async Task<IReadOnlyList<String>?> GetFileText(String bucketName, Int64 timestamp)
    {
        return await S3Access.Admin.GetFileLines(bucketName, $"{timestamp}.csv");
    }
}