From f98a1493182e2e72a55722ab38885781ea0f5016 Mon Sep 17 00:00:00 2001 From: ig Date: Fri, 8 Sep 2023 15:19:28 +0200 Subject: [PATCH] WIP: S3Explorer, TODO: fix missing entries --- csharp/App/S3Explorer/Program.cs | 177 +++++++++++++++++------- csharp/App/S3Explorer/S3Explorer.csproj | 1 + 2 files changed, 127 insertions(+), 51 deletions(-) diff --git a/csharp/App/S3Explorer/Program.cs b/csharp/App/S3Explorer/Program.cs index 69c9d8890..186528f08 100644 --- a/csharp/App/S3Explorer/Program.cs +++ b/csharp/App/S3Explorer/Program.cs @@ -1,10 +1,12 @@ using InnovEnergy.App.Backend.S3; +using InnovEnergy.Lib.Time.Unix; using InnovEnergy.Lib.Utils; namespace S3Explorer; public static class Program { + private const String BucketSalt = "-3e5b3069-214a-43ee-8d85-57d72000c19d"; public static async Task Main(String[] args) { @@ -17,78 +19,151 @@ public static class Program } // Help message - if (args.Length < 4 || args.Contains("-h")) + if (args.Length < 1 || args.Contains("-h")) { - Console.WriteLine("Usage: S3Explorer [BucketId] [from:Unix-time] [to:Unix-time] [#Data-points]"); + Console.WriteLine("Usage: S3Explorer installation-id [from-unix-time] [to-unix-time] [nb-data-points]"); Console.WriteLine("-h Shows this message."); Console.WriteLine("-s 🐍"); return 0; } - // Parsing Arguments - var bucketName = args[0] + "-3e5b3069-214a-43ee-8d85-57d72000c19d"; - var startTime = Int64.Parse(args[1]); - var endTime = Int64.Parse(args[2]); - var numberOfDataPoints = Int64.Parse(args[3]); - var timeBetweenDataPoints = TimeBetweenDataPoints(startTime, endTime, numberOfDataPoints); + + + // Parsing Arguments + var bucketName = args[0] + BucketSalt; + var now = UnixTime.Now; + + var startTime = Int64.Parse(args.ElementAtOr(1, (now - UnixTimeSpan.FromSeconds(20)).ToString())); + var endTime = Int64.Parse(args.ElementAtOr(2, now.ToString())); + var nDataPoints = Int64.Parse(args.ElementAtOr(3, "10")) ; + + var timestampList = GetDataTimestamps(startTime, endTime, nDataPoints); - // Building a List of the timestamps we want to grab the files for. - var timestampList = new List { }; - for (var i = startTime; i <= endTime; i += timeBetweenDataPoints) - { - //Rounding to even numbers only (we only save every second second) - timestampList.Add((i/2 *2).ToString()); - } - - await PrintFiles(bucketName,timestampList); + await PrintFiles(bucketName, timestampList, endTime); // Success return 0; } - private static async Task PrintFiles(String bucketName, List timestampList) - { - var newestDataFilename = timestampList.Last(); - var csvFileText = await GetFileText(bucketName, newestDataFilename); - - // Building Header-Row from the newest data - csvFileText - .Select(l => l.Split(";")) - .Select(l => l[0]) - .Prepend("Timestamp") - .JoinWith(";") - .WriteLine(); - - foreach (var timestamp in timestampList) - { - csvFileText = await GetFileText(bucketName, timestamp); - - // Writing Data below data-keys in a timestamped row - csvFileText.Select(l => l.Split(";")) - .Select(l => l[1]) - .Prepend(timestamp) - .JoinWith(";") - .WriteLine(); - } - - } - - private static Int64 TimeBetweenDataPoints(Int64 startTime, Int64 endTime, Int64 numberOfDataPoints) + private static IEnumerable GetDataTimestamps(Int64 startTime, Int64 endTime, Int64 nDataPoints) { // Calculating temporal distance of data files from the number of requested points. var timeSpan = endTime - startTime; - var timeBetweenDataPoints = timeSpan / numberOfDataPoints; + var timeBetweenDataPoints1 = timeSpan / nDataPoints; // We only upload data every second second so sampling more is impossible. // If this ever changes we might have to change this as well. - timeBetweenDataPoints = Math.Max(timeBetweenDataPoints, 2); - return timeBetweenDataPoints; + var timeBetweenDataPoints = Math.Max(timeBetweenDataPoints1, 2); + + // Building a List of the timestamps we want to grab the files for. + + for (var i = startTime; i <= endTime; i += timeBetweenDataPoints) + { + //Rounding to even numbers only (we only save every second second) + yield return i / 2 * 2; + } } + + // private static async Task PrintFiles(String bucketName, IEnumerable timestampList, Int64 endTime) + // { + // // var csvFileText = await GetFileText(bucketName, endTime); + // // + // // // Building Header-Row from the newest data + // // csvFileText.Select(l => l.Split(";")) + // // .Select(l => l[0]) + // // .Prepend("Timestamp") + // // .JoinWith(";") + // // .WriteLine(); + // + // foreach (var timestamp in timestampList) + // { + // var csvFileText = await GetFileText(bucketName, timestamp); + // + // + // + // + // // Writing Data below data-keys in a timestamped row + // + // + // var dataPoints = csvFileText.Select(l => l.Split(";")[1]); + // + // + // dataPoints + // .Prepend(timestamp.ToString()) + // .JoinWith(";") + // .WriteLine(); + // } + // + // } - // This Method extracts the Text from a given csv file on the s3 bucket - private static async Task GetFileText(String bucketName, String filename) + + private static async Task PrintFiles(String bucketName, IEnumerable timestampList, Int64 endTime) { - return await S3Access.Admin.GetFileText(bucketName, filename + ".csv"); + // var csvFileText = await GetFileText(bucketName, endTime); + // + // // Building Header-Row from the newest data + // csvFileText.Select(l => l.Split(";")) + // .Select(l => l[0]) + // .Prepend("Timestamp") + // .JoinWith(";") + // .WriteLine(); + + + var columns = new Dictionary> + { + ["timestamp"] = new List() + }; + + var index = 0; + + foreach (var timestamp in timestampList) + { + var csvFileText = await GetFileText(bucketName, timestamp); + + columns["timestamp"].Add(timestamp.ToString()); + + var dict = csvFileText is null + ? new Dictionary() + : csvFileText + .Select(l => l.Split(";")) + .ToDictionary(kv => kv[0], kv => kv[1]); + + foreach (var key in dict.Keys) + { + // if a key is not yet present in columns we need to backfill it with nulls + if (!columns.ContainsKey(key)) + columns[key] = Enumerable.Repeat(null, index).ToList(); + + columns[key].Add(dict[key]); + } + + foreach (var key in columns.Keys.Where(key => !dict.ContainsKey(key))) + { + // if a key in columns is not present in this record (dict) we need to set it to null + columns[key].Add(null); + } + + index++; + } + + var headerKeys = columns + .Keys + .OrderBy(k => k) + .Where(k => k != "timestamp") + .Prepend("timestamp") + .ToList(); + + String.Join(';', headerKeys).WriteLine(); + + Enumerable.Range(0, index) + .Select(i => headerKeys.Select(hk => columns[hk][i]).JoinWith(";")) + .ForEach(Console.WriteLine); + } + + // This Method extracts the Text from a given csv file on the s3 bucket + private static async Task?> GetFileText(String bucketName, Int64 timestamp) + { + return await S3Access.Admin.GetFileLines(bucketName, $"{timestamp}.csv"); } } \ No newline at end of file diff --git a/csharp/App/S3Explorer/S3Explorer.csproj b/csharp/App/S3Explorer/S3Explorer.csproj index 3c32de060..599f0fac3 100644 --- a/csharp/App/S3Explorer/S3Explorer.csproj +++ b/csharp/App/S3Explorer/S3Explorer.csproj @@ -8,6 +8,7 @@ +