From 792fa1f08a1024aea693770de6785f8eff4c276b Mon Sep 17 00:00:00 2001
From: erri120
Date: Wed, 8 Apr 2020 21:56:53 +0200
Subject: [PATCH] Added FindSimilar CLI option

---
 Wabbajack.CLI/OptionsDefinition.cs |   3 +-
 Wabbajack.CLI/Program.cs           |   1 +
 Wabbajack.CLI/Verbs/FindSimilar.cs | 109 ++++++++++++++++++++++++++++++
 Wabbajack.CLI/Wabbajack.CLI.csproj |   1 +
 4 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 Wabbajack.CLI/Verbs/FindSimilar.cs

diff --git a/Wabbajack.CLI/OptionsDefinition.cs b/Wabbajack.CLI/OptionsDefinition.cs
index af2373da..46e7de42 100644
--- a/Wabbajack.CLI/OptionsDefinition.cs
+++ b/Wabbajack.CLI/OptionsDefinition.cs
@@ -17,7 +17,8 @@ namespace Wabbajack.CLI
             typeof(ServerLog),
             typeof(MyFiles),
             typeof(DeleteFile),
-            typeof(Changelog)
+            typeof(Changelog),
+            typeof(FindSimilar)
         };
     }
 }
diff --git a/Wabbajack.CLI/Program.cs b/Wabbajack.CLI/Program.cs
index d34708b1..3e9638f5 100644
--- a/Wabbajack.CLI/Program.cs
+++ b/Wabbajack.CLI/Program.cs
@@ -20,6 +20,7 @@ namespace Wabbajack.CLI
                     (MyFiles opts) => opts.Execute(),
                     (DeleteFile opts) => opts.Execute(),
                     (Changelog opts) => opts.Execute(),
+                    (FindSimilar opts) => opts.Execute(),
                     errs => 1);
         }
     }
diff --git a/Wabbajack.CLI/Verbs/FindSimilar.cs b/Wabbajack.CLI/Verbs/FindSimilar.cs
new file mode 100644
index 00000000..79b061ae
--- /dev/null
+++ b/Wabbajack.CLI/Verbs/FindSimilar.cs
@@ -0,0 +1,109 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Threading.Tasks;
+using CommandLine;
+using F23.StringSimilarity;
+using Wabbajack.Common;
+using Directory = Alphaleonis.Win32.Filesystem.Directory;
+using Path = Alphaleonis.Win32.Filesystem.Path;
+
+namespace Wabbajack.CLI.Verbs
+{
+    /// <summary>
+    /// CLI verb that reports archives in a downloads folder whose file names are nearly identical.
+    /// </summary>
+    [Verb("find-similar", HelpText = "Finds duplicate downloads")]
+    public class FindSimilar : AVerb
+    {
+        /// <summary>Folder whose top-level archive files are compared by name.</summary>
+        [IsDirectory(CustomMessage = "Downloads folder at %1 does not exist!")]
+        [Option('i', "input", HelpText = "Downloads folder", Required = true)]
+        public string? DownloadsFolder { get; set; }
+
+        /// <summary>Largest distance at which a file name and its closest match are still reported.</summary>
+        [Option('t', "threshold", HelpText = "Set the threshold for the maximum distance", Default = 0.2, Required = false)]
+        public double Threshold { get; set; }
+
+        /// <summary>
+        /// Pairs every supported archive name with its closest other name and logs each pair
+        /// whose distance does not exceed <see cref="Threshold"/>.
+        /// </summary>
+        /// <returns><see cref="ExitCode.Ok"/> once the results have been logged.</returns>
+        protected override async Task<ExitCode> Run()
+        {
+            // Distinct() makes skipping index i equivalent to skipping every name equal to downloads[i].
+            var downloads = Directory.EnumerateFiles(DownloadsFolder, "*", SearchOption.TopDirectoryOnly)
+                .Where(x => Consts.SupportedArchives.Contains(Path.GetExtension(x)))
+                .Select(Path.GetFileNameWithoutExtension)
+                .Distinct()
+                .ToList();
+
+            // A single metric instance is reused for every comparison.
+            var lcs = new MetricLCS();
+            var reportedPairs = new HashSet<(string, string)>();
+            var similar = new List<KeyValuePair<string, CompareStruct>>();
+
+            for (var i = 0; i < downloads.Count; i++)
+            {
+                var name = downloads[i];
+
+                // Stays null when there is no other name to compare against (e.g. a single download),
+                // so that case is skipped instead of throwing on an empty sequence.
+                CompareStruct? closest = null;
+                for (var j = 0; j < downloads.Count; j++)
+                {
+                    if (j == i)
+                    {
+                        continue;
+                    }
+
+                    var distance = lcs.Distance(name, downloads[j]);
+
+                    // "<=" makes the later of two equally close names win.
+                    if (closest == null || distance <= closest.Value.Distance)
+                    {
+                        closest = new CompareStruct(downloads[j], distance);
+                    }
+                }
+
+                if (closest == null || closest.Value.Distance > Threshold)
+                {
+                    continue;
+                }
+
+                // A~B and B~A are the same finding; keying on the unordered pair (not on the distance)
+                // keeps unrelated pairs that merely share a distance.
+                var other = closest.Value.Name;
+                var pair = string.CompareOrdinal(name, other) <= 0 ? (name, other) : (other, name);
+                if (reportedPairs.Add(pair))
+                {
+                    similar.Add(new KeyValuePair<string, CompareStruct>(name, closest.Value));
+                }
+            }
+
+            CLIUtils.Log($"Found {similar.Count} similar files:");
+
+            foreach (var (key, value) in similar)
+            {
+                CLIUtils.Log($"{key} similar to {value.Name} by {Math.Round(value.Distance, 3)}");
+            }
+
+            return ExitCode.Ok;
+        }
+
+        /// <summary>A candidate file name together with its distance from the name being compared.</summary>
+        internal struct CompareStruct
+        {
+            public string Name;
+            public double Distance;
+
+            public CompareStruct(string name, double distance)
+            {
+                Name = name;
+                Distance = distance;
+            }
+        }
+    }
+}
diff --git a/Wabbajack.CLI/Wabbajack.CLI.csproj b/Wabbajack.CLI/Wabbajack.CLI.csproj
index 10f13955..d24bb4d5 100644
--- a/Wabbajack.CLI/Wabbajack.CLI.csproj
+++ b/Wabbajack.CLI/Wabbajack.CLI.csproj
@@ -18,6 +18,7 @@
+