Merge pull request #691 from erri120/cli-find-similar

Added FindSimilar CLI option
This commit is contained in:
Timothy Baldridge 2020-04-09 14:23:36 -06:00 committed by GitHub
commit 6f579e4b4c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 77 additions and 1 deletions

View File

@ -17,7 +17,8 @@ namespace Wabbajack.CLI
typeof(ServerLog),
typeof(MyFiles),
typeof(DeleteFile),
typeof(Changelog)
typeof(Changelog),
typeof(FindSimilar)
};
}
}

View File

@ -20,6 +20,7 @@ namespace Wabbajack.CLI
(MyFiles opts) => opts.Execute(),
(DeleteFile opts) => opts.Execute(),
(Changelog opts) => opts.Execute(),
(FindSimilar opts) => opts.Execute(),
errs => 1);
}
}

View File

@ -0,0 +1,73 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using CommandLine;
using F23.StringSimilarity;
using Wabbajack.Common;
using Directory = Alphaleonis.Win32.Filesystem.Directory;
using Path = Alphaleonis.Win32.Filesystem.Path;
namespace Wabbajack.CLI.Verbs
{
/// <summary>
/// CLI verb that scans a downloads folder and reports archives whose file names
/// (without extension) are close to each other according to
/// <see cref="MetricLCS"/> distance. A lower distance means more similar names.
/// </summary>
[Verb("find-similar", HelpText = "Finds duplicate downloads")]
public class FindSimilar : AVerb
{
    /// <summary>Folder whose top-level files are inspected.</summary>
    [IsDirectory(CustomMessage = "Downloads folder at %1 does not exist!")]
    [Option('i', "input", HelpText = "Downloads folder", Required = true)]
    public string? DownloadsFolder { get; set; }

    /// <summary>
    /// Largest distance (inclusive) at which two names are still reported as similar.
    /// </summary>
    [Option('t', "threshold", HelpText = "Set the threshold for the maximum distance", Default = 0.2, Required = false)]
    public double Threshold { get; set; }

    /// <summary>
    /// For every supported archive in <see cref="DownloadsFolder"/>, finds the other
    /// archive with the closest name and logs each pair whose distance does not
    /// exceed <see cref="Threshold"/>.
    /// </summary>
    /// <returns><see cref="ExitCode.Ok"/> once the report has been logged.</returns>
    protected override async Task<ExitCode> Run()
    {
        // Distinct() up front: the same base name can appear with several extensions,
        // and each name only needs to be evaluated once.
        var downloads = Directory.EnumerateFiles(DownloadsFolder, "*", SearchOption.TopDirectoryOnly)
            .Where(x => Consts.SupportedArchives.Contains(Path.GetExtension(x)))
            .Select(Path.GetFileNameWithoutExtension)
            .Distinct()
            .ToList();

        // One comparer for the whole run instead of one per compared pair.
        var lcs = new MetricLCS();

        var similar = new List<KeyValuePair<string, CompareStruct>>();
        var reportedPairs = new HashSet<(string, string)>();

        foreach (var name in downloads)
        {
            var closest = FindClosest(lcs, name, downloads);

            // No value means there was nothing to compare against (e.g. a folder
            // holding a single archive); previously this case threw from Aggregate.
            if (!closest.HasValue || !(closest.Value.Distance <= Threshold))
                continue;

            var match = closest.Value;

            // Collapse the mirrored results "A similar to B" / "B similar to A" by
            // keying on the unordered pair of names. Keying on the distance value
            // would also discard unrelated pairs that merely share a distance.
            var pairKey = string.CompareOrdinal(name, match.Name) <= 0
                ? (name, match.Name)
                : (match.Name, name);

            if (reportedPairs.Add(pairKey))
                similar.Add(new KeyValuePair<string, CompareStruct>(name, match));
        }

        CLIUtils.Log($"Found {similar.Count} similar files:");
        foreach (var entry in similar)
        {
            var (key, value) = entry;
            CLIUtils.Log($"{key} similar to {value.Name} by {Math.Round(value.Distance, 3)}");
        }

        return ExitCode.Ok;
    }

    /// <summary>
    /// Returns the candidate closest to <paramref name="name"/>, ignoring entries
    /// equal to <paramref name="name"/> itself.
    /// </summary>
    /// <param name="lcs">Comparer used to measure the distance between two names.</param>
    /// <param name="name">Name to find a neighbour for.</param>
    /// <param name="candidates">All names to compare against.</param>
    /// <returns>
    /// The closest candidate and its distance, or <c>null</c> when there is no
    /// candidate other than <paramref name="name"/>.
    /// </returns>
    private static CompareStruct? FindClosest(MetricLCS lcs, string name, IEnumerable<string> candidates)
    {
        CompareStruct? closest = null;

        foreach (var other in candidates)
        {
            if (other == name)
                continue;

            var distance = lcs.Distance(name, other);

            // "<=" keeps the original tie-breaking: on equal distances the later
            // candidate wins.
            if (!closest.HasValue || distance <= closest.Value.Distance)
                closest = new CompareStruct(other, distance);
        }

        return closest;
    }

    /// <summary>A candidate name together with its distance to the name it was compared with.</summary>
    internal struct CompareStruct
    {
        /// <summary>Name of the candidate file, without extension.</summary>
        public string Name;

        /// <summary>Distance between the candidate and the reference name.</summary>
        public double Distance;

        /// <summary>Creates a comparison result.</summary>
        /// <param name="name">Name of the candidate file.</param>
        /// <param name="distance">Distance to the reference name.</param>
        public CompareStruct(string name, double distance)
        {
            Name = name;
            Distance = distance;
        }
    }
}
}

View File

@ -18,6 +18,7 @@
<ItemGroup>
<PackageReference Include="CommandLineParser" Version="2.7.82" />
<PackageReference Include="F23.StringSimilarity" Version="3.1.0" />
<PackageReference Include="Markdig" Version="0.18.3" />
</ItemGroup>