Skip to content

Commit

Permalink
Forward from/to parameters (#22)
Browse files (browse the repository at this point in the history)
  • Loading branch information
JakeYallop authored May 27, 2024
1 parent 534fc5f, commit d65d7dd
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 11 deletions.
2 changes: 1 addition & 1 deletion WaybackDownloader/DefaultCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public override async Task<int> ExecuteAsync(CommandContext context, Settings se
return 0;
}

var downloaderTask = downloaderService.StartDownloadAsync(settings.MatchUrl, settings.MatchType, settings.ParsedFilters, settings.LimitPages, workerCts.Token);
var downloaderTask = downloaderService.StartDownloadAsync(settings.MatchUrl, settings.MatchType, settings.From, settings.To, settings.ParsedFilters, settings.LimitPages, workerCts.Token);
pageWorkerRunner.StartTasks(outputDir.FullName, settings.RateLimit, workerCts.Token);
pageWorkerRunnerTask = pageWorkerRunner.WaitForCompletionAsync();
await downloaderTask.ConfigureAwait(false);
Expand Down
8 changes: 4 additions & 4 deletions WaybackDownloader/Services/DownloaderService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ internal sealed class DownloaderService(
{
private readonly ChannelWriter<CdxRecord> _writer = channel.Writer;

public async Task StartDownloadAsync(string urlPrefix, string matchType, CdxFilter[] filters, long? webpageLimit, CancellationToken cancellationToken)
public async Task StartDownloadAsync(string urlPrefix, string matchType, long? from, long? to, CdxFilter[] filters, long? webpageLimit, CancellationToken cancellationToken)
{
try
{
await foreach (var record in GetInitialFileListAsync(urlPrefix, matchType, filters, webpageLimit, cancellationToken))
await foreach (var record in GetInitialFileListAsync(urlPrefix, matchType, from, to, filters, webpageLimit, cancellationToken))
{
try
{
Expand Down Expand Up @@ -44,9 +44,9 @@ public async Task StartDownloadAsync(string urlPrefix, string matchType, CdxFilt
}
}

private async IAsyncEnumerable<CdxRecord> GetInitialFileListAsync(string url, string matchType, CdxFilter[] filters, long? webpageLimit, [EnumeratorCancellation] CancellationToken cancellationToken)
private async IAsyncEnumerable<CdxRecord> GetInitialFileListAsync(string url, string matchType, long? from, long? to, CdxFilter[] filters, long? webpageLimit, [EnumeratorCancellation] CancellationToken cancellationToken)
{
await foreach (var record in cdxClient.GetSnapshotListAsync(url, matchType, filters, webpageLimit, cancellationToken: cancellationToken).WithCancellation(CancellationToken.None))
await foreach (var record in cdxClient.GetSnapshotListAsync(url, matchType, from, to, filters, webpageLimit, cancellationToken: cancellationToken).WithCancellation(CancellationToken.None))
{
if (record is null)
{
Expand Down
1 change: 0 additions & 1 deletion WaybackDownloader/Services/PageWorkerRunner.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
using System.Threading.RateLimiting;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Spectre.Console;

namespace WaybackDownloader.Services;

Expand Down
20 changes: 15 additions & 5 deletions WaybackDownloader/Services/WaybackCdxClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,35 @@ internal sealed class WaybackCdxClient(HttpClient client, ILogger<WaybackCdxClie
private readonly ILogger<WaybackCdxClient> _logger = logger;
private readonly HttpClient _client = client;

public async IAsyncEnumerable<CdxRecord?> GetSnapshotListAsync(string matchUrl, string? matchType = null, CdxFilter[]? filters = null, long? webpageLimit = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
public async IAsyncEnumerable<CdxRecord?> GetSnapshotListAsync(string matchUrl, string? matchType = null, long? from = null, long? to = null, CdxFilter[]? filters = null, long? webpageLimit = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{

var baseQueryBuilder = new StringBuilder($"url={matchUrl}");
var queryBuilder = new StringBuilder($"url={matchUrl}");
if (matchType is not null)
{
if (!MatchTypes.IsValid(matchType))
{
throw new ArgumentException($"Match type '{matchType}' is not valid. Expected one of [{MatchTypes.Exact}, {MatchTypes.Prefix}, {MatchTypes.Host}, {MatchTypes.Domain}].");
}
baseQueryBuilder.Append(CultureInfo.InvariantCulture, $"&matchType={matchType}");
queryBuilder.Append(CultureInfo.InvariantCulture, $"&matchType={matchType}");
}

if (from is not null)
{
queryBuilder.Append(CultureInfo.InvariantCulture, $"&from={from.Value}");
}

if (to is not null)
{
queryBuilder.Append(CultureInfo.InvariantCulture, $"&to={to.Value}");
}

foreach (var filter in filters ?? [])
{
baseQueryBuilder.Append(CultureInfo.InvariantCulture, $"&filter={filter}");
queryBuilder.Append(CultureInfo.InvariantCulture, $"&filter={filter}");
}

var query = baseQueryBuilder.ToString();
var query = queryBuilder.ToString();
var page = 0;
var websiteCount = 0L;
var webpageLimitReached = false;
Expand Down

0 comments on commit d65d7dd

Please sign in to comment.