我正在尝试从互联网下载大量文件(图片)。我正在努力处理异步/并行,因为
a) 我不能说是否有文件。我刚刚收到一百万个链接,其中要么是一张图片(300kb 到 3MB),要么是 404 页面不存在。因此,为了避免下载 0 字节文件,我询问同一页面两次,一次是 404,另一次是图片。另一种方法是下载所有 0 字节文件,然后删除数百万个文件 - 这会让 Windows 10 一直停留在这个任务上,直到我重新启动。
b) 当(非常慢)下载正在进行时,每当我查看任何“成功下载的文件”时,它都是用 0 字节创建的,并且不包含图片。我需要更改什么才能在下载下一个文件之前真正下载该文件?
我该如何解决这两个问题?有没有更好的方法来下载数千或数百万个文件(不可能在服务器上压缩/创建 .zip)
//loopResult = Parallel.ForEach(_downloadLinkList, new ParallelOptions { MaxDegreeOfParallelism = 10 }, DownloadFilesParallel);
private async void DownloadFilesParallel(string path)
{
string downloadToDirectory = "";
string x = ""; //in case x fails, i get 404 from webserver and therefore no download is needed
System.Threading.Interlocked.Increment(ref downloadCount);
OnNewListEntry(downloadCount.ToString() + " / " + linkCount.ToString() + " heruntergeladen"); //tell my gui to update
try
{
using(WebClient webClient = new WebClient())
{
downloadToDirectory = Path.Combine(savePathLocalComputer, Path.GetFileName(path)); //path on local computer
webClient.Credentials = CredentialCache.DefaultNetworkCredentials;
x = await webClient.DownloadStringTaskAsync(new Uri(path)); //if this throws an exception, ignore this link
Directory.CreateDirectory(Path.GetDirectoryName(downloadToDirectory)); //if request is successfull, create -if needed- the folder on local pc
await webClient.DownloadFileTaskAsync(new Uri(path), @downloadToDirectory); //should download the file, release 1 parallel task to get the next file. instead there is a 0-byte file and the next one will be downloaded
}
}
catch(WebException wex)
{
}
catch(Exception ex)
{
System.Diagnostics.Debug.WriteLine(ex.Message);
}
finally
{
}
}
//picture is sfw, link is nsfw