Skip to content

Commit 54469ec

Browse files
authored
Merge pull request #11 from kevbite/issue/8
Issue/8 - Extract attachments from PDF
2 parents 0b70cd0 + a2a0ff9 commit 54469ec

File tree

8 files changed

+140
-58
lines changed

8 files changed

+140
-58
lines changed

src/Kevsoft.PDFtk/IPDFtk.cs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,13 @@ public interface IPDFtk
3939
/// <param name="filePaths">An enumeration of the PDF file paths to merge.</param>
4040
/// <returns>A result with the PDF as a byte array of the merged PDFs.</returns>
4141
Task<IPDFtkResult<byte[]>> ConcatAsync(IEnumerable<string> filePaths);
42-
42+
4343
/// <summary>
4444
/// Splits a single PDF into its individual pages and returns an enumeration of byte arrays, each representing one page as a single PDF.
4545
/// </summary>
4646
/// <param name="filePath">The PDF file path.</param>
47-
/// <returns>A result with an enumeration of byte arrays.</returns>
48-
Task<IPDFtkResult<IEnumerable<byte[]>>> SplitAsync(string filePath);
47+
/// <returns>A result with an enumeration of key/value pairs, where the key is the file name and the value is the file content as a byte array.</returns>
48+
Task<IPDFtkResult<IEnumerable<KeyValuePair<string, byte[]>>>> SplitAsync(string filePath);
4949

5050
/// <summary>
5151
/// Applies a stamp to a PDF file.
@@ -76,5 +76,12 @@ Task<IPDFtkResult<byte[]>> FillFormAsync(string pdfFilePath,
7676
/// <param name="replacementFilePath">A PDF file path to replace the page with.</param>
7777
/// <returns>A result with the PDF, with the page replaced, as a byte array.</returns>
7878
Task<IPDFtkResult<byte[]>> ReplacePage(string pdfFilePath, int page, string replacementFilePath);
79+
80+
/// <summary>
81+
/// Extracts attachments from a PDF file.
82+
/// </summary>
83+
/// <param name="pdfFilePath">A PDF file path input.</param>
84+
/// <returns>A result with the attachments.</returns>
85+
Task<IPDFtkResult<IEnumerable<KeyValuePair<string, byte[]>>>> ExtractAttachments(string pdfFilePath);
7986
}
8087
}

src/Kevsoft.PDFtk/PDFtk.cs

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -126,28 +126,18 @@ public async Task<IPDFtkResult<byte[]>> ConcatAsync(IEnumerable<string> filePath
126126
}
127127

128128
/// <inheritdoc/>
129-
public async Task<IPDFtkResult<IEnumerable<byte[]>>> SplitAsync(string filePath)
129+
public async Task<IPDFtkResult<IEnumerable<KeyValuePair<string, byte[]>>>> SplitAsync(string filePath)
130130
{
131131
using var outputDirectory = TempPDFtkDirectory.Create();
132132

133133
var outputFilePattern = Path.Combine(outputDirectory.TempDirectoryFullName, "page_%02d.pdf");
134134
var executeProcessResult =
135135
await _pdftkProcess.ExecuteAsync(filePath, "burst", "output", outputFilePattern);
136136

137-
var outputFileBytes = new List<byte[]>();
138-
if (executeProcessResult.Success)
139-
{
140-
var outputFiles = Directory.GetFiles(outputDirectory.TempDirectoryFullName, "*.pdf");
141-
foreach (var outputFile in outputFiles)
142-
{
143-
var bytes = await File.ReadAllBytesAsync(outputFile);
144-
outputFileBytes.Add(bytes);
145-
}
146-
}
147-
148-
return new PDFtkResult<IEnumerable<byte[]>>(executeProcessResult, outputFileBytes);
137+
return await ResolveSingleDirectoryExecutionResultAsync(executeProcessResult, outputDirectory, "*.pdf");
149138
}
150139

140+
151141
/// <inheritdoc/>
152142
public async Task<IPDFtkResult<byte[]>> StampAsync(string pdfFilePath, string stampPdfFilePath)
153143
{
@@ -206,6 +196,24 @@ private static async Task<IPDFtkResult<byte[]>> ResolveSingleFileExecutionResult
206196
return new PDFtkResult<byte[]>(executeProcessResult, bytes);
207197
}
208198

199+
private static async Task<IPDFtkResult<IEnumerable<KeyValuePair<string, byte[]>>>>
200+
ResolveSingleDirectoryExecutionResultAsync(ExecutionResult executeProcessResult,
201+
TempPDFtkDirectory outputDirectory, string searchPattern)
202+
{
203+
var outputFileBytes = new List<KeyValuePair<string, byte[]>>();
204+
if (executeProcessResult.Success)
205+
{
206+
var outputFiles = Directory.GetFiles(outputDirectory.TempDirectoryFullName, searchPattern);
207+
foreach (var outputFile in outputFiles)
208+
{
209+
var bytes = await File.ReadAllBytesAsync(outputFile);
210+
var fileName = Path.GetFileName(outputFile);
211+
outputFileBytes.Add(KeyValuePair.Create(fileName, bytes));
212+
}
213+
}
214+
215+
return new PDFtkResult<IEnumerable<KeyValuePair<string, byte[]>>>(executeProcessResult, outputFileBytes);
216+
}
209217

210218
/// <inheritdoc/>
211219
public async Task<IPDFtkResult<byte[]>> ReplacePage(string pdfFilePath, int page, string replacementFilePath)
@@ -241,5 +249,20 @@ public async Task<IPDFtkResult<byte[]>> ReplacePage(string pdfFilePath, int page
241249

242250
return await ResolveSingleFileExecutionResultAsync(executeProcessResult, outputFile);
243251
}
252+
253+
/// <inheritdoc/>
254+
public async Task<IPDFtkResult<IEnumerable<KeyValuePair<string, byte[]>>>> ExtractAttachments(string pdfFilePath)
255+
{
256+
using var outputDirectory = TempPDFtkDirectory.Create();
257+
258+
var executeProcessResult = await _pdftkProcess.ExecuteAsync(
259+
pdfFilePath,
260+
"unpack_files",
261+
"output",
262+
outputDirectory.TempDirectoryFullName
263+
);
264+
265+
return await ResolveSingleDirectoryExecutionResultAsync(executeProcessResult, outputDirectory, "*");
266+
}
244267
}
245268
}

src/Kevsoft.PDFtk/PDFtkByteArrayExtensions.cs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,8 @@ public static async Task<IPDFtkResult<byte[]>> ConcatAsync(this IPDFtk pdftk, IE
8080
/// </summary>
8181
/// <param name="pdftk">The IPDFtk object.</param>
8282
/// <param name="pdfFile">A byte array of the PDF file input.</param>
83-
/// <returns>A result with an enumeration of byte arrays.</returns>
84-
public static async Task<IPDFtkResult<IEnumerable<byte[]>>> SplitAsync(this IPDFtk pdftk, byte[] pdfFile)
83+
/// <returns>A result with an enumeration of key/value pairs, where the key is the file name and the value is the file content as a byte array.</returns>
84+
public static async Task<IPDFtkResult<IEnumerable<KeyValuePair<string, byte[]>>>> SplitAsync(this IPDFtk pdftk, byte[] pdfFile)
8585
{
8686
using var inputFile = await TempPDFtkFile.FromAsync(pdfFile);
8787

@@ -141,7 +141,19 @@ public static async Task<IPDFtkResult<byte[]>> ReplacePage(this IPDFtk pdftk, by
141141

142142
return await pdftk.ReplacePage(inputFile.TempFileName, page, stampFile.TempFileName);
143143
}
144-
144+
145+
/// <summary>
146+
/// Extracts attachments from a PDF file.
147+
/// </summary>
148+
/// <param name="pdftk">The IPDFtk object.</param>
149+
/// <param name="fileBytes">A byte array of the PDF file input.</param>
150+
/// <returns>A result with the attachments.</returns>
151+
public static async Task<IPDFtkResult<IEnumerable<KeyValuePair<string, byte[]>>>> ExtractAttachments(this IPDFtk pdftk, byte[] fileBytes)
152+
{
153+
using var inputFile = await TempPDFtkFile.FromAsync(fileBytes);
154+
155+
return await pdftk.ExtractAttachments(inputFile.TempFileName);
156+
}
145157

146158
}
147159
}

src/Kevsoft.PDFtk/PDFtkStreamExtensions.cs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ public static async Task<IPDFtkResult<byte[]>> ConcatAsync(this IPDFtk pdftk, IE
7878
/// </summary>
7979
/// <param name="pdftk">The IPDFtk object.</param>
8080
/// <param name="pdfFile">A stream of the PDF file input.</param>
81-
/// <returns>A result with an enumeration of byte arrays.</returns>
82-
public static async Task<IPDFtkResult<IEnumerable<byte[]>>> SplitAsync(this IPDFtk pdftk, Stream pdfFile)
81+
/// <returns>A result with an enumeration of key/value pairs, where the key is the file name and the value is the file content as a byte array.</returns>
82+
public static async Task<IPDFtkResult<IEnumerable<KeyValuePair<string, byte[]>>>> SplitAsync(this IPDFtk pdftk, Stream pdfFile)
8383
{
8484
using var inputFile = await TempPDFtkFile.FromAsync(pdfFile);
8585

@@ -138,5 +138,18 @@ public static async Task<IPDFtkResult<byte[]>> ReplacePage(this IPDFtk pdftk, St
138138

139139
return await pdftk.ReplacePage(inputFile.TempFileName, page, stampFile.TempFileName);
140140
}
141+
142+
/// <summary>
143+
/// Extracts attachments from a PDF file.
144+
/// </summary>
145+
/// <param name="pdftk">The IPDFtk object.</param>
146+
/// <param name="pdfFile">A stream of the PDF file input.</param>
147+
/// <returns>A result with the attachments.</returns>
148+
public static async Task<IPDFtkResult<IEnumerable<KeyValuePair<string, byte[]>>>> ExtractAttachments(this IPDFtk pdftk, Stream pdfFile)
149+
{
150+
using var inputFile = await TempPDFtkFile.FromAsync(pdfFile);
151+
152+
return await pdftk.ExtractAttachments(inputFile.TempFileName);
153+
}
141154
}
142155
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
using System.Linq;
2+
using System.Threading.Tasks;
3+
using FluentAssertions;
4+
using Xunit;
5+
6+
namespace Kevsoft.PDFtk.Tests
7+
{
8+
public class ExtractAttachmentsTests
9+
{
10+
private readonly IPDFtk _pdFtk = new PDFtk();
11+
12+
[Fact]
13+
public async Task ShouldReturnAttachments_ForInputFileAsFilePath()
14+
{
15+
var result = await _pdFtk.ExtractAttachments(TestFiles.TestFileWithAttachmentsPath);
16+
17+
result.Success.Should().BeTrue();
18+
result.Result.Should().HaveCount(1);
19+
result.Result.First().Key.Should().Be("utf8test.txt");
20+
result.Result.First().Key.Should().NotBeEmpty();
21+
}
22+
}
23+
}
Lines changed: 40 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,48 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22

3-
<PropertyGroup>
4-
<TargetFrameworks>netcoreapp3.1;net5.0</TargetFrameworks>
3+
<PropertyGroup>
4+
<TargetFrameworks>netcoreapp3.1;net5.0</TargetFrameworks>
55

6-
<IsPackable>false</IsPackable>
7-
</PropertyGroup>
6+
<IsPackable>false</IsPackable>
7+
</PropertyGroup>
88

9-
<ItemGroup>
10-
<PackageReference Include="FluentAssertions" Version="5.10.3" />
11-
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.9.4" />
12-
<PackageReference Include="xunit" Version="2.4.1" />
13-
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.3">
14-
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
15-
<PrivateAssets>all</PrivateAssets>
16-
</PackageReference>
17-
<PackageReference Include="coverlet.collector" Version="1.3.0">
18-
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
19-
<PrivateAssets>all</PrivateAssets>
20-
</PackageReference>
21-
</ItemGroup>
9+
<ItemGroup>
10+
<PackageReference Include="FluentAssertions" Version="5.10.3" />
11+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.9.4" />
12+
<PackageReference Include="xunit" Version="2.4.1" />
13+
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.3">
14+
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
15+
<PrivateAssets>all</PrivateAssets>
16+
</PackageReference>
17+
<PackageReference Include="coverlet.collector" Version="1.3.0">
18+
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
19+
<PrivateAssets>all</PrivateAssets>
20+
</PackageReference>
21+
</ItemGroup>
2222

23-
<ItemGroup>
24-
<None Update="TestFiles\TestFile1.pdf">
25-
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
26-
</None>
27-
<None Update="TestFiles\Form.pdf">
28-
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
29-
</None>
30-
<None Update="TestFiles\TestFileWith2Pages.pdf">
31-
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
32-
</None>
33-
<None Update="TestFiles\TestFileWith3Pages.pdf">
34-
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
35-
</None>
36-
<None Update="TestFiles\Stamp.pdf">
37-
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
38-
</None>
39-
</ItemGroup>
23+
<ItemGroup>
24+
<None Update="TestFiles\TestFile1.pdf">
25+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
26+
</None>
27+
<None Update="TestFiles\Form.pdf">
28+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
29+
</None>
30+
<None Update="TestFiles\TestFileWith2Pages.pdf">
31+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
32+
</None>
33+
<None Update="TestFiles\TestFileWith3Pages.pdf">
34+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
35+
</None>
36+
<None Update="TestFiles\Stamp.pdf">
37+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
38+
</None>
39+
<None Update="TestFiles\TestFileWithAttachments.pdf">
40+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
41+
</None>
42+
</ItemGroup>
4043

41-
<ItemGroup>
42-
<ProjectReference Include="..\..\src\Kevsoft.PDFtk\Kevsoft.PDFtk.csproj" />
43-
</ItemGroup>
44+
<ItemGroup>
45+
<ProjectReference Include="..\..\src\Kevsoft.PDFtk\Kevsoft.PDFtk.csproj" />
46+
</ItemGroup>
4447

4548
</Project>

test/Kevsoft.PDFtk.Tests/TestFiles.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@ public static class TestFiles
77
public static readonly string FormFilePath = "TestFiles/Form.pdf";
88
public static readonly string TestFileWith2PagesPath = "TestFiles/TestFileWith2Pages.pdf";
99
public static readonly string TestFileWith3PagesPath = "TestFiles/TestFileWith3Pages.pdf";
10+
public static readonly string TestFileWithAttachmentsPath = "TestFiles/TestFileWithAttachments.pdf";
1011
}
1112
}
Binary file not shown.

0 commit comments

Comments
 (0)