Commit 9373f5e

Author: Wang, Gang (Gary)
MNEMONIC-232: Improve the MneDurableInputSession to accept multiple paths
1 parent: 7799786

4 files changed (+64, -16 lines)

mnemonic-hadoop/mnemonic-hadoop-mapreduce/src/main/java/org/apache/mnemonic/hadoop/MneDurableInputSession.java

Lines changed: 9 additions & 7 deletions

@@ -45,7 +45,7 @@ public class MneDurableInputSession<V>
   private Iterator<String> m_fp_iter;

   public MneDurableInputSession(TaskAttemptContext taskAttemptContext,
-      Configuration configuration, Path path, String prefix) {
+      Configuration configuration, Path[] paths, String prefix) {
     if (null == taskAttemptContext && null == configuration) {
       throw new ConfigurationException("Session is not configured properly");
     }
@@ -55,15 +55,17 @@ public MneDurableInputSession(TaskAttemptContext taskAttemptContext,
     } else {
       setConfiguration(configuration);
     }
-    initialize(path, prefix);
+    initialize(paths, prefix);
   }

-  public void initialize(Path path, String prefix) {
-    if (!Files.isRegularFile(Paths.get(path.toString()), LinkOption.NOFOLLOW_LINKS)) {
-      throw new UnsupportedOperationException();
-    }
+  public void initialize(Path[] paths, String prefix) {
     List<String> fpathlist = new ArrayList<String>();
-    fpathlist.add(path.toString());
+    for (Path p : paths) {
+      if (!Files.isRegularFile(Paths.get(p.toString()), LinkOption.NOFOLLOW_LINKS)) {
+        throw new UnsupportedOperationException();
+      }
+      fpathlist.add(p.toString());
+    }
     m_fp_iter = fpathlist.iterator();
     readConfig(prefix);
   }
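For orientation, a minimal caller-side sketch of the widened API (not part of this commit; the Configuration setup and part-file locations below are hypothetical, while the generic types mirror the new test added further down):

// A minimal sketch, assuming two existing Mnemonic part files on the local
// filesystem; initialize() rejects anything that is not a regular file.
Configuration conf = new Configuration();  // assumed to carry the mnemonic input settings
Path[] parts = new Path[] {
    new Path("/tmp/mne-out/part-m-00000.mne"),  // hypothetical locations
    new Path("/tmp/mne-out/part-m-00001.mne")
};
MneDurableInputSession<MneDurableInputValue<DurableChunk<?>>> session =
    new MneDurableInputSession<MneDurableInputValue<DurableChunk<?>>>(
        null, conf, parts, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX);
SessionIterator<MneDurableInputValue<DurableChunk<?>>, ?> iter = session.iterator();
while (iter.hasNext()) {
  MneDurableInputValue<DurableChunk<?>> val = iter.next();
  // records from all supplied files stream through one iterator, in array order
}

Because every element of the array is validated up front, a missing or non-regular file fails fast with UnsupportedOperationException before any record is read.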

mnemonic-hadoop/mnemonic-hadoop-mapreduce/src/main/java/org/apache/mnemonic/hadoop/mapred/MneMapredRecordReader.java

Lines changed: 2 additions & 1 deletion

@@ -20,6 +20,7 @@

 import java.io.IOException;

+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.JobConf;
@@ -46,7 +47,7 @@ public class MneMapredRecordReader<MV extends MneDurableInputValue<V>, V>
   public MneMapredRecordReader(FileSplit fileSplit, JobConf conf) throws IOException {
     m_fileSplit = fileSplit;
     m_session = new MneDurableInputSession<V>(null, conf,
-        m_fileSplit.getPath(), MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX);
+        new Path[]{m_fileSplit.getPath()}, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX);
     m_iter = m_session.iterator();
   }
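Note the design choice here and in the mapreduce reader below: each record reader still handles exactly one split, so it simply wraps the single path of its FileSplit in a one-element Path[]; the widened session constructor therefore stays drop-in compatible for existing jobs.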

mnemonic-hadoop/mnemonic-hadoop-mapreduce/src/main/java/org/apache/mnemonic/hadoop/mapreduce/MneMapreduceRecordReader.java

Lines changed: 2 additions & 1 deletion

@@ -19,6 +19,7 @@

 import java.io.IOException;

+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
@@ -51,7 +52,7 @@ public void close() throws IOException {
   public void initialize(InputSplit inputSplit, TaskAttemptContext context) {
     FileSplit split = (FileSplit) inputSplit;
     m_session = new MneDurableInputSession<V>(context, null,
-        split.getPath(), MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX);
+        new Path[]{split.getPath()}, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX);
     m_iter = m_session.iterator();
   }

mnemonic-hadoop/mnemonic-hadoop-mapreduce/src/test/java/org/apache/mnemonic/mapreduce/MneMapreduceChunkDataTest.java

Lines changed: 51 additions & 7 deletions

@@ -44,11 +44,13 @@
 import org.apache.mnemonic.DurableType;
 import org.apache.mnemonic.Utils;
 import org.apache.mnemonic.hadoop.MneConfigHelper;
+import org.apache.mnemonic.hadoop.MneDurableInputSession;
 import org.apache.mnemonic.hadoop.MneDurableInputValue;
 import org.apache.mnemonic.hadoop.MneDurableOutputSession;
 import org.apache.mnemonic.hadoop.MneDurableOutputValue;
 import org.apache.mnemonic.hadoop.mapreduce.MneInputFormat;
 import org.apache.mnemonic.hadoop.mapreduce.MneOutputFormat;
+import org.apache.mnemonic.sessions.SessionIterator;
 import org.testng.Assert;
 import org.testng.AssertJUnit;
 import org.testng.annotations.AfterClass;
@@ -73,7 +75,6 @@ public class MneMapreduceChunkDataTest {
   private long m_reccnt = 5000L;
   private volatile long m_checksum;
   private volatile long m_totalsize = 0L;
-  private List<String> m_partfns;
   private Unsafe unsafe;

   @BeforeClass
@@ -82,7 +83,6 @@ public void setUp() throws Exception {
         System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
     m_conf = new JobConf();
     m_rand = Utils.createRandom();
-    m_partfns = new ArrayList<String>();
     unsafe = Utils.getUnsafe();

     try {
@@ -164,6 +164,7 @@ public void testWriteChunkData() throws Exception {

   @Test(enabled = true, dependsOnMethods = { "testWriteChunkData" })
   public void testReadChunkData() throws Exception {
+    List<String> partfns = new ArrayList<String>();
     long reccnt = 0L;
     long tsize = 0L;
     Checksum cs = new CRC32();
@@ -174,14 +175,14 @@ public void testReadChunkData() throws Exception {
       if (listfiles[idx].isFile()
           && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null))
           && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) {
-        m_partfns.add(listfiles[idx].getName());
+        partfns.add(listfiles[idx].getName());
       }
     }
-    Collections.sort(m_partfns); // keep the order for checksum
-    for (int idx = 0; idx < m_partfns.size(); ++idx) {
-      System.out.println(String.format("Verifying : %s", m_partfns.get(idx)));
+    Collections.sort(partfns); // keep the order for checksum
+    for (int idx = 0; idx < partfns.size(); ++idx) {
+      System.out.println(String.format("Verifying : %s", partfns.get(idx)));
       FileSplit split = new FileSplit(
-          new Path(m_workdir, m_partfns.get(idx)), 0, 0L, new String[0]);
+          new Path(m_workdir, partfns.get(idx)), 0, 0L, new String[0]);
       InputFormat<NullWritable, MneDurableInputValue<DurableChunk<?>>> inputFormat =
           new MneInputFormat<MneDurableInputValue<DurableChunk<?>>, DurableChunk<?>>();
       RecordReader<NullWritable, MneDurableInputValue<DurableChunk<?>>> reader =
@@ -204,4 +205,47 @@ public void testReadChunkData() throws Exception {
     AssertJUnit.assertEquals(m_checksum, cs.getValue());
     System.out.println(String.format("The checksum of chunk is %d", m_checksum));
   }
+
+  @Test(enabled = true, dependsOnMethods = { "testWriteChunkData" })
+  public void testBatchReadChunkDataUsingInputSession() throws Exception {
+    List<String> partfns = new ArrayList<String>();
+    long reccnt = 0L;
+    long tsize = 0L;
+    Checksum cs = new CRC32();
+    cs.reset();
+    File folder = new File(m_workdir.toString());
+    File[] listfiles = folder.listFiles();
+    for (int idx = 0; idx < listfiles.length; ++idx) {
+      if (listfiles[idx].isFile()
+          && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null))
+          && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) {
+        partfns.add(listfiles[idx].getName());
+      }
+    }
+    Collections.sort(partfns); // keep the order for checksum
+    List<Path> paths = new ArrayList<Path>();
+    for (String fns : partfns) {
+      paths.add(new Path(m_workdir, fns));
+      System.out.println(String.format("[Batch Mode] Added : %s", fns));
+    }
+    MneDurableInputSession<MneDurableInputValue<DurableChunk<?>>> m_session =
+        new MneDurableInputSession<MneDurableInputValue<DurableChunk<?>>>(m_tacontext, null,
+            paths.toArray(new Path[0]), MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX);
+    SessionIterator<MneDurableInputValue<DurableChunk<?>>, ?> m_iter = m_session.iterator();
+    MneDurableInputValue<DurableChunk<?>> dchkval = null;
+    while (m_iter.hasNext()) {
+      dchkval = m_iter.next();
+      byte b;
+      for (int j = 0; j < dchkval.getValue().getSize(); ++j) {
+        b = unsafe.getByte(dchkval.getValue().get() + j);
+        cs.update(b);
+      }
+      tsize += dchkval.getValue().getSize();
+      ++reccnt;
+    }
+    AssertJUnit.assertEquals(m_reccnt, reccnt);
+    AssertJUnit.assertEquals(m_totalsize, tsize);
+    AssertJUnit.assertEquals(m_checksum, cs.getValue());
+    System.out.println(String.format("The checksum of chunk is %d [Batch Mode]", m_checksum));
+  }
 }
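As a follow-up note on the new batch test: it reuses the file discovery and CRC32 logic of testReadChunkData, but hands all sorted part files to one MneDurableInputSession instead of opening a record reader per file, and the shared assertions on record count, total size, and checksum confirm the two read paths see identical bytes. The sort is what keeps the order-sensitive CRC32 comparable across runs ("keep the order for checksum").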
