Skip to content
This repository was archived by the owner on Apr 16, 2025. It is now read-only.

Commit 2af1f6d

Browse files
Merge pull request #568 from usc-isi-i2/fix-561
semantic label fix
2 parents 16c37a2 + 285418a commit 2af1f6d

20 files changed

+156
-431
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ Thumbs.db
8686
*.log
8787

8888
*.avro
89+
*.arff
8990

9091
karma-offline/karma.err
9192

karma-app/build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ unzip master.zip
4848
mv karma-app-deps-master/*.tar.gz .
4949
rm -rf karma-app-deps-master master.zip
5050
# download tomcat binary
51-
wget https://dlcdn.apache.org/tomcat/tomcat-8/v8.5.83/bin/apache-tomcat-8.5.83.zip
51+
wget https://dlcdn.apache.org/tomcat/tomcat-8/v8.5.84/bin/apache-tomcat-8.5.84.zip
5252
unzip apache-tomcat-*.zip
5353
rm apache-tomcat-*.zip
5454
mv apache-tomcat* tomcat

karma-semanticlabeling/Semantic Labeling documentation.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ Independent handling of this module:
44
- mvn exec:java -Dexec.mainClass="edu.isi.karma.semanticlabeling.app.App"
55

66
The code starts with cross-validation on the data we have: the model is built and its MRR (mean reciprocal rank) is checked. The actual model is to be built from all the data in the data/soccer2 folder.
7-
Changes for integration with karma need to be done in HybridSTModelHandler.java
7+
Integration with Karma is done in HybridSTModelHandler.java.
88
The DSL_main.predictSemanticType() function needs to be called from the above file. The model needs to be loaded, and the predictions will be ranked. Once the ranking is done, check the highest probability value. If that probability is above 0.3, recommend that semantic type. If the probability is below 0.3, do not give any recommendations - treat the incoming data as newly seen data and save it. While saving the data, also check whether the data you already have (data/soccer2) surpasses the amount of data you want to hold on the server. If it does, remove a set percentage of the data rows from each table and then store the new file.
9-
Minor changes will be required in terms of importing the module into Karma. Test for compatibility with all the running modules. The model will need to be stored in such a way that it can be imported on local on any desktop.
9+
Once the model is built, it is stored in the resources folder. At Karma run time, the model is used directly from the resources folder; no re-training is required.
1010

1111
Paper: https://usc-isi-i2.github.io/papers/pham16-iswc.pdf
1212

karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/app/App.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
/**
2222
* This class is the main class for training and testing of the model.
2323
*
24-
* @author rutujarane, bdasbaksi
24+
* @author rutujarane, Bidisha Das Baksi (bidisha.bksh@gmail.com)
2525
* <p>
2626
* mvn clean install
2727
* mvn exec:java -Dexec.mainClass="edu.isi.karma.semanticlabeling.app.App"
@@ -145,7 +145,6 @@ public static void main(String[] args) throws Exception {
145145
String fileListTrain[] = new String[fileList.length - 1];
146146
System.arraycopy(fileList, 0, fileListTrain, 0, fileNum);
147147
System.arraycopy(fileList, fileNum + 1, fileListTrain, fileNum, fileList.length - fileNum - 1);
148-
// TimeUnit.SECONDS.sleep(1);
149148
FeatureExtractor featureExtractorObject = CreateDSLObjects.create_feature_extractor(fileListTrain);
150149
logger.log(Level.INFO, "Feature Extraction Done ! \n Starting model train !");
151150
DSL_main dsl_obj = new DSL_main(app.modelFilename, featureExtractorObject, true, true, false); // To re-train the model pass the value of load the model as false.

karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/app/CreateDSLObjects.java

Lines changed: 44 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -16,22 +16,21 @@
1616

1717
/**
1818
* This class creates objects from csv file data.
19-
* @author rutujarane
20-
*
21-
*/
19+
*
20+
* @author rutujarane , Bidisha Das Baksi (bidisha.bksh@gmail.com)
21+
*/
2222

2323
public class CreateDSLObjects {
2424

2525
static Logger logger = LogManager.getLogger(CreateDSLObjects.class.getName());
26-
public static HashMap<String, SemType> sem_col ;
27-
// Redo this function
28-
public static String[][] readFile(String fileName){
26+
public static HashMap<String, SemType> sem_col;
27+
28+
public static String[][] readFile(String fileName) {
2929
List<String[]> rowList = new ArrayList<String[]>();
3030
try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
3131
String line;
3232
while ((line = br.readLine()) != null) {
33-
// logger.info("Line:"+line);
34-
String[] lineItems = line.split(",",-1);
33+
String[] lineItems = line.split(",", -1);
3534
rowList.add(lineItems);
3635
}
3736
br.close();
@@ -42,86 +41,69 @@ public static String[][] readFile(String fileName){
4241
matrix[i] = row;
4342
}
4443
return matrix;
45-
}
46-
catch(Exception e){
47-
// Handle any I/O problems
44+
} catch (Exception e) {
4845
logger.info("ERROR: File not readable");
4946
}
5047
String[][] matrix = new String[0][0];
5148
return matrix;
5249
}
5350

54-
public static void deleteFile(File file){
55-
try
56-
{
51+
public static void deleteFile(File file) {
52+
try {
5753
Files.deleteIfExists(Paths.get(file.getAbsolutePath()));
58-
}
59-
catch(NoSuchFileException e)
60-
{
61-
logger.info("No such file/directory exists");
62-
}
63-
catch(DirectoryNotEmptyException e)
64-
{
65-
logger.info("Directory is not empty.");
66-
}
67-
catch(IOException e)
68-
{
69-
logger.info("Invalid permissions.");
54+
} catch (NoSuchFileException e) {
55+
logger.info("No such file/directory exists");
56+
} catch (DirectoryNotEmptyException e) {
57+
logger.info("Directory is not empty.");
58+
} catch (IOException e) {
59+
logger.info("Invalid permissions.");
7060
}
71-
logger.info("Deletion successful.");
61+
logger.info("Deletion successful.");
7262
}
7363

74-
public static FeatureExtractor create_feature_extractor(String[] files) throws IOException{
64+
public static FeatureExtractor create_feature_extractor(String[] files) throws IOException {
7565
List<ColumnBasedTable> columnBasedTableObj = new ArrayList<ColumnBasedTable>();
7666

77-
int kk=0;
78-
for(String file: files){
79-
// if (!file.contains("bundesliga"))
80-
// continue;
81-
// file = "/Users/rutujarane/Desktop/ISI/Semantics/dsl/data/soccer2/2014 WC french.csv"; //test
82-
String [][] data = readFile(file);
83-
System.out.println("File gen:"+file);
84-
if(data.length == 0){
85-
logger.info("Warning: file not readable "+file);
67+
int kk = 0;
68+
for (String file : files) {
69+
String[][] data = readFile(file);
70+
System.out.println("File gen:" + file);
71+
if (data.length == 0) {
72+
logger.info("Warning: file not readable " + file);
8673
continue;
8774
}
88-
logger.info("Read the file"+file);
89-
columnBasedTableObj.add(findDatatype(data,file));
75+
logger.info("Read the file" + file);
76+
columnBasedTableObj.add(findDatatype(data, file));
9077
kk++;
91-
// if(kk>=1)
92-
// break;
9378
}
9479
return new FeatureExtractor(columnBasedTableObj);
9580

9681
}
97-
public static FeatureExtractor create_feature_extractor(HashMap<String,String[][]> dataMap) throws IOException{
82+
83+
public static FeatureExtractor create_feature_extractor(HashMap<String, String[][]> dataMap) throws IOException {
9884
List<ColumnBasedTable> columnBasedTableObj = new ArrayList<ColumnBasedTable>();
99-
for(Map.Entry<String,String[][]> entry : dataMap.entrySet())
100-
{
101-
String data[][] = entry.getValue();
85+
for (Map.Entry<String, String[][]> entry : dataMap.entrySet()) {
86+
String data[][] = entry.getValue();
10287
columnBasedTableObj.add(findDatatype(data, entry.getKey())); // Assuming tf idf is computed at token level and each cell value is not a whole token
10388
}
10489
return new FeatureExtractor(columnBasedTableObj);
10590

10691
}
10792

10893

109-
public static ColumnBasedTable findDatatype(String[][] data, String tableName){
110-
logger.info("TabName:"+tableName);
111-
// for(int i=0; i<data[0].length; i++){
112-
// System.out.print(data[1][i] + " ");
113-
// }
94+
public static ColumnBasedTable findDatatype(String[][] data, String tableName) {
95+
logger.info("TabName:" + tableName);
11496
List<Column> columns = new ArrayList<Column>();
115-
for(int index=0; index<data[0].length; index++){
116-
List<String> colData = getColumnData(data,index);
97+
for (int index = 0; index < data[0].length; index++) {
98+
List<String> colData = getColumnData(data, index);
11799
SemType semTypeObj;
118-
if(sem_col.containsKey(colData.get(0)))
119-
semTypeObj = sem_col.get(colData.get(0));
100+
if (sem_col.containsKey(colData.get(0)))
101+
semTypeObj = sem_col.get(colData.get(0));
120102
else
121-
semTypeObj = findSemType(colData.get(1));
103+
semTypeObj = findSemType(colData.get(1));
122104
Hashtable<String, Float> typeStats = new Hashtable<String, Float>();
123105
Column columnObj = new Column(tableName, colData.get(0), semTypeObj, colData.get(2), data.length, typeStats);
124-
List<String> colSubList = new ArrayList<String>(colData.subList(1,colData.size())); //3
106+
List<String> colSubList = new ArrayList<String>(colData.subList(1, colData.size())); //3
125107
columnObj.value = new ColumnData(colSubList);
126108
columns.add(columnObj);
127109
logger.info("Column Object created");
@@ -130,16 +112,16 @@ public static ColumnBasedTable findDatatype(String[][] data, String tableName){
130112
return columnBasedTableObj;
131113
}
132114

133-
public static SemType findSemType(String colName){
134-
String col[] = colName.trim().replaceAll("\"","").split("-");
135-
SemType semTypeObj = new SemType(col[0],col[0]);
115+
public static SemType findSemType(String colName) {
116+
String col[] = colName.trim().replaceAll("\"", "").split("-");
117+
SemType semTypeObj = new SemType(col[0], col[0]);
136118
return semTypeObj;
137119
}
138120

139-
public static List<String> getColumnData(String[][] data, int index){
121+
public static List<String> getColumnData(String[][] data, int index) {
140122
List<String> column = new ArrayList<String>();
141-
for(int i=0; i<data.length; i++){
142-
column.add(data[i][index].trim().replaceAll("\"",""));
123+
for (int i = 0; i < data.length; i++) {
124+
column.add(data[i][index].trim().replaceAll("\"", ""));
143125
}
144126
return column;
145127
}

karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/dsl/Column.java

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
/**
77
* This class is responsible for creating a column object for each column.
8-
* @author rutujarane
8+
* @author rutujarane, Bidisha Das Baksi (bidisha.bksh@gmail.com)
99
*/
1010

1111
public class Column implements Serializable{
@@ -20,23 +20,19 @@ public class Column implements Serializable{
2020

2121
public Column(String table_name, String name, SemType semantic_type, String typee, int sizee, Hashtable<String, Float> type_stats){
2222
this.id = table_name.concat(name);
23-
// f"{table_name}:{name}"
2423
this.table_name = table_name;
2524
this.name = name;
2625
this.semantic_type = semantic_type;
2726
this.sizee = sizee;
2827
this.type_stats = type_stats;
2928
this.typee = typee;
30-
// this.value = Optional[ColumnData] = null;
3129
this.value = null;
3230
}
3331

3432
public List<String> get_textual_data(){
3533
if(this.value.string_data()) {
3634
return this.value.string_array;
3735
}
38-
// else
39-
// return this.value.number_array; // Removing this after comparing with the python implementation
4036
return new ArrayList<String>();
4137
}
4238

karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/dsl/ColumnBasedTable.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ public class ColumnBasedTable implements Serializable{
1717
public ColumnBasedTable(String id, List<Column> columns){
1818
this.id = id;
1919
this.columns = columns;
20-
// self.name2colidx: Dict[str, int] = {cname.name: idx for idx, cname in enumerate(columns)}
2120
int i=0;
2221
for(Column col_name: columns){
2322
this.name2colidx.put(col_name.name.toString(), i);

karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/dsl/ColumnData.java

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,9 @@
66
import org.apache.logging.log4j.Logger;
77
import org.apache.logging.log4j.LogManager;
88

9-
// (object)
10-
119
/**
1210
* This class is responsible for creating an object of the data in every column.
13-
* @author rutujarane
11+
* @author rutujarane, Bidisha Das Baksi (bidisha.bksh@gmail.com)
1412
*/
1513

1614
public class ColumnData implements Serializable{
@@ -22,25 +20,10 @@ public class ColumnData implements Serializable{
2220
List<Integer> string_idx_array = new ArrayList<Integer>();
2321

2422
public ColumnData(List<String> array){
25-
// for (Object object : array) {
26-
// this.array.add(Objects.toString(object, null));
27-
// }
2823
this.array = array;
29-
// for(int i=0; i<array.size(); i++){
30-
// logger.info(" "+array.get(i));
31-
// }
32-
// this.number_array = {};
33-
// this.number_idx_array = {};
34-
// this.string_array = {};
35-
// this.string_idx_array = {};
36-
37-
// for i, val in enumerate(array):
3824
int i=0;
3925
for(Object arr: array){
40-
// logger.info(" "+arr);
4126
if(arr != null){
42-
// if(isinstance(val, (int, float)){
43-
4427
if(!string_data()){
4528
this.number_array.add(arr.toString());
4629
this.number_idx_array.add(i);
@@ -58,7 +41,7 @@ public ColumnData(List<String> array){
5841
public boolean string_data(){
5942
try
6043
{
61-
// checking valid integer using parseInt() method
44+
// check that every value in the column parses as a number using Double.parseDouble()
6245
for(String arr: this.array)
6346
Double.parseDouble(arr.toString());
6447
return false;

karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/dsl/ColumnType.java

Lines changed: 0 additions & 22 deletions
This file was deleted.

0 commit comments

Comments
 (0)