usc-isi-i2
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/karma-app/build b/karma-app/build
Lines changed: 1 addition & 1 deletion
diff --git a/karma-semanticlabeling/Semantic Labeling documentation.txt b/karma-semanticlabeling/Semantic Labeling documentation.txt
Lines changed: 2 additions & 2 deletions
diff --git a/karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/app/App.java b/karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/app/App.java
Lines changed: 1 addition & 2 deletions
diff --git a/karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/app/CreateDSLObjects.java b/karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/app/CreateDSLObjects.java
Lines changed: 44 additions & 62 deletions
diff --git a/karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/dsl/Column.java b/karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/dsl/Column.java
Lines changed: 1 addition & 5 deletions
diff --git a/karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/dsl/ColumnBasedTable.java b/karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/dsl/ColumnBasedTable.java
Lines changed: 0 additions & 1 deletion
diff --git a/karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/dsl/ColumnData.java b/karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/dsl/ColumnData.java
Lines changed: 2 additions & 19 deletions
diff --git a/karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/dsl/ColumnType.java b/karma-semanticlabeling/src/main/java/edu/isi/karma/semanticlabeling/dsl/ColumnType.java
Lines changed: 0 additions & 22 deletions
@@ -86,6 +86,7 @@ Thumbs.db
 *.log
 
 *.avro
+*.arff
 
 karma-offline/karma.err
 
 
@@ -48,7 +48,7 @@ unzip master.zip
 mv karma-app-deps-master/*.tar.gz .
 rm -rf karma-app-deps-master master.zip
 # download tomcat binary
-wget https://dlcdn.apache.org/tomcat/tomcat-8/v8.5.83/bin/apache-tomcat-8.5.83.zip
+wget https://dlcdn.apache.org/tomcat/tomcat-8/v8.5.84/bin/apache-tomcat-8.5.84.zip
 unzip apache-tomcat-*.zip
 rm apache-tomcat-*.zip
 mv apache-tomcat* tomcat
 
@@ -4,9 +4,9 @@ Independent handling of this module:
 - mvn exec:java -Dexec.mainClass="com.mycompany.app.App" 
 
 The code starts with cross validation of the data we have. The model is built and MRR is checked. The actual model is to be built from all the data in the data/soccer2 folder.
-Changes for integration with karma need to be done in HybridSTModelHandler.java
+Integration with karma is done in HybridSTModelHandler.java
 The DSL_main.predictSemanticType() function needs to be called from the above file. The model needs to be loaded, and the predictions will be ranked. Once the ranking is done, check the highest probability value. If that probability is above 0.3, recommend that semantic type. If the probability is below 0.3, do not give any recommendation - treat the incoming data as newly seen data and save it. While saving the data, also check whether the data you already have (data/soccer2) exceeds the amount of data you want to hold on the server. If it does, remove a set percentage of data rows from each table and then store the new file.
-Minor changes will be required in terms of importing the module into Karma. Test for compatibility with all the running modules. The model will need to be stored in such a way that it can be imported on local on any desktop.
+Once the model is built, it is stored in the resources folder. At run time, Karma uses the model directly from the resources folder; no re-training is required.
 
 Paper: https://usc-isi-i2.github.io/papers/pham16-iswc.pdf
 
 
@@ -21,7 +21,7 @@
 /**
  * This class is the main class for training and testing of the model.
  *
- * @author rutujarane, bdasbaksi
+ * @author rutujarane, Bidisha Das Baksi (bidisha.bksh@gmail.com)
  * <p>
  * mvn clean install
  * mvn exec:java -Dexec.mainClass="com.mycompany.app.App"
@@ -145,7 +145,6 @@ public static void main(String[] args) throws Exception {
         String fileListTrain[] = new String[fileList.length - 1];
         System.arraycopy(fileList, 0, fileListTrain, 0, fileNum);
         System.arraycopy(fileList, fileNum + 1, fileListTrain, fileNum, fileList.length - fileNum - 1);
-//        TimeUnit.SECONDS.sleep(1);
         FeatureExtractor featureExtractorObject = CreateDSLObjects.create_feature_extractor(fileListTrain);
         logger.log(Level.INFO, "Feature Extraction Done ! \n Starting model train !");
         DSL_main dsl_obj = new DSL_main(app.modelFilename, featureExtractorObject, true, true, false); // To re-train the model pass the value of load the model as false.
 
@@ -16,22 +16,21 @@
 
 /**
  * This class creates objects from csv file data.
- * @author rutujarane
- * 
-*/
+ *
+ * @author rutujarane, Bidisha Das Baksi (bidisha.bksh@gmail.com)
+ */
 
 public class CreateDSLObjects {
 
     static Logger logger = LogManager.getLogger(CreateDSLObjects.class.getName());
-    public static HashMap<String, SemType> sem_col ;
-    // Redo this function
-    public static String[][] readFile(String fileName){
+    public static HashMap<String, SemType> sem_col;
+
+    public static String[][] readFile(String fileName) {
         List<String[]> rowList = new ArrayList<String[]>();
         try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
             String line;
             while ((line = br.readLine()) != null) {
-                // logger.info("Line:"+line);
-                String[] lineItems = line.split(",",-1);
+                String[] lineItems = line.split(",", -1);
                 rowList.add(lineItems);
             }
             br.close();
@@ -42,86 +41,69 @@ public static String[][] readFile(String fileName){
                 matrix[i] = row;
             }
             return matrix;
-        }
-        catch(Exception e){
-            // Handle any I/O problems
+        } catch (Exception e) {
             logger.info("ERROR: File not readable");
         }
         String[][] matrix = new String[0][0];
         return matrix;
     }
 
-    public static void deleteFile(File file){
-        try
-        {
+    public static void deleteFile(File file) {
+        try {
             Files.deleteIfExists(Paths.get(file.getAbsolutePath()));
-        } 
-        catch(NoSuchFileException e) 
-        { 
-            logger.info("No such file/directory exists"); 
-        } 
-        catch(DirectoryNotEmptyException e) 
-        { 
-            logger.info("Directory is not empty."); 
-        } 
-        catch(IOException e) 
-        { 
-            logger.info("Invalid permissions."); 
+        } catch (NoSuchFileException e) {
+            logger.info("No such file/directory exists");
+        } catch (DirectoryNotEmptyException e) {
+            logger.info("Directory is not empty.");
+        } catch (IOException e) {
+            logger.info("Invalid permissions.");
         }
-        logger.info("Deletion successful."); 
+        logger.info("Deletion successful.");
     }
 
-    public static FeatureExtractor create_feature_extractor(String[] files) throws IOException{
+    public static FeatureExtractor create_feature_extractor(String[] files) throws IOException {
         List<ColumnBasedTable> columnBasedTableObj = new ArrayList<ColumnBasedTable>();
 
-        int kk=0;
-        for(String file: files){
-            // if (!file.contains("bundesliga"))
-            //     continue;
-            // file = "/Users/rutujarane/Desktop/ISI/Semantics/dsl/data/soccer2/2014 WC french.csv"; //test
-            String [][] data = readFile(file);
-            System.out.println("File gen:"+file);
-            if(data.length == 0){
-                logger.info("Warning: file not readable "+file);
+        int kk = 0;
+        for (String file : files) {
+            String[][] data = readFile(file);
+            System.out.println("File gen:" + file);
+            if (data.length == 0) {
+                logger.info("Warning: file not readable " + file);
                 continue;
             }
-            logger.info("Read the file"+file);
-            columnBasedTableObj.add(findDatatype(data,file));
+            logger.info("Read the file" + file);
+            columnBasedTableObj.add(findDatatype(data, file));
             kk++;
-            // if(kk>=1)
-            //     break;
         }
         return new FeatureExtractor(columnBasedTableObj);
 
     }
-    public static FeatureExtractor create_feature_extractor(HashMap<String,String[][]> dataMap) throws IOException{
+
+    public static FeatureExtractor create_feature_extractor(HashMap<String, String[][]> dataMap) throws IOException {
         List<ColumnBasedTable> columnBasedTableObj = new ArrayList<ColumnBasedTable>();
-        for(Map.Entry<String,String[][]> entry : dataMap.entrySet())
-        {
-            String data[][]  = entry.getValue();
+        for (Map.Entry<String, String[][]> entry : dataMap.entrySet()) {
+            String data[][] = entry.getValue();
             columnBasedTableObj.add(findDatatype(data, entry.getKey())); // Assuming tf idf is computed at token level and each cell value is not a whole token
         }
         return new FeatureExtractor(columnBasedTableObj);
 
     }
 
 
-    public static ColumnBasedTable findDatatype(String[][] data, String tableName){
-        logger.info("TabName:"+tableName);
-        // for(int i=0; i<data[0].length; i++){
-        //     System.out.print(data[1][i] + " ");
-        // }
+    public static ColumnBasedTable findDatatype(String[][] data, String tableName) {
+        logger.info("TabName:" + tableName);
         List<Column> columns = new ArrayList<Column>();
-        for(int index=0; index<data[0].length; index++){
-            List<String> colData = getColumnData(data,index);
+        for (int index = 0; index < data[0].length; index++) {
+            List<String> colData = getColumnData(data, index);
             SemType semTypeObj;
-            if(sem_col.containsKey(colData.get(0)))
-                semTypeObj =  sem_col.get(colData.get(0));
+            if (sem_col.containsKey(colData.get(0)))
+                semTypeObj = sem_col.get(colData.get(0));
             else
-                semTypeObj  = findSemType(colData.get(1));
+                semTypeObj = findSemType(colData.get(1));
             Hashtable<String, Float> typeStats = new Hashtable<String, Float>();
             Column columnObj = new Column(tableName, colData.get(0), semTypeObj, colData.get(2), data.length, typeStats);
-            List<String> colSubList = new ArrayList<String>(colData.subList(1,colData.size())); //3
+            List<String> colSubList = new ArrayList<String>(colData.subList(1, colData.size())); //3
             columnObj.value = new ColumnData(colSubList);
             columns.add(columnObj);
             logger.info("Column Object created");
@@ -130,16 +112,16 @@ public static ColumnBasedTable findDatatype(String[][] data, String tableName){
         return columnBasedTableObj;
     }
 
-    public static SemType findSemType(String colName){
-        String col[] = colName.trim().replaceAll("\"","").split("-");
-        SemType semTypeObj = new SemType(col[0],col[0]);
+    public static SemType findSemType(String colName) {
+        String col[] = colName.trim().replaceAll("\"", "").split("-");
+        SemType semTypeObj = new SemType(col[0], col[0]);
         return semTypeObj;
     }
 
-    public static List<String> getColumnData(String[][] data, int index){
+    public static List<String> getColumnData(String[][] data, int index) {
         List<String> column = new ArrayList<String>();
-        for(int i=0; i<data.length; i++){
-            column.add(data[i][index].trim().replaceAll("\"",""));
+        for (int i = 0; i < data.length; i++) {
+            column.add(data[i][index].trim().replaceAll("\"", ""));
         }
         return column;
     }
 
@@ -5,7 +5,7 @@
 
 /**
  * This class is responsible for creating a column object for each column.
- * @author rutujarane
+ * @author rutujarane, Bidisha Das Baksi (bidisha.bksh@gmail.com)
  */
 
 public class Column implements Serializable{
@@ -20,23 +20,19 @@ public class Column implements Serializable{
 
     public Column(String table_name, String name, SemType semantic_type, String typee, int sizee, Hashtable<String, Float> type_stats){
         this.id = table_name.concat(name);
-        // f"{table_name}:{name}"
         this.table_name = table_name;
         this.name = name;
         this.semantic_type = semantic_type;
         this.sizee = sizee;
         this.type_stats = type_stats;
         this.typee = typee;
-        // this.value = Optional[ColumnData] = null;
         this.value = null;
     }
 
     public List<String> get_textual_data(){
         if(this.value.string_data()) {
             return this.value.string_array;
         }
-//        else
-//            return this.value.number_array; // Removing this after comparing with the python implementation
         return new ArrayList<String>();
     }
 
 
@@ -17,7 +17,6 @@ public class ColumnBasedTable implements Serializable{
     public ColumnBasedTable(String id, List<Column> columns){
         this.id = id;
         this.columns = columns;
-        // self.name2colidx: Dict[str, int] = {cname.name: idx for idx, cname in enumerate(columns)}
         int i=0;
         for(Column col_name: columns){
             this.name2colidx.put(col_name.name.toString(), i);
 
@@ -6,11 +6,9 @@
 import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.LogManager;
 
-// (object)
-
 /**
  * This class is responsible for creating an object of the data in every column.
- * @author rutujarane
+ * @author rutujarane, Bidisha Das Baksi (bidisha.bksh@gmail.com)
  */
 
 public class ColumnData implements Serializable{
@@ -22,25 +20,10 @@ public class ColumnData implements Serializable{
     List<Integer> string_idx_array = new ArrayList<Integer>();
 
     public ColumnData(List<String> array){
-        // for (Object object : array) {
-        //     this.array.add(Objects.toString(object, null));
-        // }
         this.array = array;
-        // for(int i=0; i<array.size(); i++){
-        //     logger.info(" "+array.get(i));
-        // }
-        // this.number_array = {};
-        // this.number_idx_array = {};
-        // this.string_array = {};
-        // this.string_idx_array = {};
-    
-        // for i, val in enumerate(array):
         int i=0;
         for(Object arr: array){
-            // logger.info(" "+arr);
             if(arr != null){
-                // if(isinstance(val, (int, float)){
-                
                 if(!string_data()){
                     this.number_array.add(arr.toString());
                     this.number_idx_array.add(i);
@@ -58,7 +41,7 @@ public ColumnData(List<String> array){
     public boolean string_data(){
         try 
         { 
-            // checking valid integer using parseInt() method 
+            // checking that every value is a valid number using the parseDouble() method
             for(String arr: this.array)
                 Double.parseDouble(arr.toString());
             return false;
-Original file line number
+Diff line change
 *.log
 *.avro
 +*.arff
 karma-offline/karma.err