Skip to content

Commit 971d4b3

Browse files
authored
Fix for 0.3.0 (#105)
* More docs * Update codecov-actions * Use more explicit gcc options
1 parent 467d30b commit 971d4b3

File tree

16 files changed

+101
-2070
lines changed

16 files changed

+101
-2070
lines changed

.github/workflows/run-test.yml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ jobs:
1717
run: |
1818
pip install --upgrade pip
1919
sudo apt-get install lcov
20-
CXXFLAGS="-O0 -g -coverage" pip install -e .
20+
FLAGS="-fprofile-arcs -ftest-coverage"
21+
CFLAGS="$FLAGS" CXXFLAGS="$FLAGS" pip install -e .
2122
- name: Run pytest
2223
run: |
2324
pip install pytest pytest-mock pytest-cov
@@ -28,10 +29,10 @@ jobs:
2829
coverage xml
2930
lcov -d `pwd` -c -o coverage.info
3031
- name: Upload coverage to Codecov
31-
uses: codecov/codecov-action@v1
32+
uses: codecov/codecov-action@v3
3233
with:
3334
files: ./coverage.xml,./coverage.info
35+
name: codecov-umbrella
36+
fail_ci_if_error: true
3437
verbose: false
3538
env_vars: OS,PYTHON
36-
name: codecov-umbrella
37-
fail_ci_if_error: false

create_pb_stubs.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ for module_name in "${modules[@]}"
99
do
1010
echo "Create stub for $module_name"
1111
pybind11-stubgen -o stubs --no-setup-py "$module_name"
12-
output_path="$(echo "${module_name}" | sed 's/\./\//g').pyi"
12+
output_path="src/$(echo "${module_name}" | sed 's/\./\//g').pyi"
1313
input_path="stubs/$(echo "${module_name}" | sed 's/\./\//g')-stubs/__init__.pyi"
1414
rm "${output_path}"
1515
echo 'm: int

docs/source/api_reference.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,3 +95,5 @@ Dataset
9595
MovieLens1MDataManager
9696
MovieLens100KDataManager
9797
MovieLens20MDataManager
98+
NeuMFML1MDownloader
99+
NeuMFPinterestDownloader

examples/1-vs-100-negative.ipynb

Lines changed: 16 additions & 2049 deletions
Large diffs are not rendered by default.

run_autobuild.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@
33
sphinx-autobuild \
44
--host 0.0.0.0 \
55
--port 9999 \
6-
--watch irspack/ \
6+
--watch src/irspack/ \
77
docs/source docs/build

src/irspack/dataset/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@
55
MovieLens20MDataManager,
66
MovieLens100KDataManager,
77
)
8+
from .neu_mf import NeuMFML1MDownloader, NeuMFMPinterestDownloader
89

910
__all__ = [
1011
"MovieLens100KDataManager",
1112
"MovieLens1MDataManager",
1213
"MovieLens20MDataManager",
1314
"CiteULikeADataManager",
1415
"AmazonMusicDataManager",
16+
"NeuMFML1MDownloader",
17+
"NeuMFMPinterestDownloader",
1518
]

src/irspack/dataset/downloader.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,15 @@ def _save_to_zippath(self, path: Path) -> None:
1717
def __init__(
1818
self, zippath: Optional[Union[Path, str]] = None, force_download: bool = False
1919
):
20+
"""Specify the zip path for the dataset. If that path does not exist, try downloading the relevant data from online resources.
21+
22+
Args:
23+
zippath (Optional[Union[Path, str]], optional): Where the zipped data is located. If ``None``, the class's default path is used. Defaults to None.
24+
force_download (bool, optional): If ``True``, do not prompt for permission and start downloading immediately. Defaults to False.
25+
26+
Raises:
27+
RuntimeError: If the dataset could not be obtained (e.g., the download was declined).
28+
"""
2029
if zippath is None:
2130
zippath = self.DEFAULT_PATH
2231
zippath = Path(zippath)

src/irspack/dataset/neu_mf.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,33 @@ def read_train_test(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
5454

5555

5656
class NeuMFML1MDownloader(NeuMFDownloader):
57+
r"""Manages MovieLens 1M dataset split under 1-vs-100 negative evaluation protocol.
58+
59+
Args:
60+
zippath:
61+
Where the zipped data is located. If `None`, assumes the path to be `~/.neumf-ml-1m.zip`.
62+
If the designated path does not exist, you will be prompted for the permission to download the data.
63+
Defaults to `None`.
64+
force_download:
65+
If `True`, the class will not prompt for the permission and start downloading immediately.
66+
"""
5767
DEFAULT_PATH = Path("~/.neumf-ml-1m.zip").expanduser()
5868

5969
TRAIN_URL = "https://raw.githubusercontent.com/tohtsky/neural_collaborative_filtering/master/Data/ml-1m.train.rating"
6070
NEGATIVE_URL = "https://raw.githubusercontent.com/tohtsky/neural_collaborative_filtering/master/Data/ml-1m.test.negative"
6171

6272

6373
class NeuMFMPinterestDownloader(NeuMFDownloader):
74+
r"""Manages Pinterest dataset split under 1-vs-100 negative evaluation protocol.
75+
76+
Args:
77+
zippath:
78+
Where the zipped data is located. If `None`, assumes the path to be `~/.neumf-pinterest.zip`.
79+
If the designated path does not exist, you will be prompted for the permission to download the data.
80+
Defaults to `None`.
81+
force_download:
82+
If `True`, the class will not prompt for the permission and start downloading immediately.
83+
"""
6484
DEFAULT_PATH = Path("~/.neumf-pinterest.zip").expanduser()
6585

6686
TRAIN_URL = "https://raw.githubusercontent.com/tohtsky/neural_collaborative_filtering/master/Data/pinterest-20.train.rating"

src/irspack/recommenders/base.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,13 @@ class BaseRecommender(object, metaclass=RecommenderMeta):
8484
config_class: Type[RecommenderConfig]
8585
default_tune_range: List[ParameterRange]
8686

87+
X_train_all: sps.csr_matrix
88+
"""The matrix to feed into recommender."""
89+
8790
def __init__(self, X_train_all: InteractionMatrix, **kwargs: Any) -> None:
8891
self.X_train_all: sps.csr_matrix = sps.csr_matrix(X_train_all).astype(
8992
np.float64
9093
)
91-
"""The matrix to feed into recommender."""
9294

9395
self.n_users: int = self.X_train_all.shape[0]
9496
self.n_items: int = self.X_train_all.shape[1]
@@ -387,18 +389,18 @@ def get_score_cold_user_remove_seen(self, X: InteractionMatrix) -> DenseScoreArr
387389
class BaseSimilarityRecommender(BaseRecommender):
388390
"""The computed item-item similarity. Might not be initialized before `learn()` is called."""
389391

390-
W_: Optional[Union[sps.csr_matrix, sps.csc_matrix, np.ndarray]]
392+
_W: Optional[Union[sps.csr_matrix, sps.csc_matrix, np.ndarray]]
391393

392394
def __init__(self, *args: Any, **kwargs: Any) -> None:
393395
super().__init__(*args, **kwargs)
394-
self.W_ = None
396+
self._W = None
395397

396398
@property
397399
def W(self) -> Union[sps.csr_matrix, sps.csc_matrix, np.ndarray]:
398400
"""The computed item-item similarity weight matrix."""
399-
if self.W_ is None:
401+
if self._W is None:
400402
raise RuntimeError("W fetched before fit.")
401-
return self.W_
403+
return self._W
402404

403405
def get_score(self, user_indices: UserIndexArray) -> DenseScoreArray:
404406
return _sparse_to_array(self.X_train_all[user_indices].dot(self.W))

src/irspack/recommenders/dense_slim.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,4 @@ def _learn(self) -> None:
5050
P_dense *= -diag_P_inv[np.newaxis, :]
5151
range_ = np.arange(self.n_items)
5252
P_dense[range_, range_] = 0
53-
self.W_ = P_dense
53+
self._W = P_dense

src/irspack/recommenders/edlae.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,4 @@ def _learn(self) -> None:
6363
P_dense *= -diag_P_inv[np.newaxis, :]
6464
range_ = np.arange(self.n_items)
6565
P_dense[range_, range_] = 0
66-
self.W_ = P_dense
66+
self._W = P_dense

src/irspack/recommenders/knn.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def _learn(self) -> None:
7575
raise RuntimeError("Unknown weighting scheme.")
7676

7777
computer = self._create_computer(X_weighted.T)
78-
self.W_ = remove_diagonal(
78+
self._W = remove_diagonal(
7979
computer.compute_similarity(self.X_train_all.T, self.top_k)
8080
).tocsc()
8181

src/irspack/recommenders/multvae.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,33 @@ class MultVAEConfig(BaseEarlyStoppingRecommenderConfig):
291291

292292

293293
class MultVAERecommender(BaseRecommenderWithEarlyStopping):
294+
r"""JAX implementation of Mult-VAE, presented in `"Variational Autoencoders for Collaborative Filtering" <https://arxiv.org/abs/1802.05814>`_.
295+
296+
Args:
297+
X_train_all:
298+
The source data.
299+
dim_z:
300+
The latent dimension.
301+
enc_hidden_dims:
302+
The encoder's intermediate layer dimension.
303+
dec_hidden_dims:
304+
The dimensions of the decoder's intermediate layers.
305+
dropout_p:
306+
Dropout ratio. Defaults to 0.5.
307+
l2_regularizer:
308+
L2 regularization coefficient. Defaults to 0.
309+
kl_anneal_goal:
310+
beta of beta-VAE. Defaults to 0.2.
311+
anneal_end_epoch:
312+
The epoch to complete the annealing. Defaults to 50.
313+
minibatch_size (int, optional):
314+
Minibatch size. Defaults to 512.
315+
train_epochs:
316+
The number of epochs to run. Defaults to 300.
317+
learning_rate:
318+
Adam optimizer's learning rate. Defaults to 1e-3.
319+
"""
320+
294321
config_class = MultVAEConfig
295322

296323
default_tune_range = [

src/irspack/recommenders/p3.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,6 @@ def _learn(self) -> None:
7171
n_threads=self.n_threads,
7272
)
7373
top_k = self.X_train_all.shape[1] if self.top_k is None else self.top_k
74-
self.W_ = computer.compute_W(self.X_train_all.T, top_k)
74+
self._W = computer.compute_W(self.X_train_all.T, top_k)
7575
if self.normalize_weight:
76-
self.W_ = l1_normalize_row(self.W_)
76+
self._W = l1_normalize_row(self._W)

src/irspack/recommenders/rp3.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class RP3betaConfig(RecommenderConfig):
2121

2222

2323
class RP3betaRecommender(BaseSimilarityRecommender):
24-
"""3-Path random walk with the item-popularity penalization:
24+
r"""3-Path random walk with the item-popularity penalization:
2525
2626
- `Updatable, Accurate, Diverse, and Scalable Recommendations for Interactive Applications
2727
<https://dl.acm.org/doi/10.1145/2955101>`_
@@ -79,6 +79,6 @@ def _learn(self) -> None:
7979
n_threads=self.n_threads,
8080
)
8181
top_k = self.X_train_all.shape[1] if self.top_k is None else self.top_k
82-
self.W_ = computer.compute_W(self.X_train_all.T, top_k)
82+
self._W = computer.compute_W(self.X_train_all.T, top_k)
8383
if self.normalize_weight:
84-
self.W_ = l1_normalize_row(self.W_)
84+
self._W = l1_normalize_row(self._W)

src/irspack/recommenders/slim.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def _learn(self) -> None:
7979
l1_coeff = self.n_users * self.alpha * self.l1_ratio
8080

8181
if self.positive_only:
82-
self.W_ = slim_weight_positive_only(
82+
self._W = slim_weight_positive_only(
8383
self.X_train_all,
8484
n_threads=self.n_threads,
8585
n_iter=self.n_iter,
@@ -89,7 +89,7 @@ def _learn(self) -> None:
8989
top_k=-1 if self.top_k is None else self.top_k,
9090
)
9191
else:
92-
self.W_ = slim_weight_allow_negative(
92+
self._W = slim_weight_allow_negative(
9393
self.X_train_all,
9494
n_threads=self.n_threads,
9595
n_iter=self.n_iter,

0 commit comments

Comments
 (0)