asreviewcontrib.simulation.api

The main Application Programming Interface (API) for the asreview-simulation package.

Example usage:
from asreviewcontrib.simulation.api import Config
from asreviewcontrib.simulation.api import draw_sample
from asreviewcontrib.simulation.api import get_pyll


# (do something interesting with draw_sample, get_pyll, and Config)
 1"""
 2The main Application Programming Interface (API) for the `asreview-simulation` package.
 3
 4Example usage:
 5
 6    ```python
 7    from asreviewcontrib.simulation.api import Config
 8    from asreviewcontrib.simulation.api import draw_sample
 9    from asreviewcontrib.simulation.api import get_pyll
10
11
12    # (do something interesting with draw_sample, get_pyll, and Config)
13    ```
14"""
15from asreviewcontrib.simulation._private.lib.config import Config
16from asreviewcontrib.simulation._private.lib.config import OneModelConfig
17from asreviewcontrib.simulation._private.lib.draw_sample import draw_sample
18from asreviewcontrib.simulation._private.lib.get_abbrs import get_abbrs
19from asreviewcontrib.simulation._private.lib.get_dataset_names import get_dataset_names
20from asreviewcontrib.simulation._private.lib.get_default_params import get_default_params
21from asreviewcontrib.simulation._private.lib.get_pyll import get_pyll
22from asreviewcontrib.simulation._private.lib.prep_project_directory import prep_project_directory
23from asreviewcontrib.simulation._private.lib.run import run
24from asreviewcontrib.simulation.api import extending
25from asreviewcontrib.simulation.api import plotting
26from asreviewcontrib.simulation.api import unwrapping
27
28
29__all__ = [
30    "Config",
31    "OneModelConfig",
32    "extending",
33    "draw_sample",
34    "get_abbrs",
35    "get_default_params",
36    "get_dataset_names",
37    "get_pyll",
38    "plotting",
39    "prep_project_directory",
40    "run",
41    "unwrapping",
42]
class Config:
class Config:
    """Stores the configuration for all 7 types of model necessary to run an ASReview simulation."""

    _errmsg = "Expected an instance of OneModelConfig"

    def __init__(
        self,
        *,
        bal: Optional[OneModelConfig] = None,
        clr: Optional[OneModelConfig] = None,
        fex: Optional[OneModelConfig] = None,
        ofn: Optional[OneModelConfig] = None,
        qry: Optional[OneModelConfig] = None,
        sam: Optional[OneModelConfig] = None,
        stp: Optional[OneModelConfig] = None,
    ):
        """
        Args:
            bal:
                The configuration for the balancer model.
            clr:
                The configuration for the classifier model.
            fex:
                The configuration for the feature extraction model.
            ofn:
                The configuration for the objective function model.
            qry:
                The configuration for the query model.
            sam:
                The configuration for the prior sampling model.
            stp:
                The configuration for the stopping model.

        Synopsis:

            Constructor method. Any model type that is omitted (or passed as `None`)
            falls back to its default model choice with default parameter values.

        Example usage:

            1. Default choice for each model type, default configuration for each model.
                ```python
                from asreviewcontrib.simulation.api import Config


                config = Config()
                ```
            2. Default choice for each model type except balancer, default
                parameter values for all models.
                ```python
                from asreviewcontrib.simulation.api import Config
                from asreviewcontrib.simulation.api import OneModelConfig


                bal = OneModelConfig(abbr="bal-simple")
                config = Config(bal=bal)
                ```
            3. Custom model choice for sampling model and for stopping model,
                other model types use their default choice and parameterization.
                ```python
                from asreviewcontrib.simulation.api import Config
                from asreviewcontrib.simulation.api import OneModelConfig


                custom = {
                    "sam": OneModelConfig(
                        abbr="sam-random",
                        params={
                            "n_excluded": 10,
                            "n_included": 10,
                        }
                    ),
                    "stp": OneModelConfig(abbr="stp-nq"),
                }
                config = Config(**custom)
                ```
        """

        # Assign through the property setters (not the private attributes) so that
        # every argument goes through the same type and prefix validation.
        self.bal = bal or OneModelConfig("bal-double")
        self.clr = clr or OneModelConfig("clr-nb")
        self.fex = fex or OneModelConfig("fex-tfidf")
        self.ofn = ofn or OneModelConfig("ofn-none")
        self.qry = qry or OneModelConfig("qry-max")
        self.sam = sam or OneModelConfig("sam-random")
        self.stp = stp or OneModelConfig("stp-rel")

    def _models(self) -> Dict[str, OneModelConfig]:
        """Return the 7 stored model configurations keyed by their model type.

        Single source of truth for the key -> attribute mapping, so that
        `__eq__`, `as_dict` and `flattened` cannot drift apart.
        """
        return {
            "bal": self._bal,
            "clr": self._clr,
            "fex": self._fex,
            "ofn": self._ofn,
            "qry": self._qry,
            "sam": self._sam,
            "stp": self._stp,
        }

    def __eq__(self, other) -> bool:
        """Tests whether all of `self`'s 7 model configurations are equal to those
        of the `other` instance.

        Raises:
            AssertionError: if `other` is not an instance of `Config`.
        """
        assert isinstance(other, Config), "Can only compare to objects of equal type."
        theirs = other._models()
        # Stop at the first model type whose configuration differs.
        return all(mine == theirs[key] for key, mine in self._models().items())

    def as_dict(self, recurse=True) -> ConfigDict:
        """
        Args:
            recurse:
                Whether to also convert each `OneModelConfig` into a `dict`.

        Returns:

            A `dict` representation of the 7 different model configurations, keyed by
            model type (`bal`, `clr`, `fex`, `ofn`, `qry`, `sam`, `stp`). When `recurse`
            is `True` (as is the default), each of the 7 `OneModelConfig`s is itself also
            turned into a `dict` using its `.as_dict()` method; if `recurse` is `False`,
            the returned object is a `dict` whose values are of class `OneModelConfig`.
        """
        models = self._models()
        if not recurse:
            return models
        return {key: model.as_dict() for key, model in models.items()}

    def flattened(self) -> Dict[str, Any]:
        """
        Returns:

            A flattened version of the 7 model configurations as a `dict` whose keys consist
            of the name of the model and the name of its parameters.
        """
        d = {}
        for model in self._models().values():
            d.update(model.flattened())
        return d

    @property
    def bal(self) -> OneModelConfig:
        """The configuration for the balancer model."""
        return self._bal

    @bal.setter
    def bal(self, bal: OneModelConfig):
        """The configuration for the balancer model."""
        assert isinstance(bal, OneModelConfig), Config._errmsg
        assert bal.abbr.startswith("bal"), "Expected a balancer model."
        self._bal = bal

    @property
    def clr(self) -> OneModelConfig:
        """The configuration for the classifier model."""
        return self._clr

    @clr.setter
    def clr(self, clr: OneModelConfig):
        """The configuration for the classifier model."""
        assert isinstance(clr, OneModelConfig), Config._errmsg
        assert clr.abbr.startswith("clr"), "Expected a classifier model."
        self._clr = clr

    @property
    def fex(self) -> OneModelConfig:
        """The configuration for the feature extraction model."""
        return self._fex

    @fex.setter
    def fex(self, fex: OneModelConfig):
        """The configuration for the feature extraction model."""
        assert isinstance(fex, OneModelConfig), Config._errmsg
        assert fex.abbr.startswith("fex"), "Expected a feature extraction model."
        self._fex = fex

    @property
    def ofn(self) -> OneModelConfig:
        """The configuration for the objective function model."""
        return self._ofn

    @ofn.setter
    def ofn(self, ofn: OneModelConfig):
        """The configuration for the objective function model."""
        assert isinstance(ofn, OneModelConfig), Config._errmsg
        assert ofn.abbr.startswith("ofn"), "Expected an objective function model."
        self._ofn = ofn

    @property
    def qry(self) -> OneModelConfig:
        """The configuration for the query model."""
        return self._qry

    @qry.setter
    def qry(self, qry: OneModelConfig):
        """The configuration for the query model."""
        assert isinstance(qry, OneModelConfig), Config._errmsg
        assert qry.abbr.startswith("qry"), "Expected a query model."
        self._qry = qry

    @property
    def sam(self) -> OneModelConfig:
        """The configuration for the prior sampling model."""
        return self._sam

    @sam.setter
    def sam(self, sam: OneModelConfig):
        """The configuration for the prior sampling model."""
        assert isinstance(sam, OneModelConfig), Config._errmsg
        assert sam.abbr.startswith("sam"), "Expected a sampler model."
        self._sam = sam

    @property
    def stp(self) -> OneModelConfig:
        """The configuration for the stopping model."""
        return self._stp

    @stp.setter
    def stp(self, stp: OneModelConfig):
        """The configuration for the stopping model."""
        assert isinstance(stp, OneModelConfig), Config._errmsg
        assert stp.abbr.startswith("stp"), "Expected a stopping model."
        self._stp = stp

Stores the configuration for all 7 types of model necessary to run an ASReview simulation.

Config( *, bal: Optional[OneModelConfig] = None, clr: Optional[OneModelConfig] = None, fex: Optional[OneModelConfig] = None, ofn: Optional[OneModelConfig] = None, qry: Optional[OneModelConfig] = None, sam: Optional[OneModelConfig] = None, stp: Optional[OneModelConfig] = None)
132    def __init__(
133        self,
134        *,
135        bal: Optional[OneModelConfig] = None,
136        clr: Optional[OneModelConfig] = None,
137        fex: Optional[OneModelConfig] = None,
138        ofn: Optional[OneModelConfig] = None,
139        qry: Optional[OneModelConfig] = None,
140        sam: Optional[OneModelConfig] = None,
141        stp: Optional[OneModelConfig] = None,
142    ):
143        """
144        Args:
145            bal:
146                The configuration for the balancer model.
147            clr:
148                The configuration for the classifier model.
149            fex:
150                The configuration for the feature extraction model.
151            ofn:
152                The configuration for the objective function model.
153            qry:
154                The configuration for the query model.
155            sam:
156                The configuration for the prior sampling model.
157            stp:
158                The configuration for the stopping model.
159
160        Synopsis:
161
162            Constructor method.
163
164        Example usage:
165
166            1. Default choice for each model type, default configuration for each model.
167                ```python
168                from asreviewcontrib.simulation.api import Config
169
170
171                config = Config()
172                ```
173            2. Default choice for each model type except balancer, default
174                parameter values for all models.
175                ```python
176                from asreviewcontrib.simulation.api import Config
177                from asreviewcontrib.simulation.api import OneModelConfig
178
179
180                bal = OneModelConfig(abbr="bal-simple")
181                config = Config(bal=bal)
182                ```
183            3. Custom model choice for sampling model and for stopping model,
184                other model types use their default choice and parameterization.
185                ```python
186                from asreviewcontrib.simulation.api import Config
187                from asreviewcontrib.simulation.api import OneModelConfig
188
189
190                custom = {
191                    "sam": OneModelConfig(
192                        abbr="sam-random",
193                        params={
194                            "n_excluded": 10,
195                            "n_included": 10,
196                        }
197                    ),
198                    "stp": OneModelConfig(abbr="stp-nq"),
199                }
200                config = Config(**custom)
201                ```
202        """
203
204        # use the setter methods to assign constructor arguments to private attributes
205        self.bal = bal or OneModelConfig("bal-double")
206        self.clr = clr or OneModelConfig("clr-nb")
207        self.fex = fex or OneModelConfig("fex-tfidf")
208        self.ofn = ofn or OneModelConfig("ofn-none")
209        self.qry = qry or OneModelConfig("qry-max")
210        self.sam = sam or OneModelConfig("sam-random")
211        self.stp = stp or OneModelConfig("stp-rel")
Arguments:
  • bal: The configuration for the balancer model.
  • clr: The configuration for the classifier model.
  • fex: The configuration for the feature extraction model.
  • ofn: The configuration for the objective function model.
  • qry: The configuration for the query model.
  • sam: The configuration for the prior sampling model.
  • stp: The configuration for the stopping model.
Synopsis:

Constructor method.

Example usage:
  1. Default choice for each model type, default configuration for each model.

    from asreviewcontrib.simulation.api import Config
    
    
    config = Config()
    
  2. Default choice for each model type except balancer, default parameter values for all models.

    from asreviewcontrib.simulation.api import Config
    from asreviewcontrib.simulation.api import OneModelConfig
    
    
    bal = OneModelConfig(abbr="bal-simple")
    config = Config(bal=bal)
    
  3. Custom model choice for sampling model and for stopping model, other model types use their default choice and parameterization.

    from asreviewcontrib.simulation.api import Config
    from asreviewcontrib.simulation.api import OneModelConfig
    
    
    custom = {
        "sam": OneModelConfig(
            abbr="sam-random",
            params={
                "n_excluded": 10,
                "n_included": 10,
            }
        ),
        "stp": OneModelConfig(abbr="stp-nq"),
    }
    config = Config(**custom)
    
bal: OneModelConfig
263    @property
264    def bal(self) -> OneModelConfig:
265        """The configuration for the balancer model."""
266        return self._bal

The configuration for the balancer model.

clr: OneModelConfig
275    @property
276    def clr(self) -> OneModelConfig:
277        """The configuration for the classifier model."""
278        return self._clr

The configuration for the classifier model.

fex: OneModelConfig
287    @property
288    def fex(self) -> OneModelConfig:
289        """The configuration for the feature extraction model."""
290        return self._fex

The configuration for the feature extraction model.

ofn: OneModelConfig
299    @property
300    def ofn(self) -> OneModelConfig:
301        """The configuration for the objective function model."""
302        return self._ofn

The configuration for the objective function model.

qry: OneModelConfig
311    @property
312    def qry(self) -> OneModelConfig:
313        """The configuration for the query model."""
314        return self._qry

The configuration for the query model.

sam: OneModelConfig
323    @property
324    def sam(self) -> OneModelConfig:
325        """The configuration for the sampling model."""
326        return self._sam

The configuration for the sampling model.

stp: OneModelConfig
335    @property
336    def stp(self) -> OneModelConfig:
337        """The configuration for the stopping model."""
338        return self._stp

The configuration for the stopping model.

def as_dict( self, recurse=True) -> asreviewcontrib.simulation._private.lib.config.ConfigDict:
227    def as_dict(self, recurse=True) -> ConfigDict:
228        """
229        Returns:
230
231             A `dict` representation of the 7 different model configurations. When `recurse`
232            is `True` (as is the default), each of the 7 `OneModelConfig`s are themselves also turned
233            into a `dict` using their `.as_dict()` method; if `recurse` is `False`, the returned object
234            is a `dict` whose values are of class `OneModelConfig`.
235        """
236        return {
237            "bal": self._bal.as_dict() if recurse else self._bal,
238            "clr": self._clr.as_dict() if recurse else self._clr,
239            "fex": self._fex.as_dict() if recurse else self._fex,
240            "ofn": self._ofn.as_dict() if recurse else self._ofn,
241            "qry": self._qry.as_dict() if recurse else self._qry,
242            "sam": self._sam.as_dict() if recurse else self._stp,
243            "stp": self._stp.as_dict() if recurse else self._sam,
244        }
Returns:

A dict representation of the 7 different model configurations. When recurse is True (as is the default), each of the 7 OneModelConfigs are themselves also turned into a dict using their .as_dict() method; if recurse is False, the returned object is a dict whose values are of class OneModelConfig.

def flattened(self) -> Dict[str, Any]:
246    def flattened(self) -> Dict[str, Any]:
247        """
248        Returns:
249
250            A flattened version of the 7 model configurations as a `dict` whose keys consist
251            of the name of the model and the name of its parameters.
252        """
253        d = {}
254        d.update(self.bal.flattened())
255        d.update(self.clr.flattened())
256        d.update(self.fex.flattened())
257        d.update(self.ofn.flattened())
258        d.update(self.qry.flattened())
259        d.update(self.sam.flattened())
260        d.update(self.stp.flattened())
261        return d
Returns:

A flattened version of the 7 model configurations as a dict whose keys consist of the name of the model and the name of its parameters.

class OneModelConfig:
 18class OneModelConfig:
 19    """
 20    Stores the configuration for one model, e.g. the balancer, the stopping model,
 21    or the objective function model.
 22    """
 23
 24    def __init__(self, abbr: str, params: Optional[TParams] = None):
 25        """
 26        Args:
 27            abbr:
 28                The model abbreviation.
 29            params:
 30                The model parameters.
 31
 32        Synopsis:
 33
 34            Constructor method.
 35
 36        Example usage:
 37
 38            1. Default parameter values given the choice for `bal-simple`.
 39                ```python
 40                from asreviewcontrib.simulation.api import OneModelConfig
 41
 42
 43                bal = OneModelConfig(abbr="bal-simple")
 44                ```
 45            2. Custom parameter values for the selected model choice `stp-nq`.
 46                ```python
 47                from asreviewcontrib.simulation.api import OneModelConfig
 48
 49
 50                stp = OneModelConfig(abbr="stp-nq", params={"n_queries: 20"})
 51                ```
 52            3. Partially custom parameter values for the selected model choice `qry-max-random`.
 53                ```python
 54                from asreviewcontrib.simulation.api import OneModelConfig
 55
 56
 57                qry = OneModelConfig(abbr="qry-max-random", params={"n_instances: 10"})
 58                ```
 59        """
 60        assert isinstance(abbr, str), "Expected input argument 'abbr' to be of type 'str'"
 61        default_params: TParams = get_default_params(abbr)
 62        self._abbr = abbr
 63        self._params = default_params
 64        if params is not None:
 65            assert isinstance(params, dict), "Expected input argument 'params' to be of type 'dict'"
 66            valid_keys = default_params.keys()
 67            provided_keys = params.keys()
 68            for key in provided_keys:
 69                assert key in valid_keys, f"Can't update parameters for model '{self._abbr}' using unknown key '{key}'."
 70                self._params[key] = params[key]
 71
 72    def __eq__(self, other: Any) -> bool:
 73        """Test whether this instance of `OneModelConfig` is exactly equal to the `other` instance
 74        with respect to the set of key names and their values."""
 75        assert isinstance(other, OneModelConfig), "Can only compare to objects of equal type."
 76        cond1 = self._abbr == other._abbr
 77        cond2 = set(self._params) == set(other._params)
 78        cond3 = False not in {self._params[k] == other._params[k] for k in self._params.keys()}
 79        return cond1 and cond2 and cond3
 80
 81    def as_dict(self) -> OneModelConfigDict:
 82        """
 83        Returns:
 84
 85             The model configuration as a `dict`.
 86        """
 87        return {
 88            "abbr": self._abbr,
 89            "params": self._params,
 90        }
 91
 92    def flattened(self) -> Dict[str, Any]:
 93        """
 94        Returns:
 95
 96            A flattened version of the model configuration as a `dict` whose keys consist
 97            of the name of the model and the name of its parameters.
 98        """
 99        d = {}
100        for param in self.params.keys():
101            k = "/".join([self.abbr, param])
102            v = self.params[param]
103            d.update({k: v})
104        return d
105
106    @property
107    def abbr(self) -> str:
108        """The model abbreviation (read-only)."""
109        return self._abbr
110
111    @property
112    def params(self) -> TParams:
113        """The model parameterization (read-only)."""
114        return self._params

Stores the configuration for one model, e.g. the balancer, the stopping model, or the objective function model.

OneModelConfig(abbr: str, params: Optional[Dict[str, Any]] = None)
    def __init__(self, abbr: str, params: Optional[TParams] = None):
        """
        Args:
            abbr:
                The model abbreviation.
            params:
                The model parameters. Keys that are omitted keep the default value
                for the model identified by `abbr`; unknown keys are rejected.

        Synopsis:

            Constructor method.

        Example usage:

            1. Default parameter values given the choice for `bal-simple`.
                ```python
                from asreviewcontrib.simulation.api import OneModelConfig


                bal = OneModelConfig(abbr="bal-simple")
                ```
            2. Custom parameter values for the selected model choice `stp-nq`.
                ```python
                from asreviewcontrib.simulation.api import OneModelConfig


                stp = OneModelConfig(abbr="stp-nq", params={"n_queries": 20})
                ```
            3. Partially custom parameter values for the selected model choice `qry-max-random`.
                ```python
                from asreviewcontrib.simulation.api import OneModelConfig


                qry = OneModelConfig(abbr="qry-max-random", params={"n_instances": 10})
                ```
        """
        assert isinstance(abbr, str), "Expected input argument 'abbr' to be of type 'str'"
        # Start from the defaults for this model; user-supplied values override them below.
        default_params: TParams = get_default_params(abbr)
        self._abbr = abbr
        self._params = default_params
        if params is not None:
            assert isinstance(params, dict), "Expected input argument 'params' to be of type 'dict'"
            valid_keys = default_params.keys()
            provided_keys = params.keys()
            for key in provided_keys:
                # Only keys that exist in the model's defaults may be overridden.
                assert key in valid_keys, f"Can't update parameters for model '{self._abbr}' using unknown key '{key}'."
                self._params[key] = params[key]
Arguments:
  • abbr: The model abbreviation.
  • params: The model parameters.
Synopsis:

Constructor method.

Example usage:
  1. Default parameter values given the choice for bal-simple.

    from asreviewcontrib.simulation.api import OneModelConfig
    
    
    bal = OneModelConfig(abbr="bal-simple")
    
  2. Custom parameter values for the selected model choice stp-nq.

    from asreviewcontrib.simulation.api import OneModelConfig
    
    
    stp = OneModelConfig(abbr="stp-nq", params={"n_queries": 20})
    
  3. Partially custom parameter values for the selected model choice qry-max-random.

    from asreviewcontrib.simulation.api import OneModelConfig
    
    
    qry = OneModelConfig(abbr="qry-max-random", params={"n_instances": 10})
    
def as_dict( self) -> asreviewcontrib.simulation._private.lib.config.OneModelConfigDict:
81    def as_dict(self) -> OneModelConfigDict:
82        """
83        Returns:
84
85             The model configuration as a `dict`.
86        """
87        return {
88            "abbr": self._abbr,
89            "params": self._params,
90        }
Returns:

The model configuration as a dict.

def flattened(self) -> Dict[str, Any]:
 92    def flattened(self) -> Dict[str, Any]:
 93        """
 94        Returns:
 95
 96            A flattened version of the model configuration as a `dict` whose keys consist
 97            of the name of the model and the name of its parameters.
 98        """
 99        d = {}
100        for param in self.params.keys():
101            k = "/".join([self.abbr, param])
102            v = self.params[param]
103            d.update({k: v})
104        return d
Returns:

A flattened version of the model configuration as a dict whose keys consist of the name of the model and the name of its parameters.

abbr: str
106    @property
107    def abbr(self) -> str:
108        """The model abbreviation (read-only)."""
109        return self._abbr

The model abbreviation (read-only).

params: Dict[str, Any]
111    @property
112    def params(self) -> TParams:
113        """The model parameterization (read-only)."""
114        return self._params

The model parameterization (read-only).

def draw_sample( pyll: Dict[str, hyperopt.pyll.base.Apply]) -> Dict[str, OneModelConfig]:
def draw_sample(pyll: TPyllDict) -> TSampled:
    """
    Args:
        pyll:
            The Pyll program `dict`, keyed by model type abbreviation.

    Returns:

        A dictionary with model type abbreviation for each key (`bal`, `fex`,
        `stp`, etc), and as value a `OneModelConfig` built from the model
        abbreviation and parameterization as randomly drawn by
        `hyperopt.rand.pyll.stochastic.sample`.

    Synopsis:

        Convenience function around `hyperopt.rand.pyll.stochastic.sample` to draw
        a random sample given a Pyll program `dict`, i.e. input argument `pyll`. The returned
        object can be directly passed into `Config`'s constructor by using keyword unpacking
        `**`, for example like so:

        ```python
        from asreviewcontrib.simulation.api import Config
        from asreviewcontrib.simulation.api import draw_sample
        from asreviewcontrib.simulation.api import get_pyll
        from asreviewcontrib.simulation.api import OneModelConfig


        fixed = {
            "ofn": OneModelConfig(abbr="ofn-wss", params={"at_pct": 90}),
            "qry": OneModelConfig(abbr="qry-max", params={"n_instances": 10}),
        }

        pyll = {
            "bal": get_pyll("bal-double"),
            "fex": get_pyll("fex-tfidf"),
        }

        # use pyll programs to draw a parameterization for 'bal' and 'fex'
        drawn = draw_sample(pyll)

        # construct an all-model config from one-model configs -- implicitly use default
        # model choice and parameterization for models not included as argument
        config = Config(**fixed, **drawn)
        ```
    """
    assert isinstance(pyll, dict), "Expected input argument pyll to be of type 'dict'."
    recognized = {"bal", "clr", "fex", "ofn", "qry", "sam", "stp"}
    for key in pyll:
        assert key in recognized, f"Unexpected key '{key}' in keys of input argument 'pyll'."
    drawn = hyperopt.rand.pyll.stochastic.sample(pyll)
    # Wrap every drawn {"abbr": ..., "params": ...} entry in a OneModelConfig.
    return {
        model_type: OneModelConfig(abbr=drawn[model_type]["abbr"], params=drawn[model_type]["params"])
        for model_type in drawn.keys()
    }
Arguments:
  • pyll: The Pyll program dict.
Returns:

A dictionary with model type abbreviation for each key (bal, fex, stp, etc), and the corresponding parameterization as randomly drawn by hyperopt.rand.pyll.stochastic.sample.

Synopsis:

Convenience function around hyperopt.rand.pyll.stochastic.sample to draw a random sample given a Pyll program dict, i.e. input argument pyll. The returned object can be directly passed into Config's constructor by using keyword unpacking **, for example like so:

from asreviewcontrib.simulation.api import Config
from asreviewcontrib.simulation.api import draw_sample
from asreviewcontrib.simulation.api import get_pyll
from asreviewcontrib.simulation.api import OneModelConfig


fixed = {
    "ofn": OneModelConfig(abbr="ofn-wss", params={"at_pct": 90}),
    "qry": OneModelConfig(abbr="qry-max", params={"n_instances": 10}),
}

pyll = {
    "bal": get_pyll("bal-double"),
    "fex": get_pyll("fex-tfidf"),
}

# use pyll programs to draw a parameterization for 'bal' and 'fex'
drawn = draw_sample(pyll)

# construct an all-model config from one-model configs -- implicitly use default
# model choice and parameterization for models not included as argument
config = Config(**fixed, **drawn)
def get_abbrs() -> List[str]:
 9def get_abbrs() -> TAbbrs:
10    """
11    Returns:
12
13        A list of recognized model abbreviations.
14    """
15    my_abbrs = {
16        "bal-double",
17        "bal-simple",
18        "bal-undersample",
19        "clr-logistic",
20        "clr-lstm-base",
21        "clr-lstm-pool",
22        "clr-nb",
23        "clr-nn-2-layer",
24        "clr-rf",
25        "clr-svm",
26        "fex-doc2vec",
27        "fex-embedding-idf",
28        "fex-embedding-lstm",
29        "fex-sbert",
30        "fex-tfidf",
31        "ofn-none",
32        "ofn-wss",
33        "qry-cluster",
34        "qry-max",
35        "qry-max-random",
36        "qry-max-uncertainty",
37        "qry-random",
38        "qry-uncertainty",
39        "sam-handpicked",
40        "sam-random",
41        "stp-none",
42        "stp-nq",
43        "stp-rel",
44    }
45    other_abbrs = set([abbr for abbr, _ in get_quads()])
46    return sorted(my_abbrs.union(other_abbrs))
Returns:

A list of recognized model abbreviations.

def get_default_params(abbr: str) -> Dict[str, Any]:
def get_default_params(abbr: str) -> Dict[str, Any]:
    """
    Args:
        abbr:
            The model abbreviation.

    Returns:
        The result of calling the default-parameters getter registered for `abbr`.

    Raises:
        KeyError:
            If `abbr` is not a known model abbreviation. The list of valid
            abbreviations is printed before the error is re-raised.
    """
    builtin_getters = {
        "bal-double": get_bal_double_params,
        "bal-simple": get_bal_simple_params,
        "bal-undersample": get_bal_undersample_params,
        "clr-logistic": get_clr_logistic_params,
        "clr-lstm-base": get_clr_lstm_base_params,
        "clr-lstm-pool": get_clr_lstm_pool_params,
        "clr-nb": get_clr_nb_params,
        "clr-nn-2-layer": get_clr_nn_2_layer_params,
        "clr-rf": get_clr_rf_params,
        "clr-svm": get_clr_svm_params,
        "fex-doc2vec": get_fex_doc2vec_params,
        "fex-embedding-idf": get_fex_embedding_idf_params,
        "fex-embedding-lstm": get_fex_embedding_lstm_params,
        "fex-sbert": get_fex_sbert_params,
        "fex-tfidf": get_fex_tfidf_params,
        "ofn-none": get_ofn_none_params,
        "ofn-wss": get_ofn_wss_params,
        "qry-cluster": get_qry_cluster_params,
        "qry-max": get_qry_max_params,
        "qry-max-random": get_qry_max_random_params,
        "qry-max-uncertainty": get_qry_max_uncertainty_params,
        "qry-random": get_qry_random_params,
        "qry-uncertainty": get_qry_uncertainty_params,
        "sam-handpicked": get_sam_handpicked_params,
        "sam-random": get_sam_random_params,
        "stp-none": get_stp_none_params,
        "stp-nq": get_stp_nq_params,
        "stp-rel": get_stp_rel_params,
    }
    # getters from get_quads(); on a name clash these take precedence over the built-in ones
    extra_getters = {name: quad.default_params for name, quad in get_quads()}
    getters = {**builtin_getters, **extra_getters}

    try:
        getter = getters[abbr]
    except KeyError as err:
        valid_names = "\n".join(getters)
        print(f"'{abbr}' is not a valid name for a model. Valid names are:\n{valid_names}")
        raise err
    return getter()
def get_dataset_names() -> List[str]:
 9def get_dataset_names() -> TDatasetNames:
10    """
11    Returns:
12
13        A list of recognized dataset names.
14    """
15    dataset_names = list()
16    for group in DatasetManager().list():
17        for dataset in group["datasets"]:
18            dataset_names.append(f"{group['group_id']}:{dataset['dataset_id']}")
19    return dataset_names
Returns:

A list of recognized dataset names.

def get_pyll(abbr: str) -> hyperopt.pyll.base.Apply:
def get_pyll(abbr: str) -> hyperopt.base.pyll.Apply:
    """
    Args:
        abbr:
            The model abbreviation.

    Returns:
        The Pyll program for the model identified by the input argument `abbr`,
        obtained by calling the Pyll getter registered for that abbreviation.
        Pyll programs define the sample space for a given model or combination of
        models. They are a concept from the `hyperopt` library, refer to
        https://hyperopt.github.io/hyperopt/ for more details.

    Raises:
        KeyError:
            If `abbr` is not a known model abbreviation. The list of valid
            abbreviations is printed before the error is re-raised.
    """
    builtin_getters = {
        "bal-double": bal_double_pyll,
        "bal-simple": bal_simple_pyll,
        "bal-undersample": bal_undersample_pyll,
        "clr-logistic": clr_logistic_pyll,
        "clr-lstm-base": clr_lstm_base_pyll,
        "clr-lstm-pool": clr_lstm_pool_pyll,
        "clr-nb": clr_nb_pyll,
        "clr-nn-2-layer": clr_nn_2_layer_pyll,
        "clr-rf": clr_rf_pyll,
        "clr-svm": clr_svm_pyll,
        "fex-doc2vec": fex_doc2vec_pyll,
        "fex-embedding-idf": fex_embedding_idf_pyll,
        "fex-embedding-lstm": fex_embedding_lstm_pyll,
        "fex-sbert": fex_sbert_pyll,
        "fex-tfidf": fex_tfidf_pyll,
        "ofn-none": ofn_none_pyll,
        "ofn-wss": ofn_wss_pyll,
        "qry-cluster": qry_cluster_pyll,
        "qry-max": qry_max_pyll,
        "qry-max-random": qry_max_random_pyll,
        "qry-max-uncertainty": qry_max_uncertainty_pyll,
        "qry-random": qry_random_pyll,
        "qry-uncertainty": qry_uncertainty_pyll,
        "sam-handpicked": sam_handpicked_pyll,
        "sam-random": sam_random_pyll,
        "stp-none": stp_none_pyll,
        "stp-nq": stp_nq_pyll,
        "stp-rel": stp_rel_pyll,
    }
    # getters from get_quads(); on a name clash these take precedence over the built-in ones
    extra_getters = {name: quad.pyll for name, quad in get_quads()}
    getters = {**builtin_getters, **extra_getters}

    try:
        getter = getters[abbr]
    except KeyError as err:
        valid_names = "\n".join(getters)
        print(f"'{abbr}' is not a valid name for a model. Valid names are:\n{valid_names}")
        raise err
    return getter()
Arguments:
  • abbr: The model abbreviation.
Returns:

The Pyll program dict for a given model identified by the input argument abbr. Pyll programs define the sample space for a given model or combination of models. They are a concept from the hyperopt library, refer to https://hyperopt.github.io/hyperopt/ for more details.

def prep_project_directory( benchmark: Optional[str] = None, input_file: Optional[str] = None, output_file: Optional[str] = None) -> Tuple[asreview.project.ASReviewProject, asreview.data.base.ASReviewData]:
def prep_project_directory(
    benchmark: Optional[str] = None, input_file: Optional[str] = None, output_file: Optional[str] = None
) -> Tuple[ASReviewProject, ASReviewData]:
    """
    Args:
        benchmark:
            The name of the benchmark data set. You can retrieve the list of recognized
            names using `get_dataset_names`. Exactly one of `benchmark` and
            `input_file` must be given.
        input_file:
            The name of the input file. This can be a relative path from the working
            directory, or an absolute path. Exactly one of `benchmark` and
            `input_file` must be given. The target file should be a valid input file
            for ASReview, see https://asreview.readthedocs.io for more information.
        output_file:
            Where the results from the simulation will be written. Required; must have
            the `.asreview` filename extension and must not exist yet.

    Returns:
        A 2-tuple of `(project, as_data)` where `project` is the `ASReviewProject` object,
        and `as_data` is the `ASReviewData` object, see https://asreview.readthedocs.io
        for more information on these.

    Raises:
        AssertionError:
            If the combination or type of the arguments is invalid, or if the output
            file or its `.asreview.tmp` counterpart already exists.
        ValueError:
            If the loaded data contains no records.

    Synopsis:

        Prepare an `*.asreview.tmp` directory which will contain the log / state / configuration
        of the ASReview simulation.
    """
    # --- validate the arguments (order matters: it decides which message is reported) ---
    assert (benchmark is None) != (input_file is None), "Need to specify either 'benchmark' or 'input_file'"
    assert benchmark is None or isinstance(benchmark, str), "expected input argument 'benchmark' to be of type str"
    assert input_file is None or isinstance(input_file, str), "expected input argument 'input_file' to be of type str"
    assert output_file is not None, "Need to specify an output file"
    assert isinstance(output_file, str), "expected input argument 'output_file' to be of type str"

    output_path = Path(output_file)
    assert output_path.suffix == ".asreview", "'output_file' should have '.asreview' filename extension."
    assert not output_path.exists(), f"Output file '{output_file}'  already exists."

    # the project lives in a sibling '.asreview.tmp' directory
    project_path = output_path.with_suffix(".asreview.tmp")
    assert not project_path.exists(), f"Temporary file '{project_path}'  already exists."

    # --- load the data and decide under which file name to store a copy of it ---
    if benchmark is not None:
        # Use --benchmark as the source
        as_data = load_data(benchmark)
        prefix = "benchmark:"
        stem = benchmark[len(prefix):] if benchmark.startswith(prefix) else benchmark
        dataset_path = f"{stem}.csv"
        if len(as_data) == 0:
            raise ValueError("Choose a benchmark dataset with at least one record.")
    elif input_file is not None:
        # Use --in as the source
        as_data = load_data(input_file)
        dataset_path = Path(input_file).with_suffix(".csv").name
        if len(as_data) == 0:
            raise ValueError("Supply data with at least one record.")
    else:
        raise ValueError("Unexpected case.")

    # --- create the .asreview.tmp directory ---
    project_name = output_path.stem
    project = ASReviewProject.create(
        project_path,
        project_id=project_name,
        project_name=project_name,
        project_mode="simulate",
        project_description="Simulation created via ASReview command line interface",
    )

    # Include a copy of the input data in the .asreview.tmp directory
    as_data.to_file(project_path / "data" / dataset_path)

    # Include the settings in the .asreview.tmp directory
    project.update_config(dataset_path=dataset_path)

    return project, as_data
Arguments:
  • benchmark: The name of the benchmark data set. You can retrieve the list of recognized names using get_dataset_names. The function expects either benchmark or input_file to be defined.
  • input_file: The name of the input file. This can be a relative path from the working directory, or an absolute path. The function expects either benchmark or input_file to be defined. The target file should be a valid input file for ASReview, see https://asreview.readthedocs.io for more information.
  • output_file: Where the results from the simulation will be written.
Returns:

A 2-tuple of (project, as_data) where project is the ASReviewProject object, and as_data is the ASReviewData object, see https://asreview.readthedocs.io for more information on these.

Synopsis:

Prepare an *.asreview.tmp directory which will contain the log / state / configuration of the ASReview simulation.

def run( config: Config, project: asreview.project.ASReviewProject, as_data: asreview.data.base.ASReviewData, write_interval: Optional[int] = None, seed: Optional[int] = None, no_zip: bool = False) -> Optional[float]:
 16def run(
 17    config: Config,
 18    project: ASReviewProject,
 19    as_data: ASReviewData,
 20    write_interval: Optional[int] = None,
 21    seed: Optional[int] = None,
 22    no_zip: bool = False,
 23) -> TOfnScore:
 24    """
 25    Args:
 26        config:
 27            The choice of 7 types of model and their parameterization.
 28        project:
 29            The `ASReviewProject` object, see https://asreview.readthedocs.io
 30        as_data:
 31            The `ASReviewData` object, see https://asreview.readthedocs.io
 32        write_interval:
 33            Interval measured in number of queries at which to
 34            write the state from memory to the state file.
 35        seed:
 36            Random seed for the simulation
 37        no_zip:
 38            Whether to forgo compressing the project temporary directory into a
 39            zipped archive once the simulation ends.
 40
 41    Returns:
 42        The objective score or `None`.
 43
 44    Synopsis:
 45
 46        Runs the ASReview simulation with the provided choice of models and their
 47        parameterization (i.e., input argument `config`).
 48
 49    Example usage:
 50
 51        ```python
 52        import os
 53        import tempfile
 54        from asreviewcontrib.simulation.api import Config
 55        from asreviewcontrib.simulation.api import OneModelConfig
 56        from asreviewcontrib.simulation.api import prep_project_directory
 57        from asreviewcontrib.simulation.api import run
 58
 59        # make a classifier model config using default parameter values
 60        # given the model name
 61        clr = OneModelConfig("clr-svm")
 62
 63        # make a query model config using positional arguments, and a
 64        # partial params dict
 65        qry = OneModelConfig("qry-max-random", {"fraction_max": 0.90})
 66
 67        # make a stopping model config using keyword arguments
 68        stp = OneModelConfig(abbr="stp-nq", params={"n_queries": 10})
 69
 70        # construct an all model config from one model configs -- implicitly
 71        # use default model choice and parameterization for models not
 72        # included as argument (i.e. sam, fex, bal, ofn)
 73        config = Config(clr=clr, qry=qry, stp=stp)
 74
 75        # arbitrarily pick a benchmark dataset
 76        benchmark = "benchmark:Cohen_2006_ADHD"
 77
 78        # create a temporary directory
 79        tmpdir = tempfile.mkdtemp(prefix="asreview-simulation.", dir=".")
 80        output_file = f"{tmpdir}{os.sep}project.asreview"
 81
 82        # prepare the directory that holds the state of the simulation
 83        project, as_data = prep_project_directory(benchmark=benchmark,
 84                                                  output_file=output_file)
 85
 86        # start the simulation
 87        run(config, project, as_data)
 88        ```
 89    """
 90
 91    # prep
 92    kwargs = get_review_simulate_kwargs(config, as_data, seed)
 93    reviewer = ReviewSimulate(as_data, project=project, **kwargs, write_interval=write_interval)
 94
 95    # run
 96    project.update_review(status="review")  # (has side effects on disk)
 97    reviewer.review()
 98    project.mark_review_finished()  # (has side effects on disk)
 99
100    # wrap-up
101    p = project.project_path
102    if no_zip:
103        # rename the .asreview.tmp directory to just .asreview
104        os.rename(p, p.with_suffix(""))
105    else:
106        # zip the results
107        project.export(p.with_suffix(""))
108        shutil.rmtree(p)
109
110    return calc_ofn_score(config.ofn, p.with_suffix(""))
Arguments:
  • config: The choice of 7 types of model and their parameterization.
  • project: The ASReviewProject object, see https://asreview.readthedocs.io
  • as_data: The ASReviewData object, see https://asreview.readthedocs.io
  • write_interval: Interval measured in number of queries at which to write the state from memory to the state file.
  • seed: Random seed for the simulation
  • no_zip: Whether to forgo compressing the project temporary directory into a zipped archive once the simulation ends.
Returns:

The objective score or None.

Synopsis:

Runs the ASReview simulation with the provided choice of models and their parameterization (i.e., input argument config).

Example usage:
import os
import tempfile
from asreviewcontrib.simulation.api import Config
from asreviewcontrib.simulation.api import OneModelConfig
from asreviewcontrib.simulation.api import prep_project_directory
from asreviewcontrib.simulation.api import run

# make a classifier model config using default parameter values
# given the model name
clr = OneModelConfig("clr-svm")

# make a query model config using positional arguments, and a
# partial params dict
qry = OneModelConfig("qry-max-random", {"fraction_max": 0.90})

# make a stopping model config using keyword arguments
stp = OneModelConfig(abbr="stp-nq", params={"n_queries": 10})

# construct an all model config from one model configs -- implicitly
# use default model choice and parameterization for models not
# included as argument (i.e. sam, fex, bal, ofn)
config = Config(clr=clr, qry=qry, stp=stp)

# arbitrarily pick a benchmark dataset
benchmark = "benchmark:Cohen_2006_ADHD"

# create a temporary directory
tmpdir = tempfile.mkdtemp(prefix="asreview-simulation.", dir=".")
output_file = f"{tmpdir}{os.sep}project.asreview"

# prepare the directory that holds the state of the simulation
project, as_data = prep_project_directory(benchmark=benchmark,
                                          output_file=output_file)

# start the simulation
run(config, project, as_data)