asreviewcontrib.simulation.api
The main Application Programming Interface (API) for the asreview-simulation package.
Example usage:
```python
from asreviewcontrib.simulation.api import Config
from asreviewcontrib.simulation.api import draw_sample
from asreviewcontrib.simulation.api import get_pyll

# (do something interesting with draw_sample, get_pyll, and Config)
```
1""" 2The main Application Programming Interface (API) for the `asreview-simulation` package. 3 4Example usage: 5 6 ```python 7 from asreviewcontrib.simulation.api import Config 8 from asreviewcontrib.simulation.api import draw_sample 9 from asreviewcontrib.simulation.api import get_pyll 10 11 12 # (do something interesting with draw_sample, get_pyll, and Config) 13 ``` 14""" 15from asreviewcontrib.simulation._private.lib.config import Config 16from asreviewcontrib.simulation._private.lib.config import OneModelConfig 17from asreviewcontrib.simulation._private.lib.draw_sample import draw_sample 18from asreviewcontrib.simulation._private.lib.get_abbrs import get_abbrs 19from asreviewcontrib.simulation._private.lib.get_dataset_names import get_dataset_names 20from asreviewcontrib.simulation._private.lib.get_default_params import get_default_params 21from asreviewcontrib.simulation._private.lib.get_pyll import get_pyll 22from asreviewcontrib.simulation._private.lib.prep_project_directory import prep_project_directory 23from asreviewcontrib.simulation._private.lib.run import run 24from asreviewcontrib.simulation.api import extending 25from asreviewcontrib.simulation.api import plotting 26from asreviewcontrib.simulation.api import unwrapping 27 28 29__all__ = [ 30 "Config", 31 "OneModelConfig", 32 "extending", 33 "draw_sample", 34 "get_abbrs", 35 "get_default_params", 36 "get_dataset_names", 37 "get_pyll", 38 "plotting", 39 "prep_project_directory", 40 "run", 41 "unwrapping", 42]
class Config:
    """Stores the configuration for all 7 types of model necessary to run an ASReview simulation.

    The 7 model types are the balancer (`bal`), classifier (`clr`), feature
    extraction (`fex`), objective function (`ofn`), query (`qry`), prior
    sampling (`sam`) and stopping (`stp`) models. Each one is stored as a
    `OneModelConfig` and exposed through a property whose setter asserts
    both the type and the abbreviation prefix of the assigned value.
    """

    _errmsg = "Expected an instance of OneModelConfig"

    # Model type names in the order used by every method of this class. Driving
    # the methods from one tuple keeps each key paired with its own attribute.
    _model_types = ("bal", "clr", "fex", "ofn", "qry", "sam", "stp")

    def __init__(
        self,
        *,
        bal: Optional[OneModelConfig] = None,
        clr: Optional[OneModelConfig] = None,
        fex: Optional[OneModelConfig] = None,
        ofn: Optional[OneModelConfig] = None,
        qry: Optional[OneModelConfig] = None,
        sam: Optional[OneModelConfig] = None,
        stp: Optional[OneModelConfig] = None,
    ):
        """
        Args:
            bal:
                The configuration for the balancer model.
            clr:
                The configuration for the classifier model.
            fex:
                The configuration for the feature extraction model.
            ofn:
                The configuration for the objective function model.
            qry:
                The configuration for the query model.
            sam:
                The configuration for the prior sampling model.
            stp:
                The configuration for the stopping model.

        Synopsis:

            Constructor method. Any model type that is not supplied falls back
            to its default model choice with default parameter values.

        Example usage:

            1. Default choice for each model type, default configuration for each model.
                ```python
                from asreviewcontrib.simulation.api import Config


                config = Config()
                ```
            2. Default choice for each model type except balancer, default
               parameter values for all models.
                ```python
                from asreviewcontrib.simulation.api import Config
                from asreviewcontrib.simulation.api import OneModelConfig


                bal = OneModelConfig(abbr="bal-simple")
                config = Config(bal=bal)
                ```
            3. Custom model choice for sampling model and for stopping model,
               other model types use their default choice and parameterization.
                ```python
                from asreviewcontrib.simulation.api import Config
                from asreviewcontrib.simulation.api import OneModelConfig


                custom = {
                    "sam": OneModelConfig(
                        abbr="sam-random",
                        params={
                            "n_excluded": 10,
                            "n_included": 10,
                        }
                    ),
                    "stp": OneModelConfig(abbr="stp-nq"),
                }
                config = Config(**custom)
                ```
        """

        # use the setter methods to assign constructor arguments to private attributes
        self.bal = bal or OneModelConfig("bal-double")
        self.clr = clr or OneModelConfig("clr-nb")
        self.fex = fex or OneModelConfig("fex-tfidf")
        self.ofn = ofn or OneModelConfig("ofn-none")
        self.qry = qry or OneModelConfig("qry-max")
        self.sam = sam or OneModelConfig("sam-random")
        self.stp = stp or OneModelConfig("stp-rel")

    def __eq__(self, other) -> bool:
        """Tests whether all of `self`'s 7 model configurations are equal to those
        of the `other` instance."""
        assert isinstance(other, Config), "Can only compare to objects of equal type."
        return all(
            getattr(self, f"_{model_type}") == getattr(other, f"_{model_type}")
            for model_type in Config._model_types
        )

    def as_dict(self, recurse=True) -> ConfigDict:
        """
        Returns:

            A `dict` representation of the 7 different model configurations. When `recurse`
            is `True` (as is the default), each of the 7 `OneModelConfig`s are themselves also turned
            into a `dict` using their `.as_dict()` method; if `recurse` is `False`, the returned object
            is a `dict` whose values are of class `OneModelConfig`.
        """
        result = {}
        for model_type in Config._model_types:
            one_model_config = getattr(self, f"_{model_type}")
            # The same object feeds both branches, so "sam" and "stp" can no
            # longer be crossed over when `recurse` is False.
            result[model_type] = one_model_config.as_dict() if recurse else one_model_config
        return result

    def flattened(self) -> Dict[str, Any]:
        """
        Returns:

            A flattened version of the 7 model configurations as a `dict` whose keys consist
            of the name of the model and the name of its parameters.
        """
        d = {}
        for model_type in Config._model_types:
            d.update(getattr(self, model_type).flattened())
        return d

    @property
    def bal(self) -> OneModelConfig:
        """The configuration for the balancer model."""
        return self._bal

    @bal.setter
    def bal(self, bal: OneModelConfig):
        """The configuration for the balancer model."""
        assert isinstance(bal, OneModelConfig), Config._errmsg
        assert bal.abbr.startswith("bal"), "Expected a balancer model."
        self._bal = bal

    @property
    def clr(self) -> OneModelConfig:
        """The configuration for the classifier model."""
        return self._clr

    @clr.setter
    def clr(self, clr: OneModelConfig):
        """The configuration for the classifier model."""
        assert isinstance(clr, OneModelConfig), Config._errmsg
        assert clr.abbr.startswith("clr"), "Expected a classifier model."
        self._clr = clr

    @property
    def fex(self) -> OneModelConfig:
        """The configuration for the feature extraction model."""
        return self._fex

    @fex.setter
    def fex(self, fex: OneModelConfig):
        """The configuration for the feature extraction model."""
        assert isinstance(fex, OneModelConfig), Config._errmsg
        assert fex.abbr.startswith("fex"), "Expected a feature extraction model."
        self._fex = fex

    @property
    def ofn(self) -> OneModelConfig:
        """The configuration for the objective function model."""
        return self._ofn

    @ofn.setter
    def ofn(self, ofn: OneModelConfig):
        """The configuration for the objective function model."""
        assert isinstance(ofn, OneModelConfig), Config._errmsg
        assert ofn.abbr.startswith("ofn"), "Expected an objective function model."
        self._ofn = ofn

    @property
    def qry(self) -> OneModelConfig:
        """The configuration for the query model."""
        return self._qry

    @qry.setter
    def qry(self, qry: OneModelConfig):
        """The configuration for the query model."""
        assert isinstance(qry, OneModelConfig), Config._errmsg
        assert qry.abbr.startswith("qry"), "Expected a query model."
        self._qry = qry

    @property
    def sam(self) -> OneModelConfig:
        """The configuration for the sampling model."""
        return self._sam

    @sam.setter
    def sam(self, sam: OneModelConfig):
        """The configuration for the sampling model."""
        assert isinstance(sam, OneModelConfig), Config._errmsg
        assert sam.abbr.startswith("sam"), "Expected a sampler model."
        self._sam = sam

    @property
    def stp(self) -> OneModelConfig:
        """The configuration for the stopping model."""
        return self._stp

    @stp.setter
    def stp(self, stp: OneModelConfig):
        """The configuration for the stopping model."""
        assert isinstance(stp, OneModelConfig), Config._errmsg
        assert stp.abbr.startswith("stp"), "Expected a stopping model."
        self._stp = stp
Stores the configuration for all 7 types of model necessary to run an ASReview simulation.
def __init__(
    self,
    *,
    bal: Optional[OneModelConfig] = None,
    clr: Optional[OneModelConfig] = None,
    fex: Optional[OneModelConfig] = None,
    ofn: Optional[OneModelConfig] = None,
    qry: Optional[OneModelConfig] = None,
    sam: Optional[OneModelConfig] = None,
    stp: Optional[OneModelConfig] = None,
):
    """
    Args:
        bal:
            The configuration for the balancer model.
        clr:
            The configuration for the classifier model.
        fex:
            The configuration for the feature extraction model.
        ofn:
            The configuration for the objective function model.
        qry:
            The configuration for the query model.
        sam:
            The configuration for the prior sampling model.
        stp:
            The configuration for the stopping model.

    Synopsis:

        Constructor method. All arguments are keyword-only; an argument that
        is omitted is replaced by the default model choice for that model type.

    Example usage:

        1. Default choice for each model type, default configuration for each model.
            ```python
            from asreviewcontrib.simulation.api import Config


            config = Config()
            ```
        2. Default choice for each model type except balancer, default
           parameter values for all models.
            ```python
            from asreviewcontrib.simulation.api import Config
            from asreviewcontrib.simulation.api import OneModelConfig


            bal = OneModelConfig(abbr="bal-simple")
            config = Config(bal=bal)
            ```
        3. Custom model choice for sampling model and for stopping model,
           other model types use their default choice and parameterization.
            ```python
            from asreviewcontrib.simulation.api import Config
            from asreviewcontrib.simulation.api import OneModelConfig


            custom = {
                "sam": OneModelConfig(
                    abbr="sam-random",
                    params={
                        "n_excluded": 10,
                        "n_included": 10,
                    }
                ),
                "stp": OneModelConfig(abbr="stp-nq"),
            }
            config = Config(**custom)
            ```
    """
    # (attribute name, caller-supplied value, abbreviation of the fallback model)
    requested = (
        ("bal", bal, "bal-double"),
        ("clr", clr, "clr-nb"),
        ("fex", fex, "fex-tfidf"),
        ("ofn", ofn, "ofn-none"),
        ("qry", qry, "qry-max"),
        ("sam", sam, "sam-random"),
        ("stp", stp, "stp-rel"),
    )
    for attr_name, supplied, fallback_abbr in requested:
        # setattr goes through the property setters, which validate the value;
        # the fallback is only constructed when nothing was supplied
        setattr(self, attr_name, supplied or OneModelConfig(fallback_abbr))
Arguments:
- bal: The configuration for the balancer model.
- clr: The configuration for the classifier model.
- fex: The configuration for the feature extraction model.
- ofn: The configuration for the objective function model.
- qry: The configuration for the query model.
- sam: The configuration for the prior sampling model.
- stp: The configuration for the stopping model.
Synopsis:
Constructor method.
Example usage:
1. Default choice for each model type, default configuration for each model.

```python
from asreviewcontrib.simulation.api import Config

config = Config()
```

2. Default choice for each model type except balancer, default parameter values for all models.

```python
from asreviewcontrib.simulation.api import Config
from asreviewcontrib.simulation.api import OneModelConfig

bal = OneModelConfig(abbr="bal-simple")
config = Config(bal=bal)
```

3. Custom model choice for sampling model and for stopping model, other model types use their default choice and parameterization.

```python
from asreviewcontrib.simulation.api import Config
from asreviewcontrib.simulation.api import OneModelConfig

custom = {
    "sam": OneModelConfig(
        abbr="sam-random",
        params={
            "n_excluded": 10,
            "n_included": 10,
        }
    ),
    "stp": OneModelConfig(abbr="stp-nq"),
}
config = Config(**custom)
```
@property
def bal(self) -> OneModelConfig:
    """The configuration for the balancer model.

    Returns the `OneModelConfig` held in the private `_bal` attribute. The
    matching setter asserts that an assigned value is a `OneModelConfig`
    whose `abbr` starts with `bal`.
    """
    return self._bal
The configuration for the balancer model.
@property
def clr(self) -> OneModelConfig:
    """The configuration for the classifier model.

    Returns the `OneModelConfig` held in the private `_clr` attribute. The
    matching setter asserts that an assigned value is a `OneModelConfig`
    whose `abbr` starts with `clr`.
    """
    return self._clr
The configuration for the classifier model.
@property
def fex(self) -> OneModelConfig:
    """The configuration for the feature extraction model.

    Returns the `OneModelConfig` held in the private `_fex` attribute. The
    matching setter asserts that an assigned value is a `OneModelConfig`
    whose `abbr` starts with `fex`.
    """
    return self._fex
The configuration for the feature extraction model.
@property
def ofn(self) -> OneModelConfig:
    """The configuration for the objective function model.

    Returns the `OneModelConfig` held in the private `_ofn` attribute. The
    matching setter asserts that an assigned value is a `OneModelConfig`
    whose `abbr` starts with `ofn`.
    """
    return self._ofn
The configuration for the objective function model.
@property
def qry(self) -> OneModelConfig:
    """The configuration for the query model.

    Returns the `OneModelConfig` held in the private `_qry` attribute. The
    matching setter asserts that an assigned value is a `OneModelConfig`
    whose `abbr` starts with `qry`.
    """
    return self._qry
The configuration for the query model.
@property
def sam(self) -> OneModelConfig:
    """The configuration for the (prior) sampling model.

    Returns the `OneModelConfig` held in the private `_sam` attribute. The
    matching setter asserts that an assigned value is a `OneModelConfig`
    whose `abbr` starts with `sam`.
    """
    return self._sam
The configuration for the sampling model.
@property
def stp(self) -> OneModelConfig:
    """The configuration for the stopping model.

    Returns the `OneModelConfig` held in the private `_stp` attribute. The
    matching setter asserts that an assigned value is a `OneModelConfig`
    whose `abbr` starts with `stp`.
    """
    return self._stp
The configuration for the stopping model.
def as_dict(self, recurse=True) -> ConfigDict:
    """
    Args:
        recurse:
            Whether to also convert each `OneModelConfig` into a `dict`.

    Returns:

        A `dict` representation of the 7 different model configurations. When `recurse`
        is `True` (as is the default), each of the 7 `OneModelConfig`s are themselves also turned
        into a `dict` using their `.as_dict()` method; if `recurse` is `False`, the returned object
        is a `dict` whose values are of class `OneModelConfig`.
    """
    one_model_configs = {
        "bal": self._bal,
        "clr": self._clr,
        "fex": self._fex,
        "ofn": self._ofn,
        "qry": self._qry,
        "sam": self._sam,
        "stp": self._stp,
    }
    if not recurse:
        # Each key maps to its own attribute; previously "sam" and "stp"
        # returned each other's configuration in this branch.
        return one_model_configs
    return {model_type: cfg.as_dict() for model_type, cfg in one_model_configs.items()}
Returns:
A `dict` representation of the 7 different model configurations. When `recurse` is `True` (as is the default), each of the 7 `OneModelConfig`s are themselves also turned into a `dict` using their `.as_dict()` method; if `recurse` is `False`, the returned object is a `dict` whose values are of class `OneModelConfig`.
def flattened(self) -> Dict[str, Any]:
    """
    Returns:

        A single-level `dict` that merges the flattened form of each of the 7 model
        configurations, so that every key combines the name of a model with the
        name of one of its parameters.
    """
    merged = {}
    per_model = (self.bal, self.clr, self.fex, self.ofn, self.qry, self.sam, self.stp)
    for one_model_config in per_model:
        merged.update(one_model_config.flattened())
    return merged
Returns:
A flattened version of the 7 model configurations as a `dict` whose keys consist of the name of the model and the name of its parameters.
class OneModelConfig:
    """
    Stores the configuration for one model, e.g. the balancer, the stopping model,
    or the objective function model.
    """

    def __init__(self, abbr: str, params: Optional[TParams] = None):
        """
        Args:
            abbr:
                The model abbreviation.
            params:
                The model parameters. May be a partial `dict`; keys that are left
                out keep the default value for the model identified by `abbr`.

        Synopsis:

            Constructor method. Starts from the default parameters of the model
            and overrides them with the entries of `params`, asserting that every
            provided key is a known parameter of that model.

        Example usage:

            1. Default parameter values given the choice for `bal-simple`.
                ```python
                from asreviewcontrib.simulation.api import OneModelConfig


                bal = OneModelConfig(abbr="bal-simple")
                ```
            2. Custom parameter values for the selected model choice `stp-nq`.
                ```python
                from asreviewcontrib.simulation.api import OneModelConfig


                stp = OneModelConfig(abbr="stp-nq", params={"n_queries": 20})
                ```
            3. Partially custom parameter values for the selected model choice `qry-max-random`.
                ```python
                from asreviewcontrib.simulation.api import OneModelConfig


                qry = OneModelConfig(abbr="qry-max-random", params={"n_instances": 10})
                ```
        """
        assert isinstance(abbr, str), "Expected input argument 'abbr' to be of type 'str'"
        default_params: TParams = get_default_params(abbr)
        self._abbr = abbr
        # Work on a shallow copy so the overrides below never write into the
        # object handed out by the defaults getter.
        # NOTE(review): getters of extension models may return a shared dict -- confirm.
        self._params = dict(default_params)
        if params is not None:
            assert isinstance(params, dict), "Expected input argument 'params' to be of type 'dict'"
            for key, value in params.items():
                assert key in default_params, f"Can't update parameters for model '{self._abbr}' using unknown key '{key}'."
                self._params[key] = value

    def __eq__(self, other: Any) -> bool:
        """Test whether this instance of `OneModelConfig` is exactly equal to the `other` instance
        with respect to the set of key names and their values."""
        assert isinstance(other, OneModelConfig), "Can only compare to objects of equal type."
        if self._abbr != other._abbr:
            return False
        # Compare the key sets first: looking up one instance's keys in the other
        # would raise KeyError whenever the two parameter sets differ.
        if set(self._params) != set(other._params):
            return False
        return all(self._params[k] == other._params[k] for k in self._params)

    def as_dict(self) -> OneModelConfigDict:
        """
        Returns:

            The model configuration as a `dict` with keys `abbr` and `params`.
        """
        return {
            "abbr": self._abbr,
            "params": self._params,
        }

    def flattened(self) -> Dict[str, Any]:
        """
        Returns:

            A flattened version of the model configuration as a `dict` whose keys consist
            of the name of the model and the name of its parameters, joined by `/`.
        """
        return {"/".join([self.abbr, param]): value for param, value in self.params.items()}

    @property
    def abbr(self) -> str:
        """The model abbreviation (read-only)."""
        return self._abbr

    @property
    def params(self) -> TParams:
        """The model parameterization (read-only)."""
        return self._params
Stores the configuration for one model, e.g. the balancer, the stopping model, or the objective function model.
def __init__(self, abbr: str, params: Optional[TParams] = None):
    """
    Args:
        abbr:
            The model abbreviation.
        params:
            The model parameters. May be a partial `dict`; keys that are left
            out keep the default value for the model identified by `abbr`.

    Synopsis:

        Constructor method. Starts from the default parameters of the model
        and overrides them with the entries of `params`, asserting that every
        provided key is a known parameter of that model.

    Example usage:

        1. Default parameter values given the choice for `bal-simple`.
            ```python
            from asreviewcontrib.simulation.api import OneModelConfig


            bal = OneModelConfig(abbr="bal-simple")
            ```
        2. Custom parameter values for the selected model choice `stp-nq`.
            ```python
            from asreviewcontrib.simulation.api import OneModelConfig


            stp = OneModelConfig(abbr="stp-nq", params={"n_queries": 20})
            ```
        3. Partially custom parameter values for the selected model choice `qry-max-random`.
            ```python
            from asreviewcontrib.simulation.api import OneModelConfig


            qry = OneModelConfig(abbr="qry-max-random", params={"n_instances": 10})
            ```
    """
    assert isinstance(abbr, str), "Expected input argument 'abbr' to be of type 'str'"
    default_params: TParams = get_default_params(abbr)
    self._abbr = abbr
    # Work on a shallow copy so the overrides below never write into the
    # object handed out by the defaults getter.
    # NOTE(review): getters of extension models may return a shared dict -- confirm.
    self._params = dict(default_params)
    if params is not None:
        assert isinstance(params, dict), "Expected input argument 'params' to be of type 'dict'"
        for key, value in params.items():
            assert key in default_params, f"Can't update parameters for model '{self._abbr}' using unknown key '{key}'."
            self._params[key] = value
Arguments:
- abbr: The model abbreviation.
- params: The model parameters.
Synopsis:
Constructor method.
Example usage:
1. Default parameter values given the choice for `bal-simple`.

```python
from asreviewcontrib.simulation.api import OneModelConfig

bal = OneModelConfig(abbr="bal-simple")
```

2. Custom parameter values for the selected model choice `stp-nq`.

```python
from asreviewcontrib.simulation.api import OneModelConfig

stp = OneModelConfig(abbr="stp-nq", params={"n_queries": 20})
```

3. Partially custom parameter values for the selected model choice `qry-max-random`.

```python
from asreviewcontrib.simulation.api import OneModelConfig

qry = OneModelConfig(abbr="qry-max-random", params={"n_instances": 10})
```
def as_dict(self) -> OneModelConfigDict:
    """
    Returns:

        A `dict` with two entries: `abbr`, the model abbreviation, and `params`,
        the parameter `dict` held by this instance (the same object, not a copy).
    """
    return dict(abbr=self._abbr, params=self._params)
Returns:
The model configuration as a `dict`.
def flattened(self) -> Dict[str, Any]:
    """
    Returns:

        A single-level `dict` with one entry per parameter; each key is the model
        abbreviation and the parameter name joined by `/`, each value is the
        corresponding parameter value.
    """
    return {
        "/".join([self.abbr, param_name]): param_value
        for param_name, param_value in self.params.items()
    }
Returns:
A flattened version of the model configuration as a `dict` whose keys consist of the name of the model and the name of its parameters.
def draw_sample(pyll: TPyllDict) -> TSampled:
    """
    Args:
        pyll:
            The Pyll program `dict`, keyed by model type abbreviation.

    Returns:

        A dictionary with model type abbreviation for each key (`bal`, `fex`,
        `stp`, etc), and as value a `OneModelConfig` built from the `abbr` and
        `params` randomly drawn by `hyperopt.rand.pyll.stochastic.sample`.

    Synopsis:

        Convenience function around `hyperopt.rand.pyll.stochastic.sample` to draw
        a random sample given a Pyll program `dict`, i.e. input argument `pyll`. The returned
        object can be directly passed into `Config`'s constructor by using keyword unpacking
        `**`, for example like so:

        ```python
        from asreviewcontrib.simulation.api import Config
        from asreviewcontrib.simulation.api import draw_sample
        from asreviewcontrib.simulation.api import get_pyll
        from asreviewcontrib.simulation.api import OneModelConfig


        fixed = {
            "ofn": OneModelConfig(abbr="ofn-wss", params={"at_pct": 90}),
            "qry": OneModelConfig(abbr="qry-max", params={"n_instances": 10}),
        }

        pyll = {
            "bal": get_pyll("bal-double"),
            "fex": get_pyll("fex-tfidf"),
        }

        # use pyll programs to draw a parameterization for 'bal' and 'fex'
        drawn = draw_sample(pyll)

        # construct an all-model config from one-model configs -- implicitly use default
        # model choice and parameterization for models not included as argument
        config = Config(**fixed, **drawn)
        ```
    """
    recognized = {"bal", "clr", "fex", "ofn", "qry", "sam", "stp"}
    assert isinstance(pyll, dict), "Expected input argument pyll to be of type 'dict'."
    for key in pyll:
        assert key in recognized, f"Unexpected key '{key}' in keys of input argument 'pyll'."
    drawn = hyperopt.rand.pyll.stochastic.sample(pyll)
    # wrap every drawn (abbr, params) pair in a OneModelConfig, keyed by model type
    return {
        model_type: OneModelConfig(abbr=choice["abbr"], params=choice["params"])
        for model_type, choice in drawn.items()
    }
Arguments:
- pyll: The Pyll program `dict`.
Returns:
A dictionary with model type abbreviation for each key (`bal`, `fex`, `stp`, etc), and the corresponding parameterization as randomly drawn by `hyperopt.rand.pyll.stochastic.sample`.
Synopsis:
Convenience function around `hyperopt.rand.pyll.stochastic.sample` to draw a random sample given a Pyll program `dict`, i.e. input argument `pyll`. The returned object can be directly passed into `Config`'s constructor by using keyword unpacking `**`, for example like so:

```python
from asreviewcontrib.simulation.api import Config
from asreviewcontrib.simulation.api import draw_sample
from asreviewcontrib.simulation.api import get_pyll
from asreviewcontrib.simulation.api import OneModelConfig

fixed = {
    "ofn": OneModelConfig(abbr="ofn-wss", params={"at_pct": 90}),
    "qry": OneModelConfig(abbr="qry-max", params={"n_instances": 10}),
}

pyll = {
    "bal": get_pyll("bal-double"),
    "fex": get_pyll("fex-tfidf"),
}

# use pyll programs to draw a parameterization for 'bal' and 'fex'
drawn = draw_sample(pyll)

# construct an all-model config from one-model configs -- implicitly use default
# model choice and parameterization for models not included as argument
config = Config(**fixed, **drawn)
```
def get_abbrs() -> TAbbrs:
    """
    Returns:

        A sorted list of recognized model abbreviations: the ones listed in this
        function combined with the abbreviations yielded by `get_quads()`.
    """
    builtin_abbrs = {
        "bal-double",
        "bal-simple",
        "bal-undersample",
        "clr-logistic",
        "clr-lstm-base",
        "clr-lstm-pool",
        "clr-nb",
        "clr-nn-2-layer",
        "clr-rf",
        "clr-svm",
        "fex-doc2vec",
        "fex-embedding-idf",
        "fex-embedding-lstm",
        "fex-sbert",
        "fex-tfidf",
        "ofn-none",
        "ofn-wss",
        "qry-cluster",
        "qry-max",
        "qry-max-random",
        "qry-max-uncertainty",
        "qry-random",
        "qry-uncertainty",
        "sam-handpicked",
        "sam-random",
        "stp-none",
        "stp-nq",
        "stp-rel",
    }
    quad_abbrs = {abbr for abbr, _ in get_quads()}
    return sorted(builtin_abbrs | quad_abbrs)
Returns:
A list of recognized model abbreviations.
def get_default_params(abbr: str) -> Dict[str, Any]:
    """
    Args:
        abbr:
            The model abbreviation.

    Returns:
        Whatever the default-parameters getter registered for `abbr` returns
        (annotated as a `dict` mapping parameter names to values).

    Raises:
        KeyError: when `abbr` is not a registered model abbreviation. The list
            of valid names is printed before the error is re-raised.
    """
    getters = {
        "bal-double": get_bal_double_params,
        "bal-simple": get_bal_simple_params,
        "bal-undersample": get_bal_undersample_params,
        "clr-logistic": get_clr_logistic_params,
        "clr-lstm-base": get_clr_lstm_base_params,
        "clr-lstm-pool": get_clr_lstm_pool_params,
        "clr-nb": get_clr_nb_params,
        "clr-nn-2-layer": get_clr_nn_2_layer_params,
        "clr-rf": get_clr_rf_params,
        "clr-svm": get_clr_svm_params,
        "fex-doc2vec": get_fex_doc2vec_params,
        "fex-embedding-idf": get_fex_embedding_idf_params,
        "fex-embedding-lstm": get_fex_embedding_lstm_params,
        "fex-sbert": get_fex_sbert_params,
        "fex-tfidf": get_fex_tfidf_params,
        "ofn-none": get_ofn_none_params,
        "ofn-wss": get_ofn_wss_params,
        "qry-cluster": get_qry_cluster_params,
        "qry-max": get_qry_max_params,
        "qry-max-random": get_qry_max_random_params,
        "qry-max-uncertainty": get_qry_max_uncertainty_params,
        "qry-random": get_qry_random_params,
        "qry-uncertainty": get_qry_uncertainty_params,
        "sam-handpicked": get_sam_handpicked_params,
        "sam-random": get_sam_random_params,
        "stp-none": get_stp_none_params,
        "stp-nq": get_stp_nq_params,
        "stp-rel": get_stp_rel_params,
    }
    # entries coming from get_quads() are added last, so they take precedence
    # over a built-in getter registered under the same abbreviation
    quad_getters = [(quad_abbr, quad.default_params) for quad_abbr, quad in get_quads()]
    for quad_abbr, quad_getter in quad_getters:
        getters[quad_abbr] = quad_getter

    try:
        getter = getters[abbr]
    except KeyError as e:
        abbrs = "\n".join(list(getters.keys()))
        print(f"'{abbr}' is not a valid name for a model. Valid names are:\n{abbrs}")
        raise e
    return getter()
def get_dataset_names() -> TDatasetNames:
    """
    Returns:

        A list of recognized dataset names, one per dataset listed by
        `DatasetManager().list()`, each formatted as `<group_id>:<dataset_id>`.
    """
    return [
        f"{group['group_id']}:{dataset['dataset_id']}"
        for group in DatasetManager().list()
        for dataset in group["datasets"]
    ]
Returns:
A list of recognized dataset names.
def get_pyll(abbr: str) -> hyperopt.base.pyll.Apply:
    """
    Args:
        abbr:
            The model abbreviation.

    Returns:
        The Pyll program for the model identified by the input argument `abbr`,
        as produced by the Pyll getter registered for that abbreviation. Pyll
        programs define the sample space for a given model or combination of
        models. They are a concept from the `hyperopt` library, refer to
        https://hyperopt.github.io/hyperopt/ for more details.

    Raises:
        KeyError: when `abbr` is not a registered model abbreviation. The list
            of valid names is printed before the error is re-raised.
    """
    getters = {
        "bal-double": bal_double_pyll,
        "bal-simple": bal_simple_pyll,
        "bal-undersample": bal_undersample_pyll,
        "clr-logistic": clr_logistic_pyll,
        "clr-lstm-base": clr_lstm_base_pyll,
        "clr-lstm-pool": clr_lstm_pool_pyll,
        "clr-nb": clr_nb_pyll,
        "clr-nn-2-layer": clr_nn_2_layer_pyll,
        "clr-rf": clr_rf_pyll,
        "clr-svm": clr_svm_pyll,
        "fex-doc2vec": fex_doc2vec_pyll,
        "fex-embedding-idf": fex_embedding_idf_pyll,
        "fex-embedding-lstm": fex_embedding_lstm_pyll,
        "fex-sbert": fex_sbert_pyll,
        "fex-tfidf": fex_tfidf_pyll,
        "ofn-none": ofn_none_pyll,
        "ofn-wss": ofn_wss_pyll,
        "qry-cluster": qry_cluster_pyll,
        "qry-max": qry_max_pyll,
        "qry-max-random": qry_max_random_pyll,
        "qry-max-uncertainty": qry_max_uncertainty_pyll,
        "qry-random": qry_random_pyll,
        "qry-uncertainty": qry_uncertainty_pyll,
        "sam-handpicked": sam_handpicked_pyll,
        "sam-random": sam_random_pyll,
        "stp-none": stp_none_pyll,
        "stp-nq": stp_nq_pyll,
        "stp-rel": stp_rel_pyll,
    }
    # entries coming from get_quads() are added last, so they take precedence
    # over a built-in getter registered under the same abbreviation
    quad_getters = [(quad_abbr, quad.pyll) for quad_abbr, quad in get_quads()]
    for quad_abbr, quad_getter in quad_getters:
        getters[quad_abbr] = quad_getter

    try:
        getter = getters[abbr]
    except KeyError as e:
        abbrs = "\n".join(list(getters.keys()))
        print(f"'{abbr}' is not a valid name for a model. Valid names are:\n{abbrs}")
        raise e
    return getter()
Arguments:
- abbr: The model abbreviation.
Returns:
The Pyll program `dict` for a given model identified by the input argument `abbr`. Pyll programs define the sample space for a given model or combination of models. They are a concept from the `hyperopt` library, refer to https://hyperopt.github.io/hyperopt/ for more details.
def prep_project_directory(
    benchmark: Optional[str] = None, input_file: Optional[str] = None, output_file: Optional[str] = None
) -> Tuple[ASReviewProject, ASReviewData]:
    """
    Args:
        benchmark:
            The name of the benchmark data set. You can retrieve the list of recognized
            names using `get_dataset_names`. Exactly one of `benchmark` and
            `input_file` must be given.
        input_file:
            The name of the input file. This can be a relative path from the working
            directory, or an absolute path. Exactly one of `benchmark` and
            `input_file` must be given. The target file should be a valid input file
            for ASReview, see https://asreview.readthedocs.io for more information.
        output_file:
            Where the results from the simulation will be written. Required; must
            end in `.asreview` and must not exist yet.

    Returns:
        A 2-tuple of `(project, as_data)` where `project` is the `ASReviewProject` object,
        and `as_data` is the `ASReviewData` object, see https://asreview.readthedocs.io
        for more information on these.

    Raises:
        AssertionError: when the arguments fail any of the checks above, or when
            the output file or its `.asreview.tmp` counterpart already exists.
        ValueError: when the loaded data contains no records.

    Synopsis:

        Prepare an `*.asreview.tmp` directory which will contain the log / state / configuration
        of the ASReview simulation.
    """
    # --- argument validation ---
    assert (benchmark is None) != (input_file is None), "Need to specify either 'benchmark' or 'input_file'"
    assert benchmark is None or isinstance(benchmark, str), "expected input argument 'benchmark' to be of type str"
    assert input_file is None or isinstance(input_file, str), "expected input argument 'input_file' to be of type str"
    assert output_file is not None, "Need to specify an output file"
    assert isinstance(output_file, str), "expected input argument 'output_file' to be of type str"

    assert Path(output_file).suffix == ".asreview", "'output_file' should have '.asreview' filename extension."
    assert not Path(output_file).exists(), f"Output file '{output_file}' already exists."
    output_file_tmp = Path(output_file).with_suffix(".asreview.tmp")
    assert not Path(output_file_tmp).exists(), f"Temporary file '{output_file_tmp}' already exists."

    # --- load the data and derive the file name used for its copy ---
    if benchmark is not None:
        # Use --benchmark as the source
        as_data = load_data(benchmark)
        prefix = "benchmark:"
        stripped = benchmark[len(prefix):] if benchmark.startswith(prefix) else benchmark
        dataset_path = f"{stripped}.csv"
        if len(as_data) == 0:
            raise ValueError("Choose a benchmark dataset with at least one record.")
    elif input_file is not None:
        # Use --in as the source
        as_data = load_data(input_file)
        csv_name = Path(input_file).with_suffix(".csv").name
        dataset_path = str(csv_name)
        if len(as_data) == 0:
            raise ValueError("Supply data with at least one record.")
    else:
        raise ValueError("Unexpected case.")

    # --- create the .asreview.tmp directory ---
    name = Path(output_file).stem
    project_path = Path(output_file).with_suffix(".asreview.tmp")
    project = ASReviewProject.create(
        project_path,
        project_id=name,
        project_name=name,
        project_mode="simulate",
        project_description="Simulation created via ASReview command line interface",
    )

    # Include a copy of the input data in the .asreview.tmp directory
    data_copy = project_path / "data" / dataset_path
    as_data.to_file(data_copy)

    # Include the settings in the .asreview.tmp directory
    project.update_config(dataset_path=dataset_path)

    return project, as_data
Arguments:
- benchmark: The name of the benchmark data set. You can retrieve the list of recognized names using `get_dataset_names`. The function expects either `benchmark` or `input_file` to be defined (but not both).
- input_file: The name of the input file. This can be a relative path from the working directory, or an absolute path. The function expects either `benchmark` or `input_file` to be defined (but not both). The target file should be a valid input file for ASReview, see https://asreview.readthedocs.io for more information.
- output_file: Where the results from the simulation will be written.
Returns:
A 2-tuple of `(project, as_data)` where `project` is the `ASReviewProject` object, and `as_data` is the `ASReviewData` object, see https://asreview.readthedocs.io for more information on these.
Synopsis:
Prepare an `*.asreview.tmp` directory which will contain the log / state / configuration of the ASReview simulation.
def run(
    config: Config,
    project: ASReviewProject,
    as_data: ASReviewData,
    write_interval: Optional[int] = None,
    seed: Optional[int] = None,
    no_zip: bool = False,
) -> TOfnScore:
    """
    Args:
        config:
            Which of the 7 types of model to use, and how each is parameterized.
        project:
            The `ASReviewProject` object, see https://asreview.readthedocs.io
        as_data:
            The `ASReviewData` object, see https://asreview.readthedocs.io
        write_interval:
            How often, counted in number of queries, the in-memory state is
            written to the state file.
        seed:
            Random seed for the simulation
        no_zip:
            When `True`, the project temporary directory is renamed instead of
            being compressed into a zipped archive once the simulation ends.

    Returns:
        The objective score or `None`.

    Synopsis:

        Runs the ASReview simulation using the models and parameterization
        described by input argument `config`.

    Example usage:

        ```python
        import os
        import tempfile
        from asreviewcontrib.simulation.api import Config
        from asreviewcontrib.simulation.api import OneModelConfig
        from asreviewcontrib.simulation.api import prep_project_directory
        from asreviewcontrib.simulation.api import run

        # make a classifier model config using default parameter values
        # given the model name
        clr = OneModelConfig("clr-svm")

        # make a query model config using positional arguments, and a
        # partial params dict
        qry = OneModelConfig("qry-max-random", {"fraction_max": 0.90})

        # make a stopping model config using keyword arguments
        stp = OneModelConfig(abbr="stp-nq", params={"n_queries": 10})

        # construct an all model config from one model configs -- implicitly
        # use default model choice and parameterization for models not
        # included as argument (i.e. sam, fex, bal, ofn)
        config = Config(clr=clr, qry=qry, stp=stp)

        # arbitrarily pick a benchmark dataset
        benchmark = "benchmark:Cohen_2006_ADHD"

        # create a temporary directory
        tmpdir = tempfile.mkdtemp(prefix="asreview-simulation.", dir=".")
        output_file = f"{tmpdir}{os.sep}project.asreview"

        # prepare the directory that holds the state of the simulation
        project, as_data = prep_project_directory(benchmark=benchmark,
                                                  output_file=output_file)

        # start the simulation
        run(config, project, as_data)
        ```
    """

    # set up the simulated reviewer
    simulate_kwargs = get_review_simulate_kwargs(config, as_data, seed)
    simulator = ReviewSimulate(as_data, project=project, write_interval=write_interval, **simulate_kwargs)

    # carry out the review; the project status updates have side effects on disk
    project.update_review(status="review")
    simulator.review()
    project.mark_review_finished()

    # finalize: '<name>.asreview.tmp' becomes '<name>.asreview'
    tmp_path = project.project_path
    final_path = tmp_path.with_suffix("")
    if not no_zip:
        # zip the results, then discard the temporary directory
        project.export(final_path)
        shutil.rmtree(tmp_path)
    else:
        # keep the directory as is, just drop the .tmp suffix
        os.rename(tmp_path, final_path)

    return calc_ofn_score(config.ofn, final_path)
Arguments:
- config: The choice of 7 types of model and their parameterization.
- project: The `ASReviewProject` object, see https://asreview.readthedocs.io
- as_data: The `ASReviewData` object, see https://asreview.readthedocs.io
- write_interval: Interval measured in number of queries at which to write the state from memory to the state file.
- seed: Random seed for the simulation
- no_zip: Whether to forgo compressing the project temporary directory into a zipped archive once the simulation ends.
Returns:
The objective score or `None`.
Synopsis:
Runs the ASReview simulation with the provided choice of models and their parameterization (i.e., input argument `config`).
Example usage:
```python
import os
import tempfile
from asreviewcontrib.simulation.api import Config
from asreviewcontrib.simulation.api import OneModelConfig
from asreviewcontrib.simulation.api import prep_project_directory
from asreviewcontrib.simulation.api import run

# make a classifier model config using default parameter values
# given the model name
clr = OneModelConfig("clr-svm")

# make a query model config using positional arguments, and a
# partial params dict
qry = OneModelConfig("qry-max-random", {"fraction_max": 0.90})

# make a stopping model config using keyword arguments
stp = OneModelConfig(abbr="stp-nq", params={"n_queries": 10})

# construct an all model config from one model configs -- implicitly
# use default model choice and parameterization for models not
# included as argument (i.e. sam, fex, bal, ofn)
config = Config(clr=clr, qry=qry, stp=stp)

# arbitrarily pick a benchmark dataset
benchmark = "benchmark:Cohen_2006_ADHD"

# create a temporary directory
tmpdir = tempfile.mkdtemp(prefix="asreview-simulation.", dir=".")
output_file = f"{tmpdir}{os.sep}project.asreview"

# prepare the directory that holds the state of the simulation
project, as_data = prep_project_directory(benchmark=benchmark,
                                          output_file=output_file)

# start the simulation
run(config, project, as_data)
```