automl / auto-pytorch Goto Github PK
View Code? Open in Web Editor NEWAutomatic architecture search and hyperparameter optimization for PyTorch
License: Apache License 2.0
Automatic architecture search and hyperparameter optimization for PyTorch
License: Apache License 2.0
Hi, does auto-pytorch only support classification tasks?
If I have a special problem with a custom loss function, can I still use it to find hyperparameters, as with a classification task?
When max_budget is set larger than max_runtime and the budget type is set to runtime ('time'), the program crashes (which is logical), but the error message is not descriptive: KeyError: 'Imputation:strategy'
from autoPyTorch import AutoNetClassification
# data and metric imports
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
X, y = sklearn.datasets.load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = \
sklearn.model_selection.train_test_split(X, y, random_state=1)
# running Auto-PyTorch
autoPyTorch = AutoNetClassification(log_level='info', max_runtime=30, min_budget=30, max_budget=90, budget_type='time')
autoPyTorch.fit(X_train, y_train, validation_split=0.3)
y_pred = autoPyTorch.predict(X_test)
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, y_pred))
results in
KeyError Traceback (most recent call last)
<ipython-input-2-24ae2d25f22b> in <module>
11 # running Auto-PyTorch
12 autoPyTorch = AutoNetClassification(log_level='info', max_runtime=30, min_budget=30, max_budget=90, budget_type='time')
---> 13 autoPyTorch.fit(X_train, y_train, validation_split=0.3)
14 y_pred = autoPyTorch.predict(X_test)
15
~/Work/git/Auto-PyTorch/autoPyTorch/core/api.py in fit(self, X_train, Y_train, X_valid, Y_valid, refit, **autonet_config)
114 self.optimized_hyperparameter_config_budget = output["budget"]
115 if (refit):
--> 116 self.refit(X_train, Y_train, X_valid, Y_valid, self.optimized_hyperparameter_config, self.autonet_config)
117 return self.optimized_hyperparameter_config, output['final_metric_score']
118
~/Work/git/Auto-PyTorch/autoPyTorch/core/api.py in refit(self, X_train, Y_train, X_valid, Y_valid, hyperparameter_config, autonet_config)
146
147 self.pipeline.fit_pipeline(pipeline_config=autonet_config, refit=refit_data,
--> 148 X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid)
149
150 def predict(self, X, return_probabilities=False):
~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/base/pipeline.py in fit_pipeline(self, **kwargs)
45
46 def fit_pipeline(self, **kwargs):
---> 47 return self.root.fit_traverse(**kwargs)
48
49 def predict_pipeline(self, **kwargs):
~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/base/node.py in fit_traverse(self, **kwargs)
73 raise ValueError('Node ' + str(type(node)) + ' requires keyword ' + str(keyword) + ' which is not available.')
74
---> 75 node.fit_output = node.fit(**required_kwargs)
76 if (not isinstance(node.fit_output, dict)):
77 raise ValueError('Node ' + str(type(node)) + ' does not return a dictionary.')
~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/nodes/optimization_algorithm.py in fit(self, pipeline_config, X_train, Y_train, X_valid, Y_valid, refit)
77 X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid,
78 budget=refit["budget"], budget_type=self.budget_types[pipeline_config['budget_type']],
---> 79 optimize_start_time=time.time())
80
81 return {'final_metric_score': res['loss'],
~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/base/pipeline.py in fit_pipeline(self, **kwargs)
45
46 def fit_pipeline(self, **kwargs):
---> 47 return self.root.fit_traverse(**kwargs)
48
49 def predict_pipeline(self, **kwargs):
~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/base/node.py in fit_traverse(self, **kwargs)
73 raise ValueError('Node ' + str(type(node)) + ' requires keyword ' + str(keyword) + ' which is not available.')
74
---> 75 node.fit_output = node.fit(**required_kwargs)
76 if (not isinstance(node.fit_output, dict)):
77 raise ValueError('Node ' + str(type(node)) + ' does not return a dictionary.')
~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/nodes/cross_validation.py in fit(self, hyperparameter_config, pipeline_config, X_train, Y_train, X_valid, Y_valid, budget, budget_type, optimize_start_time)
98 budget=cur_budget, training_techniques=[budget_type()],
99 fit_start_time=time.time(),
--> 100 categorical_features=categorical_features)
101
102 if result is not None:
~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/base/pipeline.py in fit_pipeline(self, **kwargs)
45
46 def fit_pipeline(self, **kwargs):
---> 47 return self.root.fit_traverse(**kwargs)
48
49 def predict_pipeline(self, **kwargs):
~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/base/node.py in fit_traverse(self, **kwargs)
73 raise ValueError('Node ' + str(type(node)) + ' requires keyword ' + str(keyword) + ' which is not available.')
74
---> 75 node.fit_output = node.fit(**required_kwargs)
76 if (not isinstance(node.fit_output, dict)):
77 raise ValueError('Node ' + str(type(node)) + ' does not return a dictionary.')
~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/nodes/imputation.py in fit(self, hyperparameter_config, X_train, X_valid, categorical_features)
21 hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config)
22
---> 23 strategy = hyperparameter_config['strategy']
24 fill_value = int(np.nanmax(X_train)) + 1 if not scipy.sparse.issparse(X_train) else 0
25 numerical_imputer = SimpleImputer(strategy=strategy, copy=False)
~/Work/git/Auto-PyTorch/autoPyTorch/utils/configspace_wrapper.py in __getitem__(self, key)
21 if ((self.config_prefix + key) not in self.config):
22 print(self.config)
---> 23 return self.config[self.config_prefix + key]
24
25 def __str__(self):
KeyError: 'Imputation:strategy'
Related to this issue,
I found that even just executing the following function beforehand makes autonet.fit
malfunction (same error message: could not re-initialize CUDA in a forked subprocess ... or something like that):
def get_cuda_summary():
import torch
if torch.cuda.is_available():
print('CUDA device = ', torch.cuda.get_device_name())
print('Available number of devices = ', torch.cuda.device_count())
print('Device numbers = ', list(range(torch.cuda.device_count())))
print('Current device = ', torch.cuda.current_device())
print(torch.cuda.memory_summary())
else:
print('cuda is not available')
#torch.cuda.set_device(1)
get_cuda_summary()
In addition to that, I do not know how to call torch.cuda.set_device(1)
without making autonet.fit() fail.
Anyway, it looks like a promising project, and I thank you for it.
I am wondering how I can deal with a multi-headed model.
PyTorch seems to have two distinct ways of dealing with multiple outputs:
return y1, y2, y3
return torch.cat([y1,y2,y3], axis=1)
I think that if the model output has the first form, we need to modify the loss function.
I have tried what you mentioned; the result is that
the develop branch doesn't seem to be stable.
When I tried the tutorial, an error occurred.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-6-66db46cdcea7> in <module>
1 # Get the ConfigSpace object with all hyperparameters, conditions, default values and default ranges
----> 2 hyperparameter_search_space = autonet.get_hyperparameter_search_space()
3
4 # Print all possible configuration options
5 #autonet.print_help()
~/anaconda3/envs/autopytorch/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/core/api.py in get_hyperparameter_search_space(self, X_train, Y_train, X_valid, Y_valid, **autonet_config)
101 Y_valid=Y_valid)["dataset_info"]
102
--> 103 return self.pipeline.get_hyperparameter_search_space(dataset_info=dataset_info, **pipeline_config)
104
105 @classmethod
~/anaconda3/envs/autopytorch/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/pipeline/base/pipeline.py in get_hyperparameter_search_space(self, dataset_info, **pipeline_config)
109 for name, node in self._pipeline_nodes.items():
110 #print("dataset_info" in pipeline_config.keys())
--> 111 config_space = node.get_hyperparameter_search_space(**pipeline_config)
112 cs.add_configuration_space(prefix=name, configuration_space=config_space, delimiter=ConfigWrapper.delimiter)
113
~/anaconda3/envs/autopytorch/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/pipeline/nodes/imputation.py in get_hyperparameter_search_space(self, dataset_info, **pipeline_config)
55
56 cs = ConfigSpace.ConfigurationSpace()
---> 57 cs.add_hyperparameter(CSH.CategoricalHyperparameter("strategy", possible_strategies))
58 self._check_search_space_updates()
59 return cs
ConfigSpace/hyperparameters.pyx in ConfigSpace.hyperparameters.CategoricalHyperparameter.__init__()
TypeError: Using a set of choices is prohibited as it can result in non-deterministic behavior. Please use a list or a tuple.
Any suggestions?
The error occurs while executing hyperparameter_search_space = autonet.get_hyperparameter_search_space()
in the Auto-PyTorch Tutorial.ipynb
Originally posted by @maxmarketit in #33 (comment)
When running the AutoNetImageClassification
according to the example Auto-PyTorch Tutorial notebook, I get the following error:
Process pynisher function call:
Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pynisher/limit_function_call.py", line 93, in subprocess_func
return_value = ((func(*args, **kwargs), 0))
File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/core/worker_no_timelimit.py", line 108, in optimize_pipeline
random.setstate(random_state)
File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/random.py", line 134, in setstate
version = state[0]
TypeError: 'float' object is not subscriptable
02:30:09 job (0, 0, 0) failed with exception
Traceback (most recent call last):
Hi, I find that the autonet prediction results are different from the result predicted by the pytorch model obtained from autonet using get_pytorch_model function. To be specific:
res_autonet = autonet.predict(X_train)
is different from:
model = autonet.get_pytorch_model()
model.eval()
X_train = X_train.astype(np.float32)
res_model = model(Variable(torch.from_numpy(X_train).cuda())).data.cpu().numpy()
As can be seen in the third picture, the two predictions have different values, while they're supposed to be equal.
The full code could be seen as attached:
__author__ = "Max Dippel, Michael Burkart and Matthias Urban"
__version__ = "0.0.1"
__license__ = "BSD"
import os, sys
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))
from autoPyTorch import AutoNetRegression
from autoPyTorch.data_management.data_manager import DataManager
# Note: You can write your own datamanager! Call fit train, valid data (numpy matrices)
dm = DataManager()
dm.generate_regression(num_features=21, num_samples=1500)
X_train=dm.X
Y_train=dm.Y
X_valid=dm.X_train
Y_valid=dm.Y_train
# Note: every parameter has a default value, you do not have to specify anything. The given parameter allow a fast test.
autonet = AutoNetRegression(budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='info')
res = autonet.fit(X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid)
print(res)
res_autonet = autonet.predict(X_train)
##retrain
import numpy as np
import torch
from torch.autograd import Variable
model = autonet.get_pytorch_model()
model.eval()
#autonet.print_help()
X_train = X_train.astype(np.float32)
res_model = model(Variable(torch.from_numpy(X_train).cuda())).data.cpu().numpy()
#plot
import matplotlib.pyplot as plt
plt.figure(figsize=(10,7))
plt.subplot(221)
plt.plot(Y_train,res_model,'.')
plt.xlabel('True result')
plt.ylabel('Model result')
plt.subplot(222)
plt.plot(Y_train,res_autonet,'.')
plt.xlabel('True result')
plt.ylabel('Model result')
plt.subplot(223)
plt.plot(res_autonet,res_model,'.')
plt.xlabel('AutoNet result')
plt.ylabel('Model result')
I get the following error saying that torch doesn't have AdamW optimizer.
Process pynisher function call:
Traceback (most recent call last):
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/pynisher/limit_function_call.py", line 93, in subprocess_func
return_value = ((func(*args, **kwargs), 0))
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/core/worker.py", line 124, in optimize_pipeline
raise e
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/core/worker.py", line 118, in optimize_pipeline
refit=False, rescore=False, hyperparameter_config_id=config_id, dataset_info=self.dataset_info)
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/pipeline/base/pipeline.py", line 60, in fit_pipeline
return self.root.fit_traverse(**kwargs)
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/pipeline/base/node.py", line 115, in fit_traverse
node.fit_output = node.fit(**required_kwargs)
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/pipeline/nodes/cross_validation.py", line 108, in fit
result = self.sub_pipeline.fit_pipeline(X=X, Y=Y, **sub_pipeline_kwargs)
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/pipeline/base/pipeline.py", line 60, in fit_pipeline
return self.root.fit_traverse(**kwargs)
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/pipeline/base/node.py", line 115, in fit_traverse
node.fit_output = node.fit(**required_kwargs)
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/pipeline/nodes/optimizer_selector.py", line 28, in fit
return {'optimizer': optimizer_type(network.parameters(), optimizer_config)}
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/components/optimizer/optimizer.py", line 22, in __new__
return cls._get_optimizer(cls, params, config)
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/components/optimizer/optimizer.py", line 51, in _get_optimizer
return optim.AdamW(params=params, lr=config['learning_rate'], weight_decay=config['weight_decay'])
AttributeError: module 'torch.optim' has no attribute 'AdamW'
20:41:38 job (0, 0, 78) failed with exception
Traceback (most recent call last):
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/hpbandster/core/worker.py", line 206, in start_computation
result = {'result': self.compute(*args, config_id=id, **kwargs),
File "/home/yangmo/anaconda2/envs/python3/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/core/worker.py", line 86, in compute
raise Exception("Exception in train pipeline. Took " + str((time.time()-start_time)) + " seconds with budget " + str(budget))
Exception: Exception in train pipeline. Took 0.11802005767822266 seconds with budget 1.1111111111111112
I checked my PyTorch 1.1.0 installation, and it doesn't have AdamW. Is this a version issue?
>>> optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: module 'torch.optim' has no attribute 'AdamW'
>>> optimizer = torch.optim.Adamw(model.parameters(), lr=learning_rate)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: module 'torch.optim' has no attribute 'Adamw'
>>> optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: module 'torch.optim' has no attribute 'AdamW'
>>> import torch print(torch.__version__)
File "<stdin>", line 1
import torch print(torch.__version__)
^
SyntaxError: invalid syntax
>>> import torch
>>> print(torch.__version__)
1.1.0
>>>
Hi Lucas,
Running the example in the README (on the release 0.0.2 branch) leads to spammy output again, although the log level should only be info.
Could you please try to minimize the output again:
10:05:07 [AutoNet] Start bohb
10:05:07 DISPATCHER: started the 'discover_worker' thread
10:05:07 DISPATCHER: started the 'job_runner' thread
10:05:07 WORKER: start listening for jobs
10:05:07 DISPATCHER: Pyro daemon running on 130.75.31.175:38245
10:05:07 DISPATCHER: discovered new worker, hpbandster.run_0.worker.amaterasu.12367.-1140518054266560
10:05:07 HBMASTER: adjusted queue size to (0, 1)
10:05:07 DISPATCHER: A new worker triggered discover_worker
10:05:07 HBMASTER: starting run at 1570521907.0837104
10:05:07 WORKER: start processing job (0, 0, 0)
10:05:07 Fit optimization pipeline
10:05:07 [AutoNet] No validation set given and either no cross validator given or budget too low for CV. Continue by splitting 0.3 of training data.
10:05:07 [AutoNet] CV split 0 of 1
10:05:07 Reduced initial budget 29.959715366363525 to cv budget 29.959677934646606 compensate for 3.743171691894531e-05
...
Best,
Marius
Hello!
Auto-Pytorch fails with the following error:
Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
What am I doing wrong?
After changing configurations, e.g., setting preprocessors=['power_transformer'] from the tiny_cs config, I got an error saying "Invalid search space update given: truncated_svd:target_dim". It seems I need to re-initialize the hyperparameter space. How to do that?
If only the validation split contains NaNs in certain columns, there might be errors.
Every time, the master gets the following error, while the workers don't.
Does anyone know how to fix it?
Thanks
exception: Traceback (most recent call last):
File "/users/JIACHEN/.local/lib/python3.6/site-packages/hpbandster/core/worker.py", line 206, in start_computation
result = {'result': self.compute(*args, config_id=id, **kwargs),
File "/usr/local/lib/python3.6/dist-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/core/worker.py", line 92, in compute
result = self.optimize_pipeline(config, config_id, budget, start_time)
File "/usr/local/lib/python3.6/dist-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/core/worker.py", line 131, in optimize_pipeline
raise e
File "/usr/local/lib/python3.6/dist-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/core/worker.py", line 125, in optimize_pipeline
refit=False, rescore=False, hyperparameter_config_id=config_id, dataset_info=self.dataset_info)
File "/usr/local/lib/python3.6/dist-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/base/pipeline.py", line 60, in fit_pipeline
return self.root.fit_traverse(**kwargs)
File "/usr/local/lib/python3.6/dist-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/base/node.py", line 110, in fit_traverse
node.fit_output = node.fit(**required_kwargs)
File "/usr/local/lib/python3.6/dist-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/nodes/cross_validation.py", line 124, in fit
X=X, Y=Y, logger=logger)
File "/usr/local/lib/python3.6/dist-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/nodes/cross_validation.py", line 288, in process_additional_results
combinators[name] = additional_results[split][name]["combinator"]
TypeError: 'int' object is not subscriptable
In config/configspace there are files with a more readable txt version of the parameter configspace than the ones in core/presets, and it reads as if you can set the batch size using it. How do you use that format instead of the one expected by config_preset= when creating AutoNet classes?
e.g. config/configspace/tiny_cs.txt
CreateDataLoader batch_size [125]
InitializationSelector initializer:initialize_bias ["No"]
Thanks,
Ric
When running on a Regression dataset with default inputs (i.e. as per your Classification example, but using AutoNetRegression instead), I see an error as follows:
ValueError: Config option loss_modules contains following invalid values {'cross_entropy_weighted'}, chose a subset of ['l1_loss']
However, it would be ideal if the default set of loss_modules for the regression is only the permissible losses :)
I found that the minimum number of input features is 2. Is there any reason for this?
I ask because I made a custom loss function that uses two input variables asymmetrically,
and it does not seem to work. So I am wondering if there is any particular reason
that might lead to poor training... I might add my particular loss function and the result later.
Addressed in #44
Related to #20
But I figure that torch.save(model2 = autonet.get_pytorch_model(), 'model.pth')
does not save the preprocessing. And I find that using torch.save
and torch.load
would not guarantee the same result as
autonet.predict
because the preprocessing is missing.
So how can I save the autonet
model and load it, or how can I find out which preprocessing auto-pytorch found, and save and load that same procedure?
P.S. I am using the develop
branch.
Hello,
So, I have a quick question. Is auto-pytorch optimized (written) to run on a GPU automatically, or do I need to pass cuda=True to the algorithm, say AutoNetClassification?
I am asking because even when I pass cuda=True while running on a GPU-enabled instance, I don't see any difference in the time taken compared to just running on a CPU.
RuntimeError: No models fit during training, please retry with a larger max_runtime.
Please, can someone help? I have been "fighting" this for days now. I first had an issue with 'create_data_info.py' in /pipeline/nodes raising an error on line 33 about lists, which I managed to fix.
Also "/autoPyTorch/core/autonet_classes/autonet_feature_classification.py" line 61 gives AttributeError: 'list' object has no attribute 'reshape'
Hi,
I tried to run AutoPyTorch again:
from autoPyTorch import AutoNetImageClassification
import numpy as np
import os as os
autonet_image_classification = AutoNetImageClassification(config_preset="full_cs", result_logger_dir="logs/")
path_to_cifar_csv = os.path.abspath("./datasets/CIFAR10.csv")
autonet_image_classification.fit(X_train=np.array([path_to_cifar_csv]),
Y_train=np.array([0]),
min_budget=300,
max_budget=900,
max_runtime=18000,
default_dataset_download_dir="./datasets",
images_root_folders=["./datasets"],
log_level="info" )
However, after nearly 2h Auto-PyTorch used more than 60gb RAM.
========== Job Epilogue Start ============
Job Id: 4274574.batch.css.lan
Resources requested by Job: mem=60gb,neednodes=1:ppn=1,nodes=1:ppn=1,walltime=08:00:00
Resources used by Job: cput=01:48:31,mem=77352676kb,vmem=83220752kb,walltime=01:49:07
Execution host(s): dumbo-n014
Job Exit Status: 271
========== Job Epilogue End ============
Do you have any idea why that's the case?
For the first 20min, it used only roughly 7gb.
When running examples/basics/basic_classification.py
on a CPU, I receive the following output and error:
11:11:18 WORKER: start listening for jobs
11:11:18 [AutoNet] Start bohb
11:11:18 DISPATCHER: started the 'discover_worker' thread
11:11:18 DISPATCHER: started the 'job_runner' thread
11:11:18 DISPATCHER: Pyro daemon running on 10.5.150.146:37155
11:11:19 DISPATCHER: discovered new worker, hpbandster.run_0.worker.mllap06.2084.-1139912695220032
11:11:19 HBMASTER: starting run at 1548324679.0029259
11:11:19 HBMASTER: adjusted queue size to (0, 1)
11:11:19 WORKER: start processing job (0, 0, 0)
11:11:19 Fit optimization pipeline
11:11:22 Finished train with budget 1.0: Preprocessing took 0s, Training took 2s, Wrap up took 0s. Total time consumption in s: 3
11:11:22 Training ['resnet'] with budget 1.0 resulted in score: -39.92592692375183 took 3.185837745666504 seconds
11:11:22 WORKER: registered result for job (0, 0, 0) with dispatcher
11:11:22 WORKER: start processing job (0, 0, 1)
11:11:22 Fit optimization pipeline
11:11:23 Finished train with budget 1.0: Preprocessing took 0s, Training took 0s, Wrap up took 0s. Total time consumption in s: 1
11:11:23 Training ['shapedmlpnet'] with budget 1.0 resulted in score: -39.92592692375183 took 1.4267945289611816 seconds
11:11:23 WORKER: registered result for job (0, 0, 1) with dispatcher
11:11:23 WORKER: start processing job (0, 0, 2)
11:11:23 Fit optimization pipeline
11:11:30 Finished train with budget 1.0: Preprocessing took 0s, Training took 5s, Wrap up took 1s. Total time consumption in s: 7
11:11:31 Training ['resnet'] with budget 1.0 resulted in score: -27.77777910232544 took 7.370416879653931 seconds
11:11:31 WORKER: registered result for job (0, 0, 2) with dispatcher
11:11:31 WORKER: start processing job (0, 0, 3)
11:11:31 Fit optimization pipeline
11:11:33 Finished train with budget 1.0: Preprocessing took 1s, Training took 0s, Wrap up took 0s. Total time consumption in s: 2
11:11:33 Training ['resnet'] with budget 1.0 resulted in score: -27.77777910232544 took 2.184109687805176 seconds
11:11:33 WORKER: registered result for job (0, 0, 3) with dispatcher
11:11:33 WORKER: start processing job (0, 0, 4)
11:11:33 Fit optimization pipeline
11:11:36 Finished train with budget 1.0: Preprocessing took 0s, Training took 2s, Wrap up took 0s. Total time consumption in s: 3
11:11:36 Training ['resnet'] with budget 1.0 resulted in score: -27.77777910232544 took 3.546250581741333 seconds
11:11:36 WORKER: registered result for job (0, 0, 4) with dispatcher
11:11:36 WORKER: start processing job (0, 0, 5)
11:11:36 Fit optimization pipeline
/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/scikit_learn-0.20.2-py3.6-linux-x86_64.egg/sklearn/decomposition/fastica_.py:305: UserWarning: n_components is too large: it will be set to 21
warnings.warn('n_components is too large: it will be set to %s' % n_components)
[...]
11:12:12 WORKER: registered result for job (0, 0, 6) with dispatcher
11:12:12 WORKER: start processing job (0, 0, 0)
11:12:12 Fit optimization pipeline
11:12:35 Finished train with budget 9.0: Preprocessing took 0s, Training took 21s, Wrap up took 0s. Total time consumption in s: 22
11:12:35 Training ['resnet'] with budget 9.0 resulted in score: -61.33333444595337 took 22.3313090801239 seconds
11:12:35 WORKER: registered result for job (0, 0, 0) with dispatcher
11:12:35 DISPATCHER: Dispatcher shutting down
11:12:35 DISPATCHER: shut down complete
11:12:35 Start autonet with config:
{'budget_type': 'epochs', 'min_budget': 1, 'max_budget': 9, 'num_iterations': 1, 'log_level': 'info', 'shuffle': True, 'hyperparameter_search_space_updates': None, 'run_id': '0', 'task_id': -1, 'algorithm': 'bohb', 'result_logger_dir': '.', 'eta': 3, 'min_workers': 1, 'working_dir': '.', 'network_interface_name': 'eth1', 'memory_limit_mb': 1000000, 'use_tensorboard_logger': False, 'validation_split': 0.0, 'cv_splits': 1, 'use_stratified_cv_split': True, 'min_budget_for_cv': 0, 'half_num_cv_splits_below_budget': 0, 'imputation_strategies': ['mean', 'median', 'most_frequent'], 'normalization_strategies': ['none', 'minmax', 'standardize', 'maxabs'], 'categorical_features': [], 'preprocessors': ['none', 'truncated_svd', 'fast_ica', 'kitchen_sinks', 'kernel_pca', 'nystroem'], 'over_sampling_methods': ['none', 'random', 'smote'], 'under_sampling_methods': ['none', 'random'], 'target_size_strategies': ['none', 'upsample', 'downsample', 'average', 'median'], 'embeddings': ['none', 'learned'], 'networks': ['mlpnet', 'shapedmlpnet', 'resnet', 'shapedresnet'], 'final_activation': 'softmax', 'optimizer': ['adam', 'sgd'], 'lr_scheduler': ['cosine_annealing', 'cyclic', 'exponential', 'step', 'plateau', 'none'], 'additional_logs': [], 'train_metric': 'accuracy', 'additional_metrics': [], 'loss_modules': ['cross_entropy', 'cross_entropy_weighted'], 'batch_loss_computation_techniques': ['standard', 'mixup'], 'training_techniques': ['early_stopping'], 'minimize': False, 'cuda': True, 'eval_on_training': False, 'full_eval_each_epoch': False, 'early_stopping_patience': inf, 'early_stopping_reset_parameters': False, 'random_seed': 647837117, 'max_runtime': inf}
11:12:56 Finished train with budget 9.0: Preprocessing took 0s, Training took 20s, Wrap up took 0s. Total time consumption in s: 21
({'Imputation:strategy': 'most_frequent', 'LearningrateSchedulerSelector:lr_scheduler': 'cyclic', 'LossModuleSelector:loss_module': 'cross_entropy_weighted', 'NetworkSelector:network': 'resnet', 'NormalizationStrategySelector:normalization_strategy': 'none', 'OptimizerSelector:optimizer': 'adam', 'PreprocessorSelector:preprocessor': 'nystroem', 'ResamplingStrategySelector:over_sampling_method': 'smote', 'ResamplingStrategySelector:target_size_strategy': 'downsample', 'ResamplingStrategySelector:under_sampling_method': 'random', 'TrainNode:batch_loss_computation_technique': 'standard', 'TrainNode:batch_size': 90, 'LearningrateSchedulerSelector:cyclic:cycle_length': 10, 'LearningrateSchedulerSelector:cyclic:max_factor': 1.192086838687192, 'LearningrateSchedulerSelector:cyclic:min_factor': 0.1739108186563212, 'NetworkSelector:resnet:activation': 'sigmoid', 'NetworkSelector:resnet:blocks_per_group': 4, 'NetworkSelector:resnet:num_groups': 5, 'NetworkSelector:resnet:num_units_0': 12, 'NetworkSelector:resnet:num_units_1': 134, 'NetworkSelector:resnet:use_dropout': False, 'NetworkSelector:resnet:use_shake_drop': False, 'NetworkSelector:resnet:use_shake_shake': False, 'OptimizerSelector:adam:learning_rate': 0.03881547471994297, 'OptimizerSelector:adam:weight_decay': 0.04619218671240021, 'PreprocessorSelector:nystroem:kernel': 'poly', 'PreprocessorSelector:nystroem:n_components': 449, 'ResamplingStrategySelector:smote:k_neighbors': 3, 'NetworkSelector:resnet:num_units_2': 374, 'NetworkSelector:resnet:num_units_3': 413, 'NetworkSelector:resnet:num_units_4': 244, 'NetworkSelector:resnet:num_units_5': 14, 'PreprocessorSelector:nystroem:coef0': -0.2635372875151014, 'PreprocessorSelector:nystroem:degree': 3, 'PreprocessorSelector:nystroem:gamma': 0.00023889215064720927}, -61.33333444595337)
Traceback (most recent call last):
File "examples/basics/basic_classification.py", line 20, in <module>
print("Score:", autonet.score(X_test=dm.X_train, Y_test=dm.Y_train))
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/core/api.py", line 184, in score
self.pipeline.predict_pipeline(pipeline_config=self.autonet_config, X=X_test)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/base/pipeline.py", line 50, in predict_pipeline
return self.root.predict_traverse(**kwargs)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/base/node.py", line 136, in predict_traverse
node.predict_output = node.predict(**required_kwargs)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/nodes/optimization_algorithm.py", line 116, in predict
result = self.sub_pipeline.predict_pipeline(pipeline_config=pipeline_config, X=X)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/base/pipeline.py", line 50, in predict_pipeline
return self.root.predict_traverse(**kwargs)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/base/node.py", line 136, in predict_traverse
node.predict_output = node.predict(**required_kwargs)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/nodes/cross_validation.py", line 121, in predict
result = self.sub_pipeline.predict_pipeline(pipeline_config=pipeline_config, X=X)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/base/pipeline.py", line 50, in predict_pipeline
return self.root.predict_traverse(**kwargs)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/base/node.py", line 136, in predict_traverse
node.predict_output = node.predict(**required_kwargs)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/nodes/train_node.py", line 101, in predict
Y = predict(network, X, 20, device)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/autoPyTorch-0.0.1-py3.6.egg/autoPyTorch/pipeline/nodes/train_node.py", line 276, in predict
network = network.to(device)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 381, in to
return self._apply(convert)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 187, in _apply
module._apply(fn)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 187, in _apply
module._apply(fn)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 193, in _apply
param.data = fn(param.data)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 379, in convert
return t.to(device, dtype if t.is_floating_point() else None, non_blocking)
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/torch/cuda/__init__.py", line 161, in _lazy_init
_check_driver()
File "/home/eggenspk/anaconda3/envs/Autopytorch_36/lib/python3.6/site-packages/torch/cuda/__init__.py", line 75, in _check_driver
raise AssertionError("Torch not compiled with CUDA enabled")
AssertionError: Torch not compiled with CUDA enabled
It seems like it is searching for a GPU only when scoring the final model (the incumbent also has 'cuda': True), but of course there is none, as I am using pytorch-cpu. Also, python -c "import torch; torch.cuda.is_available()" returns False on my machine.
In some cases, we have multi-level data,
something like instances in a cluster(ex. students in a school, different ages for the same person,
leave-one-person-out (LOPO) protocol in this repo)
Is there any way to incorporate the multi-level structure of data for auto-PyTorch?
Hello all, ✋
I really loved your amazing book and the work you’ve done here. I work on the “Flyte” project at Lyft and I’ve been playing around with adapting AutoPyTorch to work on Flyte.
I’m very passionate about open-source ML workflows (see here to understand what I’m doing).
I sort-of hacked the AutoPyTorch codebase in order to prove that it can work on Flyte in a distributed way. Now that I’ve done that, I’m interested in doing it the right way and I’d love to chat with someone to discuss.
Flyte handles a lot of the overhead of distributed systems (ensuring data gets from one task to the next), so I modified AutoPytorch to remove all the network calls and master/worker logic.
The new logic no longer needs a master/worker setup. Instead it runs a series of ephemeral containers, and Flyte handles moving the data from one task to the next.
The logic looks something like this (where each "-" step is a separate container):
While True:
- get hyperparameters (store the configs to disk)
- run containers (one per config). fit model (using the config). Store the result and ensemble predictions to disk.
- build an ensemble, which can be used for prediction.
In Flyte, you also get lots of visibility into where failures happen in a nice UI. It’s pretty cool to see it all working. I can tell exactly which set of HPs failed (if any).
Unfortunately, the master/worker logic is pretty baked into AutoPyTorch (and HpBandSter) right now. I had to do some very hacky things to get the POC working. I’m hoping I can chat with someone about how I can do this more properly.
Thank you for the promising package.
I wonder how I can use multiple GPUs.
I tinkered with num_workers and torch_num_threads, but nothing seemed to change.
(I wish there were a document describing the configuration options.)
So is there any way to use multiple GPUs, or to set the default GPU to something other than 0?
It appears that the manifest is missing at least one file necessary to build
from the sdist for version 0.0.2. You're in good company, about 5% of other
projects updated in the last year are also missing files.
+ /tmp/venv/bin/pip3 wheel --no-binary autopytorch -w /tmp/ext autopytorch==0.0.2
Looking in indexes: http://10.10.0.139:9191/root/pypi/+simple/
Collecting autopytorch==0.0.2
Downloading http://10.10.0.139:9191/root/pypi/%2Bf/ed1/85e341888324f/autoPyTorch-0.0.2.tar.gz (208 kB)
ERROR: Command errored out with exit status 1:
command: /tmp/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-wheel-svgp25f0/autopytorch/setup.py'"'"'; __file__='"'"'/tmp/pip-wheel-svgp25f0/autopytorch/setup.py'"'"';f=getattr(tokenize, '"'"'open'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' egg_info --egg-base /tmp/pip-wheel-svgp25f0/autopytorch/pip-egg-info
cwd: /tmp/pip-wheel-svgp25f0/autopytorch/
Complete output (5 lines):
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/tmp/pip-wheel-svgp25f0/autopytorch/setup.py", line 8, in <module>
with open('requirements.txt', 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: 'requirements.txt'
----------------------------------------
ERROR: Command errored out with exit status 1: python setup.py egg_info Check the logs for full command output.
Hi,
Thank you for providing such wonderful work!
My question may be simple: I use the fit function to get the best model architecture and model parameters, optimizer, etc. But how can I save the result and load it in another file?
For example, now I can see the model's architecture by:
>>> model = autonet.get_pytorch_model()
>>> model.eval()
Out[45]:
Sequential(
(0): Linear(in_features=2, out_features=437, bias=True)
(1): Sequential(
(0): ResBlock(
(shortcut): Linear(in_features=437, out_features=147, bias=True)
(start_norm): Sequential(
(0): BatchNorm1d(437, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
)
(layers): Sequential(
(0): Linear(in_features=437, out_features=147, bias=True)
(1): BatchNorm1d(147, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): Dropout(p=0.30851922736236553)
(4): Linear(in_features=147, out_features=147, bias=True)
)
)
(1): ResBlock(
(layers): Sequential(
(0): BatchNorm1d(147, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Linear(in_features=147, out_features=147, bias=True)
(3): BatchNorm1d(147, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(4): ReLU()
(5): Dropout(p=0.30851922736236553)
(6): Linear(in_features=147, out_features=147, bias=True)
)
)
)
(2): Sequential(
(0): ResBlock(
(layers): Sequential(
(0): BatchNorm1d(147, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Linear(in_features=147, out_features=147, bias=True)
(3): BatchNorm1d(147, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(4): ReLU()
(5): Dropout(p=0.0)
(6): Linear(in_features=147, out_features=147, bias=True)
)
)
(1): ResBlock(
(layers): Sequential(
(0): BatchNorm1d(147, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Linear(in_features=147, out_features=147, bias=True)
(3): BatchNorm1d(147, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(4): ReLU()
(5): Dropout(p=0.0)
(6): Linear(in_features=147, out_features=147, bias=True)
)
)
)
(3): BatchNorm1d(147, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(4): ReLU()
(5): Linear(in_features=147, out_features=1, bias=True)
)
But how can I reuse the structure in another file? Just by typing it out manually? I think there must be some smart way to do this. Maybe 'refit' can do the job; I just don't know how.
However, refit might be useful if you want to fit on the full dataset or even another dataset or if you just want to fit a model without searching.
To test the performance of auto-pytorch, I wish you could offer us an executable colab example, like mnist classification or other simple applications. This may help us a lot to understand how to use your code!
Thank you for your solid research, look forward to your reply!
Jiale Tan
Greetings,
I am trying to use AutoNetEnsemble, but I keep getting the same error:
Process Process-11:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/home/ixanthos/Auto-PyTorch/autopytorch_env/lib/python3.6/site-packages/autoPyTorch-0.0.2-py3.6.egg/autoPyTorch/utils/ensemble.py", line 137, in _start_server
server = loop.run_until_complete(coro)
File "/usr/lib/python3.6/asyncio/base_events.py", line 471, in run_until_complete
self.run_forever()
File "/usr/lib/python3.6/asyncio/base_events.py", line 425, in run_forever
raise RuntimeError('This event loop is already running')
RuntimeError: This event loop is already running
The code I am running is:
autonet = AutoNetEnsemble(AutoNetClassification)
result = autonet.fit(train_data,train_target,max_runtime=time_threshold,min_budget=time_threshold/6,max_budget = time_threshold/3, optimize_metric = 'auc_metric', cross_validator = 'stratified_k_fold',
cross_validator_args = {'n_splits': 3},categorical_features= dataset_info_dict['categorical_vars'])
The categorical vars are a boolean vector that I use, the time_threshold is in seconds, the rest are preconfigured.
AutoNetClassification
works fine with no issues, but I have been unable to run the ensembler.
As mentioned in #37 I'd like to add a feature to APT to evaluate all (maybe up to a configurable max) incumbent configurations on the maximum budget with tensorboard-logging. This could be done by calling a function run_incumbents_on_max_budget
of the autopytorch-object. It would be very handy to have this for:
Obviously this can take a long time for certain scenarios, thus it should imho be an optional call. I'll make a proposal for an easy implementation and commit a PR.
Hi,
I recently started using AutoPytorch for discovering normal mlpnets for the regression setting. Unfortunately, if my data has 21 features as in the example, the learned network actually takes only 20 input features. I'm not sure what I am doing wrong, but if I try to use the model for further predictions as a standard pytorch model, it gives errors because of the matrix dimension mismatch. Any solutions to this?
Thank you for the wonderful work! Could you please provide an example showing how to use the API with the AutoNetImageClassification class? An example using CIFAR-10 would be great!
Thanks!
Sam
I found how to set hyperparameter space
from autoPyTorch import HyperparameterSearchSpaceUpdates
search_space_updates = HyperparameterSearchSpaceUpdates()
search_space_updates.append(node_name="NetworkSelector",
hyperparameter="mlpnet:num_layers",
value_range=[1,4], log=False)
But is there any way to set the default value for the hyperparameters?
I looked into search_space_updates.append, and it accepts no further parameters.
I am talking about the default from get_hyperparameter_search_space().
I guess it is the starting point for searching over hyperparameters.
Thank you in advance
I am using the develop branch, and as I train a regression model using AutoNetRegression, I notice that training goes on even when the loss reaches NaN...
Since no further training is possible, I think AutoPyTorch should reduce the learning rate or
reduce the momentum or something like that, and stop outputting a NaN loss...
I am using a custom loss, but I do not think this should be a problem...
Hi,
some of our internal benchmarks showed that sometimes a PowerTransformer can substantially improve the performance of our DNNs on featurized data. @urbanmatthias could you please add that to AutoPyTorch.
Thanks,
Marius
I didn't change anything. I just ran the whole notebook.
I am using Windows 10, CUDA 10 update 2 and Python 3.7.5. I have all the necessary compilers. Auto-PyTorch was successfully installed and imported.
...
hyperparameter_search_space = autonet.get_hyperparameter_search_space()
...
TypeError Traceback (most recent call last)
in
3
4 # Get the ConfigSpace object with all hyperparameters, conditions, default values and default ranges
----> 5 hyperparameter_search_space = autonet.get_hyperparameter_search_space()
6
7 # Print all possible configuration options
~\Anaconda3\lib\site-packages\autopytorch-0.0.2-py3.7.egg\autoPyTorch\core\api.py in get_hyperparameter_search_space(self, X_train, Y_train, X_valid, Y_valid, **autonet_config)
101 Y_valid=Y_valid)["dataset_info"]
102
--> 103 return self.pipeline.get_hyperparameter_search_space(dataset_info=dataset_info, **pipeline_config)
104
105 @classmethod
~\Anaconda3\lib\site-packages\autopytorch-0.0.2-py3.7.egg\autoPyTorch\pipeline\base\pipeline.py in get_hyperparameter_search_space(self, dataset_info, **pipeline_config)
109 for name, node in self._pipeline_nodes.items():
110 #print("dataset_info" in pipeline_config.keys())
--> 111 config_space = node.get_hyperparameter_search_space(**pipeline_config)
112 cs.add_configuration_space(prefix=name, configuration_space=config_space, delimiter=ConfigWrapper.delimiter)
113
~\Anaconda3\lib\site-packages\autopytorch-0.0.2-py3.7.egg\autoPyTorch\pipeline\nodes\lr_scheduler_selector.py in get_hyperparameter_search_space(self, dataset_info, **pipeline_config)
56 continue
57 lr_scheduler_cs = lr_scheduler_type.get_config_space(
---> 58 **self._get_search_space_updates(prefix=lr_scheduler_name))
59 cs.add_configuration_space( prefix=lr_scheduler_name, configuration_space=lr_scheduler_cs, delimiter=ConfigWrapper.delimiter,
60 parent_hyperparameter={'parent': selector, 'value': lr_scheduler_name})
TypeError: get_config_space() got an unexpected keyword argument 'T_mult'
Dear AutoPyTorch developers,
Thanks a lot for this nice tool. I am trying this out and I seem to encounter an error with the default settings itself. Any help would be greatly appreciated.
I want to find a decent neural network architecture for an autoencoder (my input and output are the same). Here is the code, I am using,
apt = AutoNetRegression(config_preset='medium_cs',
log_level='info',
networks=['mlpnet'],
use_pynisher=False,
cuda=True,
use_tensorboard_logger=True,
best_over_epochs=True,
algorithm='hyperband',
budget_type='epochs',
max_runtime=10,
min_budget=1,
max_budget=90,
#optimize_metric = accuracy,
#loss_modules =['l1_loss'],
early_stopping_patience=10,
early_stopping_reset_parameters=True,
)
apt.fit(x_train_transform, x_train_transform, validation_split= 0.3)
My input data 'x_train_transform' is a numpy array with shape (100000, 12) and dtype float. When I run the above code, I encounter the following error within the first few seconds. It seems to arise from the 'optimize_metric' option, however I cannot choose any value other than 'mean_distance'. (max_run_time has no influence on this problem)
17:42:36 WORKER: start listening for jobs
17:42:36 [AutoNet] Start hyperband
17:42:36 DISPATCHER: started the 'discover_worker' thread
17:42:36 DISPATCHER: started the 'job_runner' thread
17:42:36 DISPATCHER: Pyro daemon running on 134.21.15.194:38097
17:42:36 DISPATCHER: discovered new worker, hpbandster.run_0.worker.biolpc81.21492.-1140535582213952
17:42:36 HBMASTER: adjusted queue size to (0, 1)
17:42:36 DISPATCHER: A new worker triggered discover_worker
17:42:36 HBMASTER: starting run at 1582130556.8857265
17:42:36 WORKER: start processing job (0, 0, 0)
17:42:36 Fit optimization pipeline
17:42:37 [AutoNet] No validation set given and either no cross validator given or budget too low for CV. Continue by splitting 0.3 of training data.
17:42:37 [AutoNet] CV split 0 of 1
The Box-Cox transformation can only be applied to strictly positive data
Using yeo-johnson instead
17:43:14 'val_mean_distance'
17:43:14 WORKER: registered result for job (0, 0, 0) with dispatcher
17:43:14 job (0, 0, 0) failed with exception
Traceback (most recent call last):
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/hpbandster/core/worker.py", line 206, in start_computation
result = {'result': self.compute(*args, config_id=id, **kwargs),
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/core/worker.py", line 88, in compute
result = self.optimize_pipeline(config, config_id, budget, start_time)
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/core/worker.py", line 124, in optimize_pipeline
raise e
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/core/worker.py", line 118, in optimize_pipeline
refit=False, rescore=False, hyperparameter_config_id=config_id, dataset_info=self.dataset_info)
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/pipeline/base/pipeline.py", line 60, in fit_pipeline
return self.root.fit_traverse(**kwargs)
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/pipeline/base/node.py", line 115, in fit_traverse
node.fit_output = node.fit(**required_kwargs)
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/pipeline/nodes/cross_validation.py", line 108, in fit
result = self.sub_pipeline.fit_pipeline(X=X, Y=Y, **sub_pipeline_kwargs)
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/pipeline/base/pipeline.py", line 60, in fit_pipeline
return self.root.fit_traverse(**kwargs)
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/pipeline/base/node.py", line 115, in fit_traverse
node.fit_output = node.fit(**required_kwargs)
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/pipeline/nodes/train_node.py", line 132, in fit
best_over_epochs=pipeline_config['best_over_epochs'], refit=refit, logger=logger)
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/pipeline/nodes/train_node.py", line 249, in wrap_up_training
logs=logs, train_loader=train_loader, valid_loader=valid_loader, best_over_epochs=best_over_epochs, refit=refit)
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/components/training/trainer.py", line 72, in final_eval
final_log = min(logs, key=lambda log: self.metrics[0].loss_transform(log[opt_metric_name]))
File "/home/vikrama/anaconda3/envs/fastai/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/components/training/trainer.py", line 72, in <lambda>
final_log = min(logs, key=lambda log: self.metrics[0].loss_transform(log[opt_metric_name]))
KeyError: 'val_mean_distance'
17:43:14 HBMASTER: Timelimit reached: wait for remaining 0 jobs
17:43:14 DISPATCHER: Dispatcher shutting down
17:43:14 DISPATCHER: shut down complete
Error parsing results. Check results.json and output for more details. An empty results.json is usually caused by a misconfiguration of AutoNet.
Am I missing something?
Thanks a lot, in advance.
I came across this colab notebook that demonstrates how to use Auto-Pytorch.
https://colab.research.google.com/drive/1XhWT_crM91J1ay5BzDGWZ-vBLjyKqrEI?usp=sharing
This environment has python3.6.9. And calling the fit method triggers the errors in the images attached. I can also replicate this behavior on my local machine with python3.7.7.
I just downloaded and ran the example. There were no errors during installation.
torch=='1.4.0.dev20191207'
torchvision == '0.5.0.dev20191208'
Python == 3.7.5
RuntimeError Traceback (most recent call last)
in
6 max_runtime=600,
7 save_checkpoints=True,
----> 8 images_root_folders=[os.path.abspath("../../datasets/example_images")])
~\Anaconda3\lib\site-packages\autoPyTorch\core\api.py in fit(self, X_train, Y_train, X_valid, Y_valid, refit, **autonet_config)
150
151 if "optimized_hyperparameter_config" not in self.fit_result.keys() or not self.fit_result["optimized_hyperparameter_config"]: # MODIFY
--> 152 raise RuntimeError("No models fit during training, please retry with a larger max_runtime.")
153
154 if (refit):
RuntimeError: No models fit during training, please retry with a larger max_runtime.
----------------------------------------------------------------------------------------------------------
Second error:
ValueError Traceback (most recent call last)
in
5 max_runtime=4000,
6 default_dataset_download_dir="./datasets",
----> 7 images_root_folders=["./datasets", "./datasets/example_images"])
~\Anaconda3\lib\site-packages\autoPyTorch\core\api.py in fit(self, X_train, Y_train, X_valid, Y_valid, refit, **autonet_config)
142
143 self.fit_result = self.pipeline.fit_pipeline(pipeline_config=self.autonet_config,
--> 144 X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid)
145 try:
146 self.dataset_info = self.pipeline[CreateDatasetInfo.get_name()].fit_output["dataset_info"]
~\Anaconda3\lib\site-packages\autoPyTorch\pipeline\base\pipeline.py in fit_pipeline(self, **kwargs)
58
59 def fit_pipeline(self, **kwargs):
---> 60 return self.root.fit_traverse(**kwargs)
61
62 def predict_pipeline(self, **kwargs):
~\Anaconda3\lib\site-packages\autoPyTorch\pipeline\base\node.py in fit_traverse(self, **kwargs)
113
114 # call fit method
--> 115 node.fit_output = node.fit(**required_kwargs)
116 if (not isinstance(node.fit_output, dict)):
117 raise ValueError('Node ' + str(type(node)) + ' does not return a dictionary.')
~\Anaconda3\lib\site-packages\autoPyTorch\pipeline\nodes\image\optimization_algorithm_no_timelimit.py in fit(self, pipeline_config, X_train, Y_train, X_valid, Y_valid, refit)
74 res = None
75
---> 76 config_space = self.pipeline.get_hyperparameter_search_space(**pipeline_config)
77 config_space, constants = remove_constant_hyperparameter(config_space)
78 config_space.seed(pipeline_config['random_seed'])
~\Anaconda3\lib\site-packages\autoPyTorch\pipeline\base\pipeline.py in get_hyperparameter_search_space(self, dataset_info, **pipeline_config)
109 for name, node in self._pipeline_nodes.items():
110 #print("dataset_info" in pipeline_config.keys())
--> 111 config_space = node.get_hyperparameter_search_space(**pipeline_config)
112 cs.add_configuration_space(prefix=name, configuration_space=config_space, delimiter=ConfigWrapper.delimiter)
113
~\Anaconda3\lib\site-packages\autoPyTorch\pipeline\nodes\lr_scheduler_selector.py in get_hyperparameter_search_space(self, dataset_info, **pipeline_config)
60 parent_hyperparameter={'parent': selector, 'value': lr_scheduler_name})
61
---> 62 self._check_search_space_updates((possible_lr_scheduler, "*"))
63 return cs
64
~\Anaconda3\lib\site-packages\autoPyTorch\pipeline\base\pipeline_node.py in _check_search_space_updates(self, allowed_hps)
152 if key not in exploded_allowed_hps and
153 ConfigWrapper.delimiter.join(key.split(ConfigWrapper.delimiter)[:-1] + [""]) not in exploded_allowed_hps:
--> 154 raise ValueError("Invalid search space update given: %s" % key)
155
156 def _get_search_space_updates(self, prefix=None):
ValueError: Invalid search space update given: step:step_size
I was looking at the code for the schedulers and I did not see a point where the cumulative_time gets changed. I also looked at the rest of the repository, and the only point where cumulative_time is being calculated is in the image_trainer, but even there it is not being passed to the scheduler.
I was reading papers on AutoML and architecture search. There are actually a lot of ways to do this; one very obvious way is to train different models directly.
I was wondering which approach this library is using?
Currently tensorboard_logger
is used. I don't think it's actively supported anymore, they point to pytorch's own tensorboard-module on their Github.
Why bother? tensorboard_logger
uses a singleton-default-logger and I cannot reset the path to write the tensorboard-eventfiles. That would be helpful though, to distinguish between different configurations when refitting configurations from the incumbent-trajectory :)
Hi, I think AutoPyTorch is more than expected.
But I have a concern that it is not equipped with an ability to deal with custom loss,
such as quantile loss.
Is there any way I can use quantile loss with autoPyTorch?
I found that autonet.score
is different from the loss for training.
As I am using custom loss function, it would be more convenient and appropriate to have
custom score function. Anyway thank you for the package.
Hi, I followed Auto-PyTorch Tutorial.ipynb and used AutoNetImageClassification, but I ran into a TypeError like this:
job (0, 0, 0) failed with exception Traceback (most recent call last): File "/home/root/anaconda3/lib/python3.7/site-packages/hpbandster/core/worker.py", line 206, in start_computation result = {'result': self.compute(*args, config_id=id, **kwargs), File "/home/root/anaconda3/lib/python3.7/site-packages/autoPyTorch-0.0.2-py3.7.egg/autoPyTorch/core/worker_no_timelimit.py", line 60, in compute result, randomstate = limit_train(config, budget, config_id, random.getstate()) TypeError: cannot unpack non-iterable NoneType object
Thanks for your fantastic work! Looking forward to hearing from you!
I love how Auto-Pytorch is coming along but it is hard to see how a pipeline config is being interpreted before fitting the pipeline.
For example if I am setting a non-default loss in a config file. I can do a:
selector = autonet.pipeline[LossModuleSelector.get_name()]
selected_loss = autonet.pipeline[selector.get_name()].fit_output['loss_function']
and then I can examine that to get to know that my config did what was expected but before running the pipeline something like:
autonet.pipeline.get_pipeline_config()
Shows all possible loss functions not the one selected in the config file.
Sorry if I am missing something obvious,
Ric
example/mem_test.py and all scripts in examples/real_data search for datasets in <..>/Auto-PyTorch/datasets/, which are not there, and thus fail.
FileNotFoundError: File b'/home/eggenspk/Work/git/Auto-PyTorch/datasets/classification/dataset_28_optdigits.csv' does not exist
This is not a critical problem, but the datasets could be downloaded automatically when run for the first time.
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.