I downloaded the SynthText in the Wild dataset from the official site.
I then read the official readme.txt, but I couldn't find how many characters the dataset contains.
I googled it but couldn't find the answer either...
As you can see in the example image below, some symbols such as '.', ',', ':' and '-' exist. So this dataset has letters (26) + digits (10) + some symbols (?).
Does anyone know?
I implemented my own code to count up the symbols.
import logging
import os
import re
import sys

import cv2
import numpy as np
import scipy.io as sio


class _Skip(Exception):
    # sentinel exception used to skip an image; its definition was not shown in the original snippet
    pass


def get_characters(basedir, imagedirname='SynthText', skip_missing=False):
class Symbols:
def __init__(self):
self.symbols = set()
def update(self, data):
self.symbols = self.symbols.union(data)
def __len__(self):
return len(self.symbols)
def __str__(self):
return ''.join(self.symbols)
symbols = Symbols()
def csvgenerator(annodir, imagedir, cbb, wBB, imname, txts, symbols, **kwargs):
image_num = kwargs.get('image_num')
i = kwargs.get('i')
        imgpath = os.path.join(imagedir, imname)
        if not os.path.exists(imgpath):
            if not skip_missing:
                raise FileNotFoundError('{} was not found'.format(imgpath))
            else:
                logging.warning('Missing image: {}'.format(imgpath))
                raise _Skip()
        img = cv2.imread(imgpath)
        h, w, _ = img.shape
# convert txts to list of str
# I don't know why txts is
# ['Lines:\nI lost\nKevin ', 'will ', 'line\nand ',
# 'and\nthe ', '(and ', 'the\nout ',
# 'you ', "don't\n pkg "]
# there is strange blank and the length of txts is different from the one of wBB
txts = ' '.join(txts.tolist()).split()
text_num = len(txts)
if wBB.ndim == 2:
# convert shape=(2, 4,) to (2, 4, 1)
wBB = np.expand_dims(wBB, 2)
assert text_num == wBB.shape[2], 'The length of text and wordBB must be same, but got {} and {}'.format(
text_num, wBB.shape[2])
# replace non-alphanumeric characters with *
alltexts_asterisk = ''.join([re.sub(r'[^A-Za-z0-9]', '*', text) for text in txts])
        assert len(alltexts_asterisk) == cbb.shape[2], \
            'The length of characters and cbb must be same, but got {} and {}'.format(
                len(alltexts_asterisk), cbb.shape[2])
for b in range(text_num):
text = txts[b]
symboltext = re.sub(r'[A-Za-z0-9]+', '', text)
symbols.update(symboltext)
sys.stdout.write('\r{}, and number is {}...{:0.1f}% ({}/{})'.format(symbols, len(symbols), 100 * (float(i + 1) / image_num), i + 1, image_num))
sys.stdout.flush()
_gtmatRecognizer(csvgenerator, basedir, imagedirname, customLog=True, symbols=symbols)
print()
print('symbols are {}, and number is {}'.format(symbols, len(symbols)))
def _gtmatRecognizer(generator, basedir, imagedirname='SynthText', customLog=False, **kwargs):
"""
convert gt.mat to https://github.com/MhLiao/TextBoxes_plusplus/blob/master/data/example.xml
<annotation>
<folder>train_images</folder>
<filename>img_10.jpg</filename>
<size>
<width>1280</width>
<height>720</height>
<depth>3</depth>
</size>
<object>
<difficult>1</difficult>
<content>###</content>
<name>text</name>
<bndbox>
<x1>1011</x1>
<y1>157</y1>
<x2>1079</x2>
<y2>160</y2>
<x3>1076</x3>
<y3>173</y3>
<x4>1011</x4>
<y4>170</y4>
<xmin>1011</xmin>
<ymin>157</ymin>
<xmax>1079</xmax>
<ymax>173</ymax>
</bndbox>
</object>
.
.
.
</annotation>
    :param basedir: str, directory path containing 'SynthText' (and 'licence.txt')
    :param imagedirname: (Optional) str, image directory name containing 'gt.mat'
:return:
"""
logging.basicConfig(level=logging.INFO)
imagedir = os.path.join(basedir, imagedirname)
gtpath = os.path.join(imagedir, 'gt.mat')
annodir = os.path.join(basedir, 'Annotations')
if not os.path.exists(gtpath):
raise FileNotFoundError('{} was not found'.format(gtpath))
if not os.path.exists(annodir):
# create Annotations directory
os.mkdir(annodir)
"""
ref: http://www.robots.ox.ac.uk/~vgg/data/scenetext/readme.txt
gts = dict;
__header__: bytes
__version__: str
__globals__: list
    charBB: object ndarray, shape = (1, image num).
        Character-level bounding boxes. Each element's shape = (2=(x,y), 4=(top left,...: clockwise), char num)
    wordBB: object ndarray, shape = (1, image num).
        Word-level bounding boxes. Each element's shape = (2=(x,y), 4=(top left,...: clockwise), word num)
    imnames: object ndarray, shape = (1, image num, 1).
    txt: object ndarray, shape = (1, image num).
        Text. Each element's shape = (word num,)
"""
logging.info('Loading {} now.\nIt may take a while.'.format(gtpath))
gts = sio.loadmat(gtpath)
    logging.info('Loaded\n')
charBB = gts['charBB'][0]
wordBB = gts['wordBB'][0]
imnames = gts['imnames'][0]
texts = gts['txt'][0]
image_num = imnames.size
for i, (cbb, wBB, imname, txts) in enumerate(zip(charBB, wordBB, imnames, texts)):
imname = imname[0]
try:
generator(annodir, imagedir, cbb, wBB, imname, txts, i=i, image_num=image_num, **kwargs)
except _Skip:
pass
if not customLog:
sys.stdout.write('\rGenerating... {:0.1f}% ({}/{})'.format(100 * (float(i + 1) / image_num), i + 1, image_num))
sys.stdout.flush()
print()
logging.info('Finished!!!')
Finally, I got the number of symbols.
It appears to be every ASCII printable character except the space, letters, and digits (see the quick check after the log below).
INFO:root:Loading ~/data/text/SynthText/SynthText/gt.mat now.
It may take a while.
INFO:root:Loaded
}&|%_(],$^{+?##/-`).<=;~['>:\!"*, and number is 32...100.0% (858750/858750)
INFO:root:Finished!!!
symbols are }&|%_(],$^{+?##/-`).<=;~['>:\!"*, and number is 32
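The ASCII printable characters minus whitespace, letters and digits are exactly Python's string.punctuation, which has 32 characters, matching the count above. A quick, dataset-independent check:
import string

expected = set(string.printable) - set(string.whitespace) \
    - set(string.ascii_letters) - set(string.digits)

print(len(expected))                        # 32
print(expected == set(string.punctuation))  # True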
Related
I am trying to do a batch transform on a training dataset in an S3 bucket. I have followed this link:
https://github.com/aws-samples/quicksight-sagemaker-integration-blog
The training data on which the transformation is being applied is about 35 MB.
I am getting these errors:
Bad HTTP status received from algorithm: 500
The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.
Process followed:
1. s3_input_train = sagemaker.TrainingInput(s3_data='s3://{}/{}/rawtrain/'.format(bucket, prefix), content_type='csv')
2. from sagemaker.sklearn.estimator import SKLearn
sagemaker_session = sagemaker.Session()
script_path = 'preprocessing.py'
sklearn_preprocessor = SKLearn(
entry_point=script_path,
role=role,
train_instance_type="ml.c4.xlarge",
framework_version='0.20.0',
py_version = 'py3',
sagemaker_session=sagemaker_session)
sklearn_preprocessor.fit({'train': s3_input_train})
3. transform_train_output_path = 's3://{}/{}/{}/'.format(bucket, prefix, 'transformtrain-train-output')
scikit_learn_inferencee_model = sklearn_preprocessor.create_model(env={'TRANSFORM_MODE': 'feature-transform'})
transformer_train = scikit_learn_inferencee_model.transformer(
instance_count=1,
assemble_with = 'Line',
output_path = transform_train_output_path,
accept = 'text/csv',
strategy = "MultiRecord",
max_payload =40,
instance_type='ml.m4.xlarge')
4. Preprocess training input
transformer_train.transform(s3_input_train.config['DataSource']['S3DataSource']['S3Uri'],
content_type='text/csv',
split_type = "Line")
print('Waiting for transform job: ' + transformer_train.latest_transform_job.job_name)
transformer_train.wait()
preprocessed_train_path = transformer_train.output_path + transformer_train.latest_transform_job.job_name
preprocessing.py
from __future__ import print_function
import time
import sys
from io import StringIO
import os
import shutil
import argparse
import csv
import json
import numpy as np
import pandas as pd
import logging
from sklearn.compose import ColumnTransformer
from sklearn.externals import joblib
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Binarizer, StandardScaler, OneHotEncoder
from sagemaker_containers.beta.framework import (
content_types, encoders, env, modules, transformer, worker)
# Specifying the column names here.
feature_columns_names = [
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K'
]
label_column = 'ab'
feature_columns_dtype = {
'A' : str,
'B' : np.float64,
'C' : np.float64,
'D' : str,
"E" : np.float64,
'F' : str,
'G' : str,
'H' : np.float64,
'I' : str,
'J' : str,
'K': str,
}
label_column_dtype = {'ab': np.int32}
def merge_two_dicts(x, y):
z = x.copy() # start with x's keys and values
z.update(y) # modifies z with y's keys and values & returns None
return z
def _is_inverse_label_transform():
"""Returns True if if it's running in inverse label transform."""
return os.getenv('TRANSFORM_MODE') == 'inverse-label-transform'
def _is_feature_transform():
"""Returns True if it's running in feature transform mode."""
return os.getenv('TRANSFORM_MODE') == 'feature-transform'
if __name__ == '__main__':
parser = argparse.ArgumentParser()
# Sagemaker specific arguments. Defaults are set in the environment variables.
parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
args = parser.parse_args()
input_files = [ os.path.join(args.train, file) for file in os.listdir(args.train) ]
if len(input_files) == 0:
raise ValueError(('There are no files in {}.\n' +
'This usually indicates that the channel ({}) was incorrectly specified,\n' +
'the data specification in S3 was incorrectly specified or the role specified\n' +
'does not have permission to access the data.').format(args.train, "train"))
raw_data = [ pd.read_csv(
file,
header=None,
names=feature_columns_names + [label_column],
dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype)) for file in input_files ]
concat_data = pd.concat(raw_data)
numeric_features = list([
'B',
'C',
'E',
'H'
])
numeric_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy='median')),
('scaler', StandardScaler())])
categorical_features = list(['A','D','F','G','I','J','K'])
categorical_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
('onehot', OneHotEncoder(handle_unknown='ignore'))])
preprocessor = ColumnTransformer(
transformers=[
('num', numeric_transformer, numeric_features),
('cat', categorical_transformer, categorical_features)],
remainder="drop")
preprocessor.fit(concat_data)
joblib.dump(preprocessor, os.path.join(args.model_dir, "model.joblib"))
print("saved model!")
def input_fn(input_data, request_content_type):
"""Parse input data payload
We currently only take csv input. Since we need to process both labelled
and unlabelled data we first determine whether the label column is present
by looking at how many columns were provided.
"""
content_type = request_content_type.lower(
) if request_content_type else "text/csv"
content_type = content_type.split(";")[0].strip()
if isinstance(input_data, str):
str_buffer = input_data
else:
str_buffer = str(input_data,'utf-8')
if _is_feature_transform():
if content_type == 'text/csv':
# Read the raw input data as CSV.
df = pd.read_csv(StringIO(input_data), header=None)
if len(df.columns) == len(feature_columns_names) + 1:
# This is a labelled example, includes the label
df.columns = feature_columns_names + [label_column]
elif len(df.columns) == len(feature_columns_names):
# This is an unlabelled example.
df.columns = feature_columns_names
return df
else:
raise ValueError("{} not supported by script!".format(content_type))
if _is_inverse_label_transform():
if (content_type == 'text/csv' or content_type == 'text/csv; charset=utf-8'):
# Read the raw input data as CSV.
df = pd.read_csv(StringIO(str_buffer), header=None)
if len(df.columns) == len(feature_columns_names) + 1:
                # This is a labelled example, includes the label
df.columns = feature_columns_names + [label_column]
elif len(df.columns) == len(feature_columns_names):
# This is an unlabelled example.
df.columns = feature_columns_names
return df
else:
raise ValueError("{} not supported by script!".format(content_type))
def output_fn(prediction, accept):
"""Format prediction output
The default accept/content-type between containers for serial inference is JSON.
We also want to set the ContentType or mimetype as the same value as accept so the next
container can read the response payload correctly.
"""
accept = 'text/csv'
if type(prediction) is not np.ndarray:
prediction=prediction.toarray()
if accept == "application/json":
instances = []
for row in prediction.tolist():
instances.append({"features": row})
json_output = {"instances": instances}
return worker.Response(json.dumps(json_output), mimetype=accept)
elif accept == 'text/csv':
return worker.Response(encoders.encode(prediction, accept), mimetype=accept)
else:
        raise RuntimeError("{} accept type is not supported by this script.".format(accept))
def predict_fn(input_data, model):
"""Preprocess input data
We implement this because the default predict_fn uses .predict(), but our model is a preprocessor
so we want to use .transform().
The output is returned in the following order:
        label (if present), then the rest of the features, either one-hot encoded or standardized
"""
if _is_feature_transform():
features = model.transform(input_data)
if label_column in input_data:
# Return the label (as the first column) and the set of features.
return np.insert(features.toarray(), 0, pd.get_dummies(input_data[label_column])['True.'], axis=1)
else:
# Return only the set of features
return features
if _is_inverse_label_transform():
features = input_data.iloc[:,0]>0.5
features = features.values
return features
def model_fn(model_dir):
"""Deserialize fitted model
"""
if _is_feature_transform():
preprocessor = joblib.load(os.path.join(model_dir, "model.joblib"))
return preprocessor
Please help.
As far as I can see, you are referring to a post that is pretty old and has open issues on GitHub here regarding the input source and the configuration.
I encourage you to check out the latest examples here, which show how to visualize Amazon SageMaker machine learning predictions with Amazon QuickSight.
Additionally, if the problem persists, please open a service request with AWS Support, including the transform job ARN, so the internal server error can be investigated further.
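If the 500 keeps coming back, the container's own Python traceback usually ends up in CloudWatch rather than in the notebook. A sketch for pulling it with boto3 (assuming the standard '/aws/sagemaker/TransformJobs' log group used by batch transform, and the job name from the transformer above):
import boto3

logs = boto3.client('logs')
log_group = '/aws/sagemaker/TransformJobs'
job_name = transformer_train.latest_transform_job.job_name  # from the code above

streams = logs.describe_log_streams(logGroupName=log_group,
                                    logStreamNamePrefix=job_name)
for stream in streams['logStreams']:
    events = logs.get_log_events(logGroupName=log_group,
                                 logStreamName=stream['logStreamName'])
    for event in events['events']:
        print(event['message'])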
Tensorflow: convert PrefetchDataset to BatchDataset
With the latest TensorFlow version (2.3.1) I am trying to follow the basic text classification example at https://www.tensorflow.org/tutorials/keras/text_classification. Instead of creating the dataset from a directory as the example does, I am using a csv file:
SELECT_COLUMNS = ['SentimentText','Sentiment']
LABEL_COLUMN = 'Sentiment'
LABELS = [0, 1]
def get_dataset(file_path, **kwargs):
dataset = tf.data.experimental.make_csv_dataset(
file_path,
batch_size=3, # Artificially small to make examples easier to show.
label_name=LABEL_COLUMN,
na_value="?",
num_epochs=1,
ignore_errors=True,
**kwargs)
return dataset
all_data = get_dataset(data_path, select_columns=SELECT_COLUMNS)
As a result I get:
type(all_data)
tensorflow.python.data.ops.dataset_ops.PrefetchDataset
The example loads data from a directory with:
batch_size = 32
seed = 42
raw_train_ds = tf.keras.preprocessing.text_dataset_from_directory(
'aclImdb/train',
batch_size=batch_size,
validation_split=0.2,
subset='training',
seed=seed)
And gets a dataset of another type:
type(raw_train_ds)
tensorflow.python.data.ops.dataset_ops.BatchDataset
Now when I try to standardise and vectorise the data with the functions from the example:
def custom_standardization(input_data):
lowercase = tf.strings.lower(input_data)
stripped_html = tf.strings.regex_replace(lowercase, '<br />', ' ')
return tf.strings.regex_replace(stripped_html,
'[%s]' % re.escape(string.punctuation),
'')
max_features = 10000
sequence_length = 250
vectorize_layer = TextVectorization(
standardize=custom_standardization,
max_tokens=max_features,
output_mode='int',
output_sequence_length=sequence_length)
And when I apply them to my dataset, I get this error:
# Make a text-only dataset (without labels), then call adapt
train_text = all_data.map(lambda x, y: x)
vectorize_layer.adapt(train_text)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-20-1f1fc445912d> in <module>
1 # Make a text-only dataset (without labels), then call adapt
2 train_text = all_data.map(lambda x, y: x)
----> 3 vectorize_layer.adapt(train_text)
/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/layers/preprocessing/text_vectorization.py in adapt(self, data, reset_state)
378 shape = dataset_ops.get_legacy_output_shapes(data)
379 if not isinstance(shape, tensor_shape.TensorShape):
--> 380 raise ValueError("The dataset passed to 'adapt' must contain a single "
381 "tensor value.")
382 if shape.rank == 0:
ValueError: The dataset passed to 'adapt' must contain a single tensor value.
How can I convert a PrefetchDataset to a BatchDataset?
You could use the tf.stack method to pack the features into a single tensor. The function below is from the Custom training: walkthrough tutorial in TensorFlow.
def pack_features_vector(features, labels):
features = tf.stack(list(features.values()), axis=1)
return features, labels
all_data = get_dataset(data_path, select_columns=SELECT_COLUMNS)
train_dataset = all_data.map(pack_features_vector)
train_text = train_dataset.map(lambda x, y: x)
vectorize_layer.adapt(train_text)
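Since SELECT_COLUMNS only contains one text feature here, an alternative to stacking is to pull that single column out of the feature dictionary that make_csv_dataset yields (a sketch assuming the 'SentimentText' column name from the question):
all_data = get_dataset(data_path, select_columns=SELECT_COLUMNS)

# make_csv_dataset yields (features_dict, label); keep only the text column
train_text = all_data.map(lambda features, label: features['SentimentText'])
vectorize_layer.adapt(train_text)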
I have used the example described here (http://openmdao.readthedocs.org/en/1.5.0/usr-guide/tutorials/doe-drivers.html?highlight=driver) to show my problem. I want to use the same approach for a component whose "params" are an array rather than a float. See the example below.
from openmdao.api import IndepVarComp, Group, Problem, ScipyOptimizer, ExecComp, DumpRecorder, Component
from openmdao.drivers.latinhypercube_driver import LatinHypercubeDriver, OptimizedLatinHypercubeDriver
import numpy as np
class Paraboloid(Component):
""" Evaluates the equation f(x,y) = (x-3)^2 + xy + (y+4)^2 - 3 """
def __init__(self):
super(Paraboloid, self).__init__()
self.add_param('x', val=0.0)
self.add_param('y', val=0.0)
self.add_output('f_xy', val=0.0)
def solve_nonlinear(self, params, unknowns, resids):
"""f(x,y) = (x-3)^2 + xy + (y+4)^2 - 3
"""
x = params['x']
y = params['y']
unknowns['f_xy'] = (x-3.0)**2 + x*y + (y+4.0)**2 - 3.0
def linearize(self, params, unknowns, resids):
#""" Jacobian for our paraboloid."""
x = params['x']
y = params['y']
J = {}
J['f_xy', 'x'] = 2.0*x - 6.0 + y
J['f_xy', 'y'] = 2.0*y + 8.0 + x
return J
class ParaboloidArray(Component):
""" Evaluates the equation f(x,y) = (x-3)^2 + xy + (y+4)^2 - 3 """
def __init__(self):
super(ParaboloidArray, self).__init__()
self.add_param('X', val=np.array([0., 0.]))
self.add_output('f_xy', val=0.0)
def solve_nonlinear(self, params, unknowns, resids):
"""f(x,y) = (x-3)^2 + xy + (y+4)^2 - 3
"""
x = params['X'][0]
        y = params['X'][1]
unknowns['f_xy'] = (x-3.0)**2 + x*y + (y+4.0)**2 - 3.0
top = Problem()
root = top.root = Group()
root.add('p1', IndepVarComp('x', 50.0), promotes=['*'])
root.add('p2', IndepVarComp('y', 50.0), promotes=['*'])
root.add('comp', Paraboloid(), promotes=['*'])
top.driver = OptimizedLatinHypercubeDriver(num_samples=4, seed=0, population=20, generations=4, norm_method=2)
top.driver.add_desvar('x', lower=-50.0, upper=50.0)
top.driver.add_desvar('y', lower=-50.0, upper=50.0)
top.driver.add_objective('f_xy')
top.setup()
top.run()
top.cleanup()
###########################
print("case float ok")
top = Problem()
root = top.root = Group()
root.add('p1', IndepVarComp('X', np.array([50., 50.])), promotes=['*'])
root.add('comp', ParaboloidArray(), promotes=['*'])
top.driver = OptimizedLatinHypercubeDriver(num_samples=4, seed=0, population=20, generations=4, norm_method=2)
top.driver.add_desvar('X', lower=np.array([-50., -50.]), upper=np.array([50., 50.]))
top.driver.add_objective('f_xy')
top.setup()
top.run()
top.cleanup()
I obtain the following error:
Traceback (most recent call last):
File "C:\Program Files (x86)\Wing IDE 101 5.0\src\debug\tserver\_sandbox.py", line 102, in <module>
File "D:\tlefeb\Anaconda2\Lib\site-packages\openmdao\core\problem.py", line 1038, in run
self.driver.run(self)
File "D:\tlefeb\Anaconda2\Lib\site-packages\openmdao\drivers\predeterminedruns_driver.py", line 108, in run
for run in runlist:
File "D:\tlefeb\Anaconda2\Lib\site-packages\openmdao\drivers\latinhypercube_driver.py", line 57, in _build_runlist
design_var_buckets = self._get_buckets(bounds['lower'], bounds['upper'])
File "D:\tlefeb\Anaconda2\Lib\site-packages\openmdao\drivers\latinhypercube_driver.py", line 101, in _get_buckets
bucket_walls = np.linspace(low, high, self.num_samples + 1)
File "D:\tlefeb\Anaconda2\Lib\site-packages\numpy\core\function_base.py", line 102, in linspace
if step == 0:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Did I misunderstand something in my way of coding?
I get a different error than you using the latest OpenMDAO master, but I get an error nonetheless. There isn't anything wrong with your model; rather, there are some bugs with using array variables for DOEs. I've added a bug-fix story to the OpenMDAO backlog, which we'll hopefully be able to deal with in the next couple of weeks. We'd gladly accept a pull request if you develop a fix before we get to it, though.
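Until that fix lands, one possible workaround (only a sketch against the 1.x API used in the question; the Pack component and the x0/x1 names are mine, not part of OpenMDAO) is to keep the design variables scalar and pack them into the array input with a small component:
class Pack(Component):
    """Packs two scalar design variables into the array input expected by ParaboloidArray."""
    def __init__(self):
        super(Pack, self).__init__()
        self.add_param('x0', val=0.0)
        self.add_param('x1', val=0.0)
        self.add_output('X', val=np.zeros(2))

    def solve_nonlinear(self, params, unknowns, resids):
        unknowns['X'] = np.array([params['x0'], params['x1']])

top = Problem()
root = top.root = Group()
root.add('p1', IndepVarComp('x0', 50.0), promotes=['*'])
root.add('p2', IndepVarComp('x1', 50.0), promotes=['*'])
root.add('pack', Pack(), promotes=['*'])
root.add('comp', ParaboloidArray(), promotes=['*'])
top.driver = OptimizedLatinHypercubeDriver(num_samples=4, seed=0, population=20, generations=4, norm_method=2)
top.driver.add_desvar('x0', lower=-50.0, upper=50.0)
top.driver.add_desvar('x1', lower=-50.0, upper=50.0)
top.driver.add_objective('f_xy')
top.setup()
top.run()
top.cleanup()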
I am setting up an optimization in OpenMDAO v0.13 using several components that are used many times. My assembly seems to be working just fine with the default driver, but when I run with an optimizer it does not solve. The optimizer simply runs with the inputs given and returns the answer using those inputs. I am not sure what the issue is, but I would appreciate any insights. I have included a simple code mimicking my structure that reproduces the error. I think the problem is in the connections, summer.fs does not update after initialization.
from openmdao.main.api import Assembly, Component
from openmdao.lib.datatypes.api import Float, Array, List
from openmdao.lib.drivers.api import DOEdriver, SLSQPdriver, COBYLAdriver, CaseIteratorDriver
from pyopt_driver.pyopt_driver import pyOptDriver
import numpy as np
class component1(Component):
x = Float(iotype='in')
y = Float(iotype='in')
term1 = Float(iotype='out')
a = Float(iotype='in', default_value=1)
def execute(self):
x = self.x
a = self.a
term1 = a*x**2
self.term1 = term1
print "In comp1", self.name, self.a, self.x, self.term1
def list_deriv_vars(self):
return ('x',), ('term1',)
def provideJ(self):
x = self.x
a = self.a
dterm1_dx = 2.*a*x
J = np.array([[dterm1_dx]])
print 'In comp1, J = %s' % J
return J
class component2(Component):
x = Float(iotype='in')
y = Float(iotype='in')
term1 = Float(iotype='in')
f = Float(iotype='out')
def execute(self):
y = self.y
x = self.x
term1 = self.term1
f = term1 + x + y**2
self.f = f
print "In comp2", self.name, self.x, self.y, self.term1, self.f
class summer(Component):
total = Float(iotype='out', desc='sum of all f values')
def __init__(self, size):
super(summer, self).__init__()
self.size = size
self.add('fs', Array(np.ones(size), iotype='in', desc='f values from all cases'))
def execute(self):
self.total = sum(self.fs)
print 'In summer, fs = %s and total = %s' % (self.fs, self.total)
class assembly(Assembly):
x = Float(iotype='in')
y = Float(iotype='in')
total = Float(iotype='out')
def __init__(self, size):
super(assembly, self).__init__()
self.size = size
self.add('a_vals', Array(np.zeros(size), iotype='in', dtype='float'))
self.add('fs', Array(np.zeros(size), iotype='out', dtype='float'))
print 'in init a_vals = %s' % self.a_vals
def configure(self):
# self.add('driver', SLSQPdriver())
self.add('driver', pyOptDriver())
self.driver.optimizer = 'SNOPT'
# self.driver.pyopt_diff = True
#create this first, so we can connect to it
self.add('summer', summer(size=len(self.a_vals)))
self.connect('summer.total', 'total')
print 'in configure a_vals = %s' % self.a_vals
# create instances of components
for i in range(0, self.size):
c1 = self.add('comp1_%d'%i, component1())
c1.missing_deriv_policy = 'assume_zero'
c2 = self.add('comp2_%d'%i, component2())
self.connect('a_vals[%d]' % i, 'comp1_%d.a' % i)
self.connect('x', ['comp1_%d.x'%i, 'comp2_%d.x'%i])
self.connect('y', ['comp1_%d.y'%i, 'comp2_%d.y'%i])
self.connect('comp1_%d.term1'%i, 'comp2_%d.term1'%i)
self.connect('comp2_%d.f'%i, 'summer.fs[%d]'%i)
self.driver.workflow.add(['comp1_%d'%i, 'comp2_%d'%i])
self.connect('summer.fs[:]', 'fs[:]')
self.driver.workflow.add(['summer'])
# set up main driver (optimizer)
self.driver.iprint = 1
self.driver.maxiter = 100
self.driver.accuracy = 1.0e-6
self.driver.add_parameter('x', low=-5., high=5.)
self.driver.add_parameter('y', low=-5., high=5.)
self.driver.add_objective('summer.total')
if __name__ == "__main__":
""" the result should be -1 at (x, y) = (-0.5, 0) """
import time
from openmdao.main.api import set_as_top
a_vals = np.array([1., 1., 1., 1.])
test = set_as_top(assembly(size=len(a_vals)))
test.a_vals = a_vals
print test.a_vals
test.x = 2.
test.y = 2.
tt = time.time()
test.run()
print "Elapsed time: ", time.time()-tt, "seconds"
print 'result = ', test.summer.total
print '(x, y) = (%s, %s)' % (test.x, test.y)
print test.fs
I played around with your model, and found that the following line caused problems:
#self.connect('summer.fs[:]', 'fs[:]')
When I commented it out, I got the optimization to move.
I am not sure what is happening there, but the graph transformations sometimes have some issues with component input nodes that are promoted as outputs on the assembly boundary. If you still want those values to be available on the assembly, you could try promoting the outputs from the comp2_n components instead.
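For what it's worth, with that connection commented out the per-case values are still reachable on the sub-component, so the end of the script can read them from summer directly instead of from the assembly-level fs array. A minimal sketch of the change:
# in assembly.configure(), leave the boundary connection out:
#     self.connect('summer.fs[:]', 'fs[:]')

# after test.run(), read the values from the summer component itself:
print test.summer.fs      # per-case f values
print test.summer.total   # objective seen by the optimizer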
I'm trying to refactor my little voting functions. I've got two of them which do almost exactly the same thing, only on different model fields. Voters can add stars to a song if they like it, and they can add flags to a song if they dislike it. The voters are then associated with the song via a many-to-many relationship, so it can be determined whether they have already voted on that song. If so, a star (or a flag) is removed from the song and the relationship between voter and song is removed as well.
Here is the function that adds or removes stars:
# models.py
class UserProfile(models.Model):
user = models.ForeignKey(User, unique = True, related_name = 'profile')
liked_songs = models.ManyToManyField('Song', blank = True, null = True)
disliked_songs = models.ManyToManyField('Song', blank = True, null = True)
class Song(models.Model):
song_id = models.CharField(max_length = 36, primary_key = True)
last_accessed = models.DateTimeField()
stars = models.IntegerField(max_length = 5, default = 0)
flags = models.IntegerField(max_length = 5, default = 0)
# views.py
def vote(request, song_id, task): # 'task' should be either 'stars' or 'flags'
song = Song.objects.get(song_id = song_id)
voter = request.user.get_profile()
# Voter does not already like the song
if not voter.liked_songs.filter(song_id = song.song_id): # (*)
try:
# Increase vote, set 'last_accessed', add Song to UserProfile.liked_songs
Song.objects.filter(song_id = song_id).update(
stars = F('stars') + 1, # (*)
last_accessed = datetime.datetime.now()
)
voter.liked_songs.add(song) # (*)
except:
raise
else:
try:
# Decrease vote, set 'last_accessed', remove Song from UserProfile.liked_songs
Song.objects.filter(song_id = song_id).update(
stars = F('stars') - 1, # (*)
last_accessed = datetime.datetime.now()
)
voter.liked_songs.remove(song) # (*)
except:
raise
return "Done."
The function that adds or removes flags is exactly the same, except for the marked (*) lines:
if not voter.disliked_songs.filter(song_id = song.song_id):
flags = F('flags') + 1,
flags = F('flags') - 1,
voter.disliked_songs.add(song)
voter.disliked_songs.remove(song)
Now here's my first question: what do I have to do to use only one function for liking and disliking a song? I have already introduced the argument task, but I can't simply write task = F('task') + 1, because there is no field named 'task' in the Song model.
Bonus question: the only difference between stars = F('stars') + 1, and stars = F('stars') - 1, is the operator (+ or -). Is there a way to keep an operator in a variable, i.e. task = F('task') myoperator 1?
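For reference on the first question, Django accepts dynamically built keyword arguments for update(), and the matching many-to-many relation can be picked based on task, so one function can cover both cases. A sketch (untested against this model; the task-to-relation mapping is my assumption):
import datetime

from django.db.models import F

def vote(request, song_id, task):  # task is either 'stars' or 'flags'
    song = Song.objects.get(song_id=song_id)
    voter = request.user.get_profile()
    # pick the M2M relation that matches the counter field
    related = voter.liked_songs if task == 'stars' else voter.disliked_songs

    delta = -1 if related.filter(song_id=song.song_id) else 1

    Song.objects.filter(song_id=song_id).update(
        last_accessed=datetime.datetime.now(),
        **{task: F(task) + delta}  # e.g. stars=F('stars') + 1
    )
    if delta == 1:
        related.add(song)
    else:
        related.remove(song)
    return "Done."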
Answer for the bonus question: why don't you have a function that accepts the operator as an argument and evaluates the expression based on that argument, like the following example:
>>> def f(op):
... return (eval("3"+op+"2"))
>>> f("+")
5
>>> f("-")
1
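A variant of the same idea that avoids eval is to look the operator up in the standard operator module:
import operator

OPS = {'+': operator.add, '-': operator.sub}

def f(op):
    return OPS[op](3, 2)

print(f('+'))  # 5
print(f('-'))  # 1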