Sentiment140 Preprocessing - database

I have been trying to do some preprocessing on the Sentiment140 database on Kaggle: https://www.kaggle.com/kazanova/sentiment140
The code I'm using is this:
import os
from nltk.stem.lancaster import LancasterStemmer
from nltk.tokenize import RegexpTokenizer
Base_location = ''
dataset_location = os.path.join(Base_location, 'Sentiment140.csv')
corpus = []
labels = []
# Parse tweets and sentiments
with open(dataset_location, 'r', encoding='latin-1') as df:
for i, line in enumerate(df):
parts = line.strip().split(',')
# Sentiment (0 = Negative, 1 = Positive)
labels.append(str(parts[0].strip()))
# Tweet
tweet = parts[5].strip()
if tweet.startswith('"'):
tweet = tweet[1:]
if tweet.endswith('"'):
tweet = tweet[::-1]
corpus.append(tweet.strip().lower())
print('Corpus size: {}'.format(len(corpus)))
# Tokenize and stem
tkr = RegexpTokenizer('[a-zA-Z0-9#]+')
stemmer = LancasterStemmer()
tokenized_corpus = []
for i, tweet in enumerate(corpus):
tokens = [stemmer.stem(t) for t in tkr.tokenize(tweet) if not t.startswith('#')]
tokenized_corpus.append(tokens)
print(tokenized_corpus)
However, I keep getting this error:
TypeError: '_io.TextIOWrapper' object is not subscriptable
Can anyone help me understand how to solve the issue?
Thanks in advance

TL;DR
To read .csv or structured datasets, use pandas https://pandas.pydata.org/ or any other dataframe libraries.
In Long:
Instead of doing:
Base_location = ''
dataset_location = os.path.join(Base_location, 'Sentiment140.csv')
corpus = []
labels = []
# Parse tweets and sentiments
with open(dataset_location, 'r', encoding='latin-1') as df:
for i, line in enumerate(df):
parts = line.strip().split(',')
# Sentiment (0 = Negative, 1 = Positive)
labels.append(str(parts[0].strip()))
# Tweet
tweet = parts[5].strip()
if tweet.startswith('"'):
tweet = tweet[1:]
if tweet.endswith('"'):
tweet = tweet[::-1]
corpus.append(tweet.strip().lower())
You could simply read the .csv file with pandas, e.g.
import pandas as pd
corpus = pd.read_csv('training.1600000.processed.noemoticon.csv', encoding='latin-1')
Then use the .apply() function to process the tweets:
"""
Columns
====
target: the polarity of the tweet (0 = negative, 2 = neutral, 4 = positive)
ids: The id of the tweet ( 2087)
date: the date of the tweet (Sat May 16 23:58:44 UTC 2009)
flag: The query (lyx). If there is no query, then this value is NO_QUERY.
user: the user that tweeted (robotickilldozr)
text: the text of the tweet (Lyx is cool)
"""
from nltk.stem.lancaster import LancasterStemmer
from nltk.tokenize import RegexpTokenizer
import pandas as pd
df = pd.read_csv('training.1600000.processed.noemoticon.csv',
header=None,
names=['target', 'ids', 'date', 'flag', 'user', 'text'],
encoding='latin-1')
tokenizer = RegexpTokenizer('[a-zA-Z0-9#]+')
stemmer = LancasterStemmer()
def process_tweet(tweet):
return [stemmer.stem(token) if not token.startswith('#') else token
for token in tokenizer.tokenize(tweet)]
# 1. Cast the column type to string
# 2. Lowercase it
# 3. Iterate throw each row and get the output from process_tweet()
# 4. # 3. Keep in a new column call `tokenized_text`
df['tokenized_text']= df['text'].str.lower().apply(process_tweet)

Related

I'm trying to convert Pandas dataframe to HuggingFace DatasetDic

I have a pandas dataframe with 20k rows containing 2 columns named English, te. Changed the English column name to en. Trying to split the dataset into train, validation and test. And, I want to convert that dataset into
raw_datasets
the output i'm expecting is
DatasetDict({
train: Dataset({
features: ['translation'],
num_rows: 18000
})
validation: Dataset({
features: ['translation'],
num_rows: 1000
})
test: Dataset({
features: ['translation'],
num_rows: 1000
})
})
I'm trying to write a code like raw_datasets["train"][0], then it should return output like below
{'translation': {'en': 'Membership of Parliament: see Minutes',
'to': 'Componenţa Parlamentului: a se vedea procesul-verbal'}}
The data must be in DatasetDict, similar to if we load data from huggingface like dataset DatasetDict type. Below is the code i've written but it's not working
import pandas as pd
from collections import namedtuple
Dataset = namedtuple('Dataset', ['features', 'num_rows'])
DatasetDict = namedtuple('DatasetDict', ['train', 'validation', 'test'])
def create_dataset_dict(df):
# Rename the column
df = df.rename(columns={'English': 'en'})
# Split the data into train, validation and test
train_df = df.iloc[:18000, :]
validation_df = df.iloc[18000:19000, :]
test_df = df.iloc[19000:, :]
# Create the dataset dictionaries
train = Dataset(features=['translation'], num_rows=18000)
validation = Dataset(features=['translation'], num_rows=1000)
test = Dataset(features=['translation'], num_rows=1052)
# Create the final dataset dictionary
datasets = DatasetDict(train=train, validation=validation, test=test)
return datasets
def preprocess_dataset(df):
df = df.rename(columns={'English': 'en'})
train_df = df.iloc[:18000, :]
validation_df = df.iloc[18000:19000, :]
test_df = df.iloc[19000:, :]
train_dict = [{'translation': {'en': row['en'], 'te': row['te']}} for _, row in train_df.iterrows()]
validation_dict = [{'translation': {'en': row['en'], 'te': row['te']}} for _, row in validation_df.iterrows()]
test_dict = [{'translation': {'en': row['en'], 'te': row['te']}} for _, row in test_df.iterrows()]
return DatasetDict(train=train_dict, validation=validation_dict, test=test_dict)
df = pd.read_csv('eng-to-te.csv')
raw_datasets = preprocess_dataset(df)
The above code is not working. Can anyone help me with this?

Problem with saving pickle object into arrays from images in python

I have the following class for loading and converting my images into train and test arrays for a deep learning model in Tensorflow 2.
The images are in three folders, named 'Car', 'Cat' and 'Man', which are within the Train and Test main folders. Each image is of 300 x 400 pixels.
import os
import pickle
import cv2
import numpy as np
os.getcwd()
out: 'C:\\Users\\me\\Jupiter_Notebooks\\Dataset_Thermal\\SeekThermal'
path_train = "../SeekThermal/Train"
path_test = "../SeekThermal/Test"
class MasterImage(object):
def __init__(self,PATH='', IMAGE_SIZE = 50):
self.PATH = PATH
self.IMAGE_SIZE = IMAGE_SIZE
self.image_data = []
self.x_data = []
self.y_data = []
self.CATEGORIES = []
# This will get List of categories
self.list_categories = []
def get_categories(self):
for path in os.listdir(self.PATH):
if '.DS_Store' in path:
pass
else:
self.list_categories.append(path)
print("Found Categories ",self.list_categories,'\n')
return self.list_categories
def process_image(self):
try:
"""
Return Numpy array of image
:return: X_Data, Y_Data
"""
self.CATEGORIES = self.get_categories()
for categories in self.CATEGORIES: # Iterate over categories
train_folder_path = os.path.join(self.PATH, categories) # Folder Path
class_index = self.CATEGORIES.index(categories) # this will get index for classification
for img in os.listdir(train_folder_path): # This will iterate in the Folder
new_path = os.path.join(train_folder_path, img) # image Path
try: # if any image is corrupted
image_data_temp = cv2.imread(new_path) # Read Image as numbers
image_temp_resize = cv2.resize(image_data_temp,(self.IMAGE_SIZE,self.IMAGE_SIZE))
self.image_data.append([image_temp_resize,class_index])
random.shuffle(self.image_data)
except:
pass
data = np.asanyarray(self.image_data) # or, data = np.asanyarray(self.image_data,dtype=object)
# Iterate over the Data
for x in data:
self.x_data.append(x[0]) # Get the X_Data
self.y_data.append(x[1]) # get the label
X_Data = np.asarray(self.x_data) / (255.0) # Normalize Data
Y_Data = np.asarray(self.y_data)
# reshape x_Data
X_Data = X_Data.reshape(-1, self.IMAGE_SIZE, self.IMAGE_SIZE, 3)
return X_Data, Y_Data
except:
print("Failed to run Function Process Image ")
def pickle_image(self):
"""
:return: None Creates a Pickle Object of DataSet
"""
# Call the Function and Get the Data
X_Data,Y_Data = self.process_image()
# Write the Entire Data into a Pickle File
pickle_out = open('X_Data','wb')
pickle.dump(X_Data, pickle_out)
pickle_out.close()
# Write the Y Label Data
pickle_out = open('Y_Data', 'wb')
pickle.dump(Y_Data, pickle_out)
pickle_out.close()
print("Pickled Image Successfully ")
return X_Data,Y_Data
def load_dataset(self):
try:
# Read the Data from Pickle Object
X_Temp = open('..\SeekThermal\X_Data','rb')
X_Data = pickle.load(X_Temp)
Y_Temp = open('..\SeekThermal\Y_Data','rb')
Y_Data = pickle.load(Y_Temp)
print('Reading Dataset from Pickle Object')
return X_Data,Y_Data
except:
print('Could not Found Pickle File ')
print('Loading File and Dataset ..........')
X_Data,Y_Data = self.pickle_image()
return X_Data,Y_Data
I dont understand what the problem is with the pickle file, because just last week I able to create these arrays successfully with the same code??
Is there an easier way to load images in Tensorflow rather than through the custom defined class?
a = MasterImage(PATH = path_train,IMAGE_SIZE = 224)
a.process_image()
out:
it produces an array with a warning.
VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
data = np.asanyarray(self.image_data)
a.pickle_image()
out:
TypeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_1692\507657192.py in <cell line: 1>()
----> 1 a.pickle_image()
~\AppData\Local\Temp\ipykernel_1692\1410849712.py in pickle_image(self)
71 """
72 # Call the Function and Get the Data
---> 73 X_Data,Y_Data = self.process_image()
74
75 # Write the Entire Data into a Pickle File
TypeError: cannot unpack non-iterable NoneType object
a.load_dataset()
out:
Could not Found Pickle File
Loading File and Dataset ..........
Found Categories ['Car', 'Cat', 'Man', 'Car', 'Cat', 'Man']
Pickled Image Successfully
I'm running Python 3.8.8 via anaconda on Windows 10. Thank you for any advice.

Sagemaker - batch transform] Internal server error : 500

I am trying to do a batch transform on a training dataset in an S3 bucket. I have followed this link:
https://github.com/aws-samples/quicksight-sagemaker-integration-blog
The training data on which transformation is being applied is of ~35 MB.
I am getting these errors:
Bad HTTP status received from algorithm: 500
The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.
Process followed:
1. s3_input_train = sagemaker.TrainingInput(s3_data='s3://{}/{}/rawtrain/'.format(bucket, prefix), content_type='csv')
2. from sagemaker.sklearn.estimator import SKLearn
sagemaker_session = sagemaker.Session()
script_path = 'preprocessing.py'
sklearn_preprocessor = SKLearn(
entry_point=script_path,
role=role,
train_instance_type="ml.c4.xlarge",
framework_version='0.20.0',
py_version = 'py3',
sagemaker_session=sagemaker_session)
sklearn_preprocessor.fit({'train': s3_input_train})
3. transform_train_output_path = 's3://{}/{}/{}/'.format(bucket, prefix, 'transformtrain-train-output')
scikit_learn_inferencee_model = sklearn_preprocessor.create_model(env={'TRANSFORM_MODE': 'feature-transform'})
transformer_train = scikit_learn_inferencee_model.transformer(
instance_count=1,
assemble_with = 'Line',
output_path = transform_train_output_path,
accept = 'text/csv',
strategy = "MultiRecord",
max_payload =40,
instance_type='ml.m4.xlarge')
4. Preprocess training input
transformer_train.transform(s3_input_train.config['DataSource']['S3DataSource']['S3Uri'],
content_type='text/csv',
split_type = "Line")
print('Waiting for transform job: ' + transformer_train.latest_transform_job.job_name)
transformer_train.wait()
preprocessed_train_path = transformer_train.output_path + transformer_train.latest_transform_job.job_name
preprocessing.py
from __future__ import print_function
import time
import sys
from io import StringIO
import os
import shutil
import argparse
import csv
import json
import numpy as np
import pandas as pd
import logging
from sklearn.compose import ColumnTransformer
from sklearn.externals import joblib
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Binarizer, StandardScaler, OneHotEncoder
from sagemaker_containers.beta.framework import (
content_types, encoders, env, modules, transformer, worker)
# Specifying the column names here.
feature_columns_names = [
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K'
]
label_column = 'ab'
feature_columns_dtype = {
'A' : str,
'B' : np.float64,
'C' : np.float64,
'D' : str,
"E" : np.float64,
'F' : str,
'G' : str,
'H' : np.float64,
'I' : str,
'J' : str,
'K': str,
}
label_column_dtype = {'ab': np.int32}
def merge_two_dicts(x, y):
z = x.copy() # start with x's keys and values
z.update(y) # modifies z with y's keys and values & returns None
return z
def _is_inverse_label_transform():
"""Returns True if if it's running in inverse label transform."""
return os.getenv('TRANSFORM_MODE') == 'inverse-label-transform'
def _is_feature_transform():
"""Returns True if it's running in feature transform mode."""
return os.getenv('TRANSFORM_MODE') == 'feature-transform'
if __name__ == '__main__':
parser = argparse.ArgumentParser()
# Sagemaker specific arguments. Defaults are set in the environment variables.
parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
args = parser.parse_args()
input_files = [ os.path.join(args.train, file) for file in os.listdir(args.train) ]
if len(input_files) == 0:
raise ValueError(('There are no files in {}.\n' +
'This usually indicates that the channel ({}) was incorrectly specified,\n' +
'the data specification in S3 was incorrectly specified or the role specified\n' +
'does not have permission to access the data.').format(args.train, "train"))
raw_data = [ pd.read_csv(
file,
header=None,
names=feature_columns_names + [label_column],
dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype)) for file in input_files ]
concat_data = pd.concat(raw_data)
numeric_features = list([
'B',
'C',
'E',
'H'
])
numeric_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy='median')),
('scaler', StandardScaler())])
categorical_features = list(['A','D','F','G','I','J','K'])
categorical_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
('onehot', OneHotEncoder(handle_unknown='ignore'))])
preprocessor = ColumnTransformer(
transformers=[
('num', numeric_transformer, numeric_features),
('cat', categorical_transformer, categorical_features)],
remainder="drop")
preprocessor.fit(concat_data)
joblib.dump(preprocessor, os.path.join(args.model_dir, "model.joblib"))
print("saved model!")
def input_fn(input_data, request_content_type):
"""Parse input data payload
We currently only take csv input. Since we need to process both labelled
and unlabelled data we first determine whether the label column is present
by looking at how many columns were provided.
"""
content_type = request_content_type.lower(
) if request_content_type else "text/csv"
content_type = content_type.split(";")[0].strip()
if isinstance(input_data, str):
str_buffer = input_data
else:
str_buffer = str(input_data,'utf-8')
if _is_feature_transform():
if content_type == 'text/csv':
# Read the raw input data as CSV.
df = pd.read_csv(StringIO(input_data), header=None)
if len(df.columns) == len(feature_columns_names) + 1:
# This is a labelled example, includes the label
df.columns = feature_columns_names + [label_column]
elif len(df.columns) == len(feature_columns_names):
# This is an unlabelled example.
df.columns = feature_columns_names
return df
else:
raise ValueError("{} not supported by script!".format(content_type))
if _is_inverse_label_transform():
if (content_type == 'text/csv' or content_type == 'text/csv; charset=utf-8'):
# Read the raw input data as CSV.
df = pd.read_csv(StringIO(str_buffer), header=None)
if len(df.columns) == len(feature_columns_names) + 1:
# This is a labelled example, includes the ring label
df.columns = feature_columns_names + [label_column]
elif len(df.columns) == len(feature_columns_names):
# This is an unlabelled example.
df.columns = feature_columns_names
return df
else:
raise ValueError("{} not supported by script!".format(content_type))
def output_fn(prediction, accept):
"""Format prediction output
The default accept/content-type between containers for serial inference is JSON.
We also want to set the ContentType or mimetype as the same value as accept so the next
container can read the response payload correctly.
"""
accept = 'text/csv'
if type(prediction) is not np.ndarray:
prediction=prediction.toarray()
if accept == "application/json":
instances = []
for row in prediction.tolist():
instances.append({"features": row})
json_output = {"instances": instances}
return worker.Response(json.dumps(json_output), mimetype=accept)
elif accept == 'text/csv':
return worker.Response(encoders.encode(prediction, accept), mimetype=accept)
else:
raise RuntimeException("{} accept type is not supported by this script.".format(accept))
def predict_fn(input_data, model):
"""Preprocess input data
We implement this because the default predict_fn uses .predict(), but our model is a preprocessor
so we want to use .transform().
The output is returned in the following order:
rest of features either one hot encoded or standardized
"""
if _is_feature_transform():
features = model.transform(input_data)
if label_column in input_data:
# Return the label (as the first column) and the set of features.
return np.insert(features.toarray(), 0, pd.get_dummies(input_data[label_column])['True.'], axis=1)
else:
# Return only the set of features
return features
if _is_inverse_label_transform():
features = input_data.iloc[:,0]>0.5
features = features.values
return features
def model_fn(model_dir):
"""Deserialize fitted model
"""
if _is_feature_transform():
preprocessor = joblib.load(os.path.join(model_dir, "model.joblib"))
return preprocessor
Please help.
As I can see you are referring to a post that is pretty old and has issues open on Github here with regards to the Input source and the configurations.
I encourage you to check out the latest examples here which shows how to visualize Amazon SageMaker machine learning predictions with Amazon QuickSight.
Additionally, if the problem persists please open a service request with AWS Support with job ARN to investigate future on the Internal Server Error.

Incrementing over a URL variable

import urllib2
import pandas as pd
from bs4 import BeautifulSoup
x = 0
i = 1
data = []
while (i < 13):
soup = BeautifulSoup(urllib2.urlopen(
'http://games.espn.com/ffl/tools/projections?&slotCategoryId=4&scoringPeriodId=%d&seasonId=2018&startIndex=' % i, +str(x)).read(), 'html')
tableStats = soup.find("table", ("class", "playerTableTable tableBody"))
for row in tableStats.findAll('tr')[2:]:
col = row.findAll('td')
try:
name = col[0].a.string.strip()
opp = col[1].a.string.strip()
rec = col[10].string.strip()
yds = col[11].string.strip()
dt = col[12].string.strip()
pts = col[13].string.strip()
data.append([name, opp, rec, yds, dt, pts])
except Exception as e:
pass
df = pd.DataFrame(data=data, columns=[
'PLAYER', 'OPP', 'REC', 'YDS', 'TD', 'PTS'])
df
i += 1
I have been working with a fantasy football program and I am trying to increment data over all weeks so I can create a dataframe for the top 40 players for each week.
I have been able to get it for any week of my choice by manually entering the week number in the PeriodId part of the url, but I am trying to programmatically increment it over each week to make it easier. I have tried using PeriodId='+ I +' and PeriodId=%d but I keep getting various errors about str and int concatenate and bad operands. Any suggestions or tips?
Try removing the comma between %i and str(x) to concatenate the strings and see if that helps.
soup = BeautifulSoup(urllib2.urlopen('http://games.espn.com/ffl/tools/projections?&slotCategoryId=4&scoringPeriodId=%d&seasonId=2018&startIndex='%i, +str(x)).read(), 'html')
should be:
soup = BeautifulSoup(urllib2.urlopen('http://games.espn.com/ffl/tools/projections?&slotCategoryId=4&scoringPeriodId=%d&seasonId=2018&startIndex='%i +str(x)).read(), 'html')
if you have problem concatenating or formatting URL please create variable instead write it one line with BeautifulSoup and urllib2.urlopen.
Use parenthesis to format with multiple value like "before %s is %s" % (1, 0)
url = 'http://games.espn.com/ffl/tools/projections?&slotCategoryId=4&scoringPeriodId=%s&seasonId=2018&startIndex=%s' % (i, x)
# or
#url = 'http://games.espn.com/ffl/tools/projections?&slotCategoryId=4&scoringPeriodId=%s&seasonId=2018&startIndex=0' % i
html = urllib2.urlopen(url).read()
soup = BeautifulSoup(html, 'html.parser')
Make the code sorter will not effect the performance.

WatsonApiException: Error: invalid-api-key, Code: 401

I cant find Alchemy Language API in IBM Watson.
Can I do this with natural-language-understanding service and how?
When I add
from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 \
import Features, EntitiesOptions, KeywordsOptions
It shows some error with combined keyword
# In[]:
import tweepy
import re
import time
import math
import pandas as pd
from watson_developer_cloud import AlchemyLanguageV1
def initAlchemy():
al = AlchemyLanguageV1(api_key='GRYVUMdBbOtJXxNOIs1aopjjaiyOmLG7xJBzkAnvvwLh')
return al
def initTwitterApi():
consumer_key = 'OmK1RrZCVJSRmKxIuQqkBExvw'
consumer_secret = 'VWn6OR4rRgSi7qGnZHCblJMhrSvj1QbJmf0f62uX6ZQWZUUx5q'
access_token = '4852231552-adGooMpTB3EJYPHvs6oGZ40qlo3d2JbVjqUUWkJ'
access_token_secret = 'm9hgeM9p0r1nn8IoQWJYBs5qUQu56XmrAhsDSYKjuiVA4'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
return api
'''This function is implemented to handle tweepy exception errors
because search is rate limited at 180 queries per 15 minute window by twitter'''
def limit(cursor):
while True:
try:
yield cursor.next()
except tweepy.TweepError as error:
print(repr(error))
print("Twitter Request limit error reached sleeping for 15 minutes")
time.sleep(16*60)
except tweepy.RateLimitError:
print("Rate Limit Error occurred Sleeping for 16 minutes")
time.sleep(16*60)
def retrieveTweets(api, search, lim):
if(lim == ""):
lim = math.inf
else:
lim = int(lim)
text = []
for tweet in limit(tweepy.Cursor(api.search, q=search).items(limit = lim)):
t = re.sub('\s+', ' ', tweet.text)
text.append(t)
data = {"Tweet":text,
"Sentiment":"",
"Score":""}
dataFrame = pd.DataFrame(data, columns=["Tweet","Sentiment","Score"])
return dataFrame
def analyze(al,dataFrame):
sentiment = []
score = []
for i in range(0, dataFrame["Tweet"].__len__()):
res = al.combined(text=dataFrame["Tweet"][i],
extract="doc-sentiment",
sentiment=1)
sentiment.append(res["docSentiment"]["type"])
if(res["docSentiment"]["type"] == "neutral"):
score.append(0)
else:
score.append(res["docSentiment"]["score"])
dataFrame["Sentiment"] = sentiment
dataFrame["Score"] = score
return dataFrame
def main():
#Initialse Twitter Api
api = initTwitterApi()
#Retrieve tweets
dataFrame = retrieveTweets(api,input("Enter the search query (e.g. #hillaryclinton ) : "), input("Enter limit for number of tweets to be searched or else just hit enter : "))
#Initialise IBM Watson Alchemy Language Api
al = initAlchemy()
#Do Document Sentiment analysis
dataFrame = analyze(al, dataFrame)
#Save tweets, sentiment, and score data frame in csv file
dataFrame.to_csv(input("Enter the name of the file (with .csv extension) : "))
if __name__ == '__main__':
main()# -*- coding: utf-8 -*-
The Watson Natural Language Understanding only has a combined call, but since it is the only call, it isn't called combined, its actually analyze. Best place to go for details would be the API documentation - https://www.ibm.com/watson/developercloud/natural-language-understanding/api/v1/?python#post-analyze

Resources