Is there any way of using Watson's image-classifying abilities to extract information from an image of a document, rather than simply classifying the image as a, b or c?
Visit here to learn more about Watson Visual Recognition.
Raw code (final version), just for reference; read up on how to create a classifier and implement your own:
import json
from os.path import join, dirname
from watson_developer_cloud import VisualRecognitionV3

test_url = 'https://url-to-ran-image.jpg'

visual_recognition = VisualRecognitionV3('2017-01-11', api_key='YOUR API KEY')

# Classify a local zip of images against a custom classifier and the default one.
tree_path = join(dirname(__file__), '../resources/trees.zip')
with open(tree_path, 'rb') as images_file:
    tree_results = visual_recognition.classify(images_file=images_file,
                                               threshold=0.1,
                                               classifier_ids=['Carsvsmango_1479118188',
                                                               'default'])
print(json.dumps(tree_results, indent=2))

# Classify and detect faces on a remote image URL.
url_result = visual_recognition.classify(images_url=test_url)
print(json.dumps(url_result, indent=2))

faces_result = visual_recognition.detect_faces(images_url=test_url)
print(json.dumps(faces_result, indent=2))

# List the classifiers available on this account.
print(json.dumps(visual_recognition.list_classifiers(), indent=2))

# Text recognition -- the part relevant to pulling text out of a document image.
file_path = join(dirname(__file__), '../some-random-text-image.png')
with open(file_path, 'rb') as image_file:
    text_results = visual_recognition.recognize_text(images_file=image_file)
print(json.dumps(text_results, indent=2))

# Face detection on a local file.
face_path = join(dirname(__file__), '../face.jpg')
with open(face_path, 'rb') as image_file:
    face_result = visual_recognition.detect_faces(images_file=image_file)
print(json.dumps(face_result, indent=2))
[Diagram: overview of Watson's Visual Recognition platform]
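Since the comment in the snippet mentions creating a classifier of your own, here is a hedged sketch of how that could look with the same SDK; the classifier name, the zip paths and the 'cars' class are placeholders, not anything from the original question:

# Sketch only: train a custom classifier with the watson_developer_cloud SDK.
# 'carsvsothers', the zip files and the 'cars' class are hypothetical examples.
import json
from os.path import join, dirname
from watson_developer_cloud import VisualRecognitionV3

visual_recognition = VisualRecognitionV3('2017-01-11', api_key='YOUR API KEY')

with open(join(dirname(__file__), '../resources/cars.zip'), 'rb') as cars, \
     open(join(dirname(__file__), '../resources/not_cars.zip'), 'rb') as negatives:
    # each '<class>_positive_examples' argument becomes a class of the new classifier
    classifier = visual_recognition.create_classifier('carsvsothers',
                                                      cars_positive_examples=cars,
                                                      negative_examples=negatives)
print(json.dumps(classifier, indent=2))

Training takes a few minutes; once list_classifiers() reports the new classifier as ready, its ID can be passed to classify() exactly as in the snippet above.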
Related
I am using the code snippet given below, and it runs without error, but the converted file does not have a .png extension even though I am passing png as the outputFormat.
I am running it in Colab, and I have attached the output as well.
from osgeo import gdal
import numpy as np
import os
import subprocess

def _16bit_to_8Bit(inputRaster, outputRaster, outputPixType='Byte', outputFormat='png',
                   percentiles=[2, 98]):
    # Convert 16-bit image to 8-bit
    # Source: Medium.com, 'Creating Training Datasets for the SpaceNet Road Detection and Routing
    # Challenge' by Adam Van Etten and Jake Shermeyer
    srcRaster = gdal.Open(inputRaster)
    cmd = ['gdal_translate', '-ot', outputPixType, '-of',
           outputFormat]

    # iterate through bands
    for bandId in range(srcRaster.RasterCount):
        bandId = bandId + 1
        band = srcRaster.GetRasterBand(bandId)

        bmin = band.GetMinimum()
        bmax = band.GetMaximum()
        # if the minimum and maximum values do not exist, compute them
        if bmin is None or bmax is None:
            (bmin, bmax) = band.ComputeRasterMinMax(1)
        # rescale to the given percentiles
        band_arr_tmp = band.ReadAsArray()
        bmin = np.percentile(band_arr_tmp.flatten(), percentiles[0])
        bmax = np.percentile(band_arr_tmp.flatten(), percentiles[1])

        cmd.append('-scale_{}'.format(bandId))
        cmd.append('{}'.format(bmin))
        cmd.append('{}'.format(bmax))
        cmd.append('{}'.format(0))
        cmd.append('{}'.format(255))

    cmd.append(inputRaster)
    cmd.append(outputRaster)
    print("Conversion command:", cmd)
    subprocess.call(cmd)

path = "/content/drive/MyDrive/Spacenet_data/RGB_Pan/"
files = os.listdir(path)

for file in files:
    resimPath = path + file
    dstPath = "/content/drive/MyDrive/Spacenet_data/"
    dstPath = dstPath + file
    _16bit_to_8Bit(resimPath, dstPath)
My output looks like this:
Conversion command: ['gdal_translate', '-ot', 'Byte', '-of', 'png', '-scale_1', '149.0', '863.0', '0', '255', '-scale_2', '244.0', '823.0200000000186', '0', '255', '-scale_3', '243.0', '568.0', '0', '255', '/content/drive/MyDrive/Spacenet_data/RGB_Pan/img0.tif', '/content/drive/MyDrive/Spacenet_data/img0.tif']
Make the change below (give the destination file a .png extension when building its path) and you are done.
from osgeo import gdal
import numpy as np
import os
import subprocess

def _16bit_to_8Bit(inputRaster, outputRaster, outputPixType='Byte',
                   outputFormat='png', percentiles=[2, 98]):
    srcRaster = gdal.Open(inputRaster)
    cmd = ['gdal_translate', '-ot', outputPixType, '-of',
           outputFormat]

    for bandId in range(srcRaster.RasterCount):
        bandId = bandId + 1
        band = srcRaster.GetRasterBand(bandId)

        bmin = band.GetMinimum()
        bmax = band.GetMaximum()
        # if the minimum and maximum values do not exist, compute them
        if bmin is None or bmax is None:
            (bmin, bmax) = band.ComputeRasterMinMax(1)
        # rescale to the given percentiles
        band_arr_tmp = band.ReadAsArray()
        bmin = np.percentile(band_arr_tmp.flatten(), percentiles[0])
        bmax = np.percentile(band_arr_tmp.flatten(), percentiles[1])

        cmd.append('-scale_{}'.format(bandId))
        cmd.append('{}'.format(bmin))
        cmd.append('{}'.format(bmax))
        cmd.append('{}'.format(0))
        cmd.append('{}'.format(255))

    cmd.append(inputRaster)
    cmd.append(outputRaster)
    print("Conversion command:", cmd)
    subprocess.call(cmd)

path = "/content/drive/MyDrive/Spacenet_data/RGB_Pan/"
files = os.listdir(path)

for file in files:
    resimPath = path + file
    dstPath = "/content/drive/MyDrive/Spacenet_data/"
    dstPath = dstPath + file[:-3] + "png"   # replace the .tif extension with .png
    _16bit_to_8Bit(resimPath, dstPath)
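If the input extensions vary in length, a slightly more general way to build the destination name (a sketch of the same idea, not part of the original answer) is os.path.splitext:

# Sketch only: swap whatever extension the source file has for .png.
import os

dst_dir = "/content/drive/MyDrive/Spacenet_data/"
for file in os.listdir(path):
    base, _ = os.path.splitext(file)                 # strip the existing extension
    _16bit_to_8Bit(os.path.join(path, file),
                   os.path.join(dst_dir, base + ".png"))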
An alternative is to convert the .tif files to .jpg with OpenCV:

import os
import cv2

directory = os.fsencode(r"path")

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    if filename.endswith(".tif"):
        print(filename)
        print(type(filename))
        print("\n")
        image = cv2.imread(filename)
        cv2.imwrite("{}.jpg".format(filename), image)
        continue
    else:
        continue
I need to load an sklearn model from a pickle file in C, but I cannot find how to do that. This is my code and my model:
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import pickle
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values
X = array[:, 0:8]
Y = array[:, 8]
test_size = 0.33
seed = 7
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=test_size, random_state=seed)
# Fit the model on 33%
model = LogisticRegression()
model.fit(X_train, Y_train)
print(model.score(X_test, Y_test))
# save the model to disk
filename = 'finalized_model.sav'
pickle.dump(model, open(filename, 'wb'))
I do not know how to do that. I would be grateful if you could help me, please.
There are always workarounds; one such way could be:
Write a Python Flask-based API/microservice over your Python code and call it from C using the libcurl library. I have assumed that your test data will be a single-column CSV file; all Flask apps run on localhost:5000 by default. If your data doesn't look like that, write your own app with proper request-argument handling. This piece of code is for reference only.
# Python Flask code
import pandas
import pickle
from flask import Flask, jsonify

app = Flask(__name__)

@app.route('/your_API_name', methods=['POST'])
def your_model():
    test_data = pandas.read_csv('test_data.csv')
    model = pickle.load(open("your_model.pkl", "rb"))
    pred_result = model.predict(test_data)
    return jsonify(pred_result.tolist())

app.run()
/* C code */
CURL *hnd = curl_easy_init();
curl_easy_setopt(hnd, CURLOPT_CUSTOMREQUEST, "POST");
curl_easy_setopt(hnd, CURLOPT_URL, "http://localhost:5000/your_API_name");

struct curl_slist *headers = NULL;
const char *test_data_as_json_string = "{}";   /* fill in your JSON payload here */
headers = curl_slist_append(headers, "any_parameter: value");
headers = curl_slist_append(headers, "content-type: application/json");
curl_easy_setopt(hnd, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(hnd, CURLOPT_POSTFIELDS, test_data_as_json_string);

CURLcode ret = curl_easy_perform(hnd);
This code, too, is for reference; write your own C code, or copy similar code from Postman's code-generation section while the Flask API is running.
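Before wiring up the C side, it can help to confirm that the Flask endpoint answers at all. A minimal sketch using the Python requests library (the URL and the empty payload are placeholders matching the snippets above):

# Sketch only: sanity-check the Flask endpoint before calling it from C.
import requests

payload = {}  # fill in whatever JSON body your API expects
resp = requests.post("http://localhost:5000/your_API_name", json=payload)
print(resp.status_code)
print(resp.text)   # should contain the predictions returned by the Flask app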
I'm trying to embed Python in my C application. I downloaded the package from the official Python website and managed to do a simple Hello World.
Now I want to go deeper and use some Python libraries like numpy, keras, tensorflow...
I'm working with Python 3.5.4. I installed all the needed packages on my PC with pip3:
pip3 install keras
pip3 install tensorflow
...
Then I created my script and launched it in the Python environment; it works fine:
Python:
# Importing the libraries
#
import numpy as np
import pandas as pd
dataset2 = pd.read_csv('I:\RNA\dataset19.csv')
X_test = dataset2.iloc[:, 0:228].values
y_test = dataset2.iloc[:, 228].values
# 2.
import pickle
sc = pickle.load(open('I:\RNA\isVerb_sc', 'rb'))
X_test = sc.transform(X_test)
# 3.
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
classifier = Sequential()
classifier.add(Dense(units = 114, kernel_initializer = 'uniform', activation = 'relu', input_dim = 228))
classifier.add(Dropout(p = 0.3))
classifier.add(Dense(units = 114, kernel_initializer = 'uniform', activation = 'relu'))
classifier.add(Dropout(p = 0.3))
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
classifier.load_weights('I:\RNA\isVerb_weights.h5')
y_pred = classifier.predict(X_test)
y_pred1 = (y_pred > 0.5)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred1)
But when I execute the same script in a C environment with embedded Python, it doesn't work.
At first I executed my script directly with PyRun_SimpleFile, with no luck, so I sliced it into multiple instructions with PyRun_SimpleString to pinpoint the problem:
C:
result = PyRun_SimpleString("import numpy as np"); // result = 0 (ok)
result = PyRun_SimpleString("import pandas as pd"); // result = 0 (ok)
...
result = PyRun_SimpleString("import pickle"); // result = 0 (ok)
... (all instructions above work)
result = PyRun_SimpleString("import keras"); // result = -1 !!
... (everything below this fails)
but there is not a single stack trace for this error. I tried this, but I just got:
"Here's the output: (null)"
My initialization of Python in C seems correct, since other libraries import fine:
// Python
wchar_t *stdProgramName = L"I:\\LIBs\\cpython354";
Py_SetProgramName(stdProgramName);
wchar_t *stdPythonHome = L"I:\\LIBs\\cpython354";
Py_SetPythonHome(stdPythonHome);
wchar_t *stdlib = L"I:\\LIBs\\cpython354;I:\\LIBs\\cpython354\\Lib\\python35.zip;I:\\LIBs\\cpython354\\Lib;I:\\LIBs\\cpython354\\DLLs;I:\\LIBs\\cpython354\\Lib\\site-packages";
Py_SetPath(stdlib);
// Initialize Python
Py_Initialize();
When run inside a Python prompt, the line import keras takes some time (about 3 seconds) but works (there is a warning, but I found no harm in it):
>>> import keras
I:\LIBs\cpython354\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
from ._conv import register_converters as _register_converters
Using TensorFlow backend.
>>>
I'm at a loss now; I don't know where to look, since there is no stack trace.
It seems that when you import keras, it executes this line:
sys.stderr.write('Using TensorFlow backend.\n')
but sys.stderr is not defined in Python embedded on Windows.
A simple correction is to define sys.stderr, for example:
import sys

class CatchOutErr:
    def __init__(self):
        self.value = ''
    def write(self, txt):
        self.value += txt

catchOutErr = CatchOutErr()
sys.stderr = catchOutErr
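A small follow-up sketch of how the captured text could be read back after a failing import, so the real traceback becomes visible (it assumes the redirect above is already in place):

# Sketch only: with sys.stderr redirected as above, retry the import and dump
# whatever was written, together with the exception traceback.
import traceback

try:
    import keras
except Exception:
    traceback.print_exc(file=catchOutErr)   # the traceback also lands in catchOutErr.value

print(catchOutErr.value)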
The app runs, but the loop over the range doesn't: in my CSV file it only shows the first entry. I've also come across index-out-of-range errors when scraping other fields. Any help would be appreciated; I'm learning.
import requests
import csv
from bs4 import BeautifulSoup

f = csv.writer(open('salons.csv', 'w'))
f.writerow(['Name'])

pages = []
for i in range(0, 10600):
    url = 'http://www.aveda.com/locator/get_the_facts.tmpl?SalonID=' + str(i) + ' '
    pages.append(url)

for item in pages:
    page = requests.get(item)
    soup = BeautifulSoup(page.text, 'lxml')
    salon_name_list = soup.find(class_='getthefacts__store_meta_info--store_phone')
    salon_name_list_items = salon_name_list.find_all('li', class_='phone')
    for salon_name in salon_name_list_items:
        names = salon_name.contents[0]
        f.writerow([names])
The way you tried to find phone numbers is not how you should do it. The phone numbers are within an a tag under the class name phone. Try this instead; it will fetch the phone numbers you are interested in:
import requests
import csv
from bs4 import BeautifulSoup

outfile = open('salons.csv', 'w')
writer = csv.writer(outfile)
writer.writerow(['Name'])

for i in range(0, 10600):
    url = 'http://www.aveda.com/locator/get_the_facts.tmpl?SalonID={0}'.format(i)
    page = requests.get(url)
    soup = BeautifulSoup(page.text, 'lxml')
    for salon_name in soup.select('.phone a'):
        names = salon_name.text
        print(names)
        writer.writerow([names])

outfile.close()
I'm not sure how you have indented your code; format it properly in the question. Also, you may not need two for loops.
import requests
import csv
from bs4 import BeautifulSoup

f = csv.writer(open('salons.csv', 'w'))
f.writerow(['Name'])

for i in range(0, 10600):
    url = 'http://www.aveda.com/locator/get_the_facts.tmpl?SalonID=' + str(i) + '/'
    page = requests.get(url)
    soup = BeautifulSoup(page.text, 'lxml')
    salon_name_list = soup.find(class_='getthefacts__store_meta_info--store_phone')
    salon_name_list_items = salon_name_list.find_all('li', class_='phone')
    for salon_name in salon_name_list_items:
        names = salon_name.contents[0]
        f.writerow([names])
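Regarding the index-out-of-range errors mentioned in the question: some SalonID pages have no matching element, so soup.find(...) returns None and the next line fails. A short sketch of the same loop with a guard added (built on the selectors above, not part of the original answers):

# Sketch only: skip pages where the expected store block is missing.
import requests
import csv
from bs4 import BeautifulSoup

f = csv.writer(open('salons.csv', 'w'))
f.writerow(['Name'])

for i in range(0, 10600):
    url = 'http://www.aveda.com/locator/get_the_facts.tmpl?SalonID=' + str(i)
    soup = BeautifulSoup(requests.get(url).text, 'lxml')
    salon_name_list = soup.find(class_='getthefacts__store_meta_info--store_phone')
    if salon_name_list is None:
        continue  # nothing to scrape on this page, skip instead of crashing
    for salon_name in salon_name_list.find_all('li', class_='phone'):
        if salon_name.contents:  # guard against empty <li> elements
            f.writerow([salon_name.contents[0]])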
I need to convert the WordNet database files (noun.shape, noun.state, verb.cognition, etc.) from their custom extensions to .txt in order to more easily extract their nouns, verbs, adjectives and adverbs by custom category.
In other words, in "DATABASE FILES ONLY" you'll find the files I'm looking for; unfortunately they have a .STATE or .SHAPE extension. They are readable in Notepad, but I need a list of all the items in those files without their definitions in parentheses.
If you're using WordNet simply as a dictionary, you can try Open Multilingual WordNet, see http://compling.hss.ntu.edu.sg/omw/
import os, codecs
from nltk.corpus import wordnet as wn

# Read Open Multilingual WordNet's .tab file
# (note: this snippet is Python 2 -- print statement, str.decode)
def readWNfile(wnfile, option="ss"):
    reader = codecs.open(wnfile, "r", "utf8").readlines()
    wn = {}
    for l in reader:
        if l[0] == "#":
            continue
        if option == "ss":
            k = l.split("\t")[0]        # synset offset as key
            v = l.split("\t")[2][:-1]   # word
        else:
            v = l.split("\t")[0]        # synset offset as value
            k = l.split("\t")[2][:-1]   # word as key
        try:
            temp = wn[k]
            wn[k] = temp + ";" + v
        except KeyError:
            wn[k] = v
    return wn

if not os.path.exists('msa/wn-data-zsm.tab'):
    os.system('wget http://compling.hss.ntu.edu.sg/omw/wns/zsm.zip')
    os.system('unzip zsm.zip')

msa_wn = readWNfile('msa/wn-data-zsm.tab')

eng_wn_keys = {(str(i.offset).zfill(8) + '-' + i.pos).decode('utf8'): i for i in wn.all_synsets()}

for i in set(eng_wn_keys).intersection(msa_wn.keys()):
    print eng_wn_keys[i], msa_wn[i]
Meanwhile, hold on for a while, because the NLTK developers are going to put the Open Multilingual WordNet API together soon; see https://github.com/nltk/nltk/blob/develop/nltk/corpus/reader/wordnet.py from line 1048.
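If the goal is just a plain word list per lexicographer file (noun.shape, noun.state, ...), NLTK's WordNet reader can also be filtered by lexname. A minimal sketch of that idea (it assumes the wordnet corpus has already been downloaded with nltk.download('wordnet'); the output file name is just an example):

# Sketch only: dump the lemmas of every synset in one lexicographer category
# (e.g. noun.shape) to a plain text file, one word per line, no definitions.
from nltk.corpus import wordnet as wn

def dump_lexname(lexname, outfile):
    words = set()
    for synset in wn.all_synsets():
        if synset.lexname() == lexname:
            words.update(synset.lemma_names())
    with open(outfile, 'w') as f:
        for w in sorted(words):
            f.write(w + '\n')

dump_lexname('noun.shape', 'noun.shape.txt')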