numpy.loadtxt -> could not convert string to float: '-0,0118' - arrays

When I try to run this code:
x,y = np.genfromtxt('Diode_A_Upcd.txt', unpack = True, delimiter = ';' )
I get this:
Array Data
My data:
Diode_A_Upcd.txt
I would like to store each row in individual arrays in order to then plot them

The error message is due to the string being 0,0118. Numpy needs floats to be 0.0118, decimal point not a decimal comma. np.loadtxt has an optional argument converters to allow a function to convert strings in one format to another.
import numpy as np
test_string = [ '0,123;5,234', '0,789;123,45']
def translate( s ):
s = s.replace( b',', b'.' )
return s
conv = { 0: translate, 1: translate }
# Apply translate to both columns.
result = np.loadtxt( test_string, delimiter = ';', converters = conv )
result
# array([[1.2300e-01, 5.2340e+00],
# [7.8900e-01, 1.2345e+02]])

Related

Python, face_recognition convert string to array

I want to convert a variable to a string and then to an array that I can use to compare, but i dont know how to do that.
my code:
import face_recognition
import numpy as np
a = face_recognition.load_image_file('C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191115_10_32_24_Pro.jpg') # my picture 1
b = face_recognition.load_image_file('C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191115_09_48_56_Pro.jpg') # my picture 2
c = face_recognition.load_image_file(
'C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191115_09_48_52_Pro.jpg') # my picture 3
d = face_recognition.load_image_file('C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\ziv sion.jpg') # my picture 4
e = face_recognition.load_image_file(
'C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191120_17_46_40_Pro.jpg') # my picture 5
f = face_recognition.load_image_file(
'C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191117_16_19_11_Pro.jpg') # my picture 6
a = face_recognition.face_encodings(a)[0]
b = face_recognition.face_encodings(b)[0]
c = face_recognition.face_encodings(c)[0]
d = face_recognition.face_encodings(d)[0]
e = face_recognition.face_encodings(e)[0]
f = face_recognition.face_encodings(f)[0]
Here I tried to convert the variable to a string
str_variable = str(a)
array_variable = np.array(str_variable)
my_face = a, b, c, d, e, f, array_variable
while True:
new = input('path: ')
print('Recognizing...')
unknown = face_recognition.load_image_file(new)
unknown_encodings = face_recognition.face_encodings(unknown)[0]
The program cannot use the variable:
results = face_recognition.compare_faces(array_variable, unknown_encodings, tolerance=0.4)
print(results)
recognize_times = int(results.count(True))
if (3 <= recognize_times):
print('hello boss!')
my_face = *my_face, unknown_encodings
please help me
The error shown:
Traceback (most recent call last):
File "C:/Users/zivsi/PycharmProjects/AI/pytt.py", line 37, in <module>
results = face_recognition.compare_faces(my_face, unknown_encodings, tolerance=0.4)
File "C:\Users\zivsi\AppData\Local\Programs\Python\Python36\lib\site-
packages\face_recognition\api.py", line 222, in compare_faces
return list(face_distance(known_face_encodings, face_encoding_to_check) <= tolerance)
File "C:\Users\zivsi\AppData\Local\Programs\Python\Python36\lib\site-packages\face_recognition\api.py", line 72, in face_distance
return np.linalg.norm(face_encodings - face_to_compare, axis=1)
ValueError: operands could not be broadcast together with shapes (7,) (128,)
First of all, the array_variable should actually be a list of the known encodings and not a numpy array.
Also you do not need str.
Now, in your case, if the input images i.e., a,b,c,d,f,e do NOT have the same dimensions, the error will persist. You can not compare images that have different sizes using this function. The reason is that the comparison is based on the distance and distance is defined on vectors of the same length.
Here is a working simple example using the photos from https://github.com/ageitgey/face_recognition/tree/master/examples:
import face_recognition
import numpy as np
from PIL import Image, ImageDraw
from IPython.display import display
# Load a sample picture and learn how to recognize it.
obama_image = face_recognition.load_image_file("obama.jpg")
obama_face_encoding = face_recognition.face_encodings(obama_image)[0]
# Load a second sample picture and learn how to recognize it.
biden_image = face_recognition.load_image_file("biden.jpg")
biden_face_encoding = face_recognition.face_encodings(biden_image)[0]
array_variable = [obama_face_encoding,biden_face_encoding] # list of known encodings
# compare the list with the biden_face_encoding
results = face_recognition.compare_faces(array_variable, biden_face_encoding, tolerance=0.4)
print(results)
[False, True] # True means match, False mismatch
# False: coming from obama_face_encoding VS biden_face_encoding
# True: coming from biden_face_encoding VS biden_face_encoding
To run it go here: https://beta.deepnote.com/project/09705740-31c0-4d9a-8890-269ff1c3dfaf#
Documentation: https://face-recognition.readthedocs.io/en/latest/face_recognition.html
EDIT
To save the known encodings you can use numpy.save
np.save('encodings',biden_face_encoding) # save
load_again = np.load('encodings.npy') # load again

Incrementing over a URL variable

import urllib2
import pandas as pd
from bs4 import BeautifulSoup
x = 0
i = 1
data = []
while (i < 13):
soup = BeautifulSoup(urllib2.urlopen(
'http://games.espn.com/ffl/tools/projections?&slotCategoryId=4&scoringPeriodId=%d&seasonId=2018&startIndex=' % i, +str(x)).read(), 'html')
tableStats = soup.find("table", ("class", "playerTableTable tableBody"))
for row in tableStats.findAll('tr')[2:]:
col = row.findAll('td')
try:
name = col[0].a.string.strip()
opp = col[1].a.string.strip()
rec = col[10].string.strip()
yds = col[11].string.strip()
dt = col[12].string.strip()
pts = col[13].string.strip()
data.append([name, opp, rec, yds, dt, pts])
except Exception as e:
pass
df = pd.DataFrame(data=data, columns=[
'PLAYER', 'OPP', 'REC', 'YDS', 'TD', 'PTS'])
df
i += 1
I have been working with a fantasy football program and I am trying to increment data over all weeks so I can create a dataframe for the top 40 players for each week.
I have been able to get it for any week of my choice by manually entering the week number in the PeriodId part of the url, but I am trying to programmatically increment it over each week to make it easier. I have tried using PeriodId='+ I +' and PeriodId=%d but I keep getting various errors about str and int concatenate and bad operands. Any suggestions or tips?
Try removing the comma between %i and str(x) to concatenate the strings and see if that helps.
soup = BeautifulSoup(urllib2.urlopen('http://games.espn.com/ffl/tools/projections?&slotCategoryId=4&scoringPeriodId=%d&seasonId=2018&startIndex='%i, +str(x)).read(), 'html')
should be:
soup = BeautifulSoup(urllib2.urlopen('http://games.espn.com/ffl/tools/projections?&slotCategoryId=4&scoringPeriodId=%d&seasonId=2018&startIndex='%i +str(x)).read(), 'html')
if you have problem concatenating or formatting URL please create variable instead write it one line with BeautifulSoup and urllib2.urlopen.
Use parenthesis to format with multiple value like "before %s is %s" % (1, 0)
url = 'http://games.espn.com/ffl/tools/projections?&slotCategoryId=4&scoringPeriodId=%s&seasonId=2018&startIndex=%s' % (i, x)
# or
#url = 'http://games.espn.com/ffl/tools/projections?&slotCategoryId=4&scoringPeriodId=%s&seasonId=2018&startIndex=0' % i
html = urllib2.urlopen(url).read()
soup = BeautifulSoup(html, 'html.parser')
Make the code sorter will not effect the performance.

Scala: Parsing Array of String to a case class

I have created a case class like this:
def case_class(): Unit = {
case class StockPrice(quarter : Byte,
stock : String,
date : String,
open : Double,
high : Double,
low : Double,
close : Double,
volume : Double,
percent_change_price : Double,
percent_change_volume_over_last_wk : Double,
previous_weeks_volume : Double,
next_weeks_open : Double,
next_weeks_close : Double,
percent_change_next_weeks_price : Double,
days_to_next_dividend : Double,
percent_return_next_dividend : Double
)
And I have thousands of line as Array of String like this:
1,AA,1/7/2011,$15.82,$16.72,$15.78,$16.42,239655616,3.79267,,,$16.71,$15.97,-4.42849,26,0.182704
1,AA,1/14/2011,$16.71,$16.71,$15.64,$15.97,242963398,-4.42849,1.380223028,239655616,$16.19,$15.79,-2.47066,19,0.187852
1,AA,1/21/2011,$16.19,$16.38,$15.60,$15.79,138428495,-2.47066,-43.02495926,242963398,$15.87,$16.13,1.63831,12,0.189994
1,AA,1/28/2011,$15.87,$16.63,$15.82,$16.13,151379173,1.63831,9.355500109,138428495,$16.18,$17.14,5.93325,5,0.185989
How Can I parse data from Array into that case class?
Thank you for your help!
You can proceed as below (I've taken simplified example)
Given your case class and data (lines)
// Your case-class
case class MyCaseClass(
fieldByte: Byte,
fieldString: String,
fieldDouble: Double
)
// input data
val lines: List[String] = List(
"1,AA,$1.1",
"2,BB,$2.2",
"3,CC,$3.3"
)
Note: you can read lines from a text file as
val lines = Source.fromFile("my_file.txt").getLines.toList
You can have some utility methods for mapping (cleaning & parsing)
// remove '$' symbols from string
def removeDollars(line: String): String = line.replaceAll("\\$", "")
// split string into tokens and
// convert into MyCaseClass object
def parseLine(line: String): MyCaseClass = {
val tokens: Seq[String] = line.split(",")
MyCaseClass(
fieldByte = tokens(0).toByte,
fieldString = tokens(1),
fieldDouble = tokens(2).toDouble
)
}
And then use them to convert strings into case-class objects
// conversion
val myCaseClassObjects: Seq[MyCaseClass] = lines.map(removeDollars).map(parseLine)
As a more advanced (and generalized) approach, you can generate the mapping (parsing) function for converting tokens into fields of your case-class using something like reflection, as told here
Here's one way of doing it. I'd recommend splitting everything you do up into lots of small, easy-to-manage functions, otherwise you will get lost trying to figure out where something is going wrong if it all starts throwing exceptions. Data setup:
val array = Array("1,AA,1/7/2011,$15.82,$16.72,$15.78,$16.42,239655616,3.79267,,,$16.71,$15.97,-4.42849,26,0.182704",
"1,AA,1/14/2011,$16.71,$16.71,$15.64,$15.97,242963398,-4.42849,1.380223028,239655616,$16.19,$15.79,-2.47066,19,0.187852",
"1,AA,1/21/2011,$16.19,$16.38,$15.60,$15.79,138428495,-2.47066,-43.02495926,242963398,$15.87,$16.13,1.63831,12,0.189994",
"1,AA,1/28/2011,$15.87,$16.63,$15.82,$16.13,151379173,1.63831,9.355500109,138428495,$16.18,$17.14,5.93325,5,0.185989")
case class StockPrice(quarter: Byte, stock: String, date: String, open: Double,
high: Double, low: Double, close: Double, volume: Double, percent_change_price: Double,
percent_change_volume_over_last_wk: Double, previous_weeks_volume: Double,
next_weeks_open: Double, next_weeks_close: Double, percent_change_next_weeks_price: Double,
days_to_next_dividend: Double, percent_return_next_dividend: Double
)
Function to turn Array[String] into Array[List[String]] and handle any empty fields (I've made an assumption here that you want empty fields to be 0. Change this as necessary):
def splitArray(arr: Array[String]): Array[List[String]] = {
arr.map(
_.replaceAll("\\$", "") // Remove $
.split(",") // Split by ,
.map {
case x if x.isEmpty => "0" // If empty
case y => y // If not empty
}
.toList
)
}
Function to turn a List[String] into a StockPrice. Note that this will fall over if the List isn't exactly 16 items long. I'll leave you to handle any of that. Also, the names are pretty non-descriptive so you can change that too. It will also fall over if your data doesn't map to the relevant .toDouble or toByte or whatever - you can handle this yourself too:
def toStockPrice: List[String] => StockPrice = {
case a :: b :: c :: d :: e :: f :: g :: h :: i :: j :: k :: l :: m :: n :: o :: p :: Nil =>
StockPrice(a.toByte, b, c, d.toDouble, e.toDouble, f.toDouble, g.toDouble, h.toDouble, i.toDouble, j.toDouble,
k.toDouble, l.toDouble, m.toDouble, n.toDouble, o.toDouble, p.toDouble)
}
A nice function to bring this all together:
def makeCaseClass(arr: Array[String]): Seq[StockPrice] = {
val splitArr: Array[List[String]] = splitArray(arr)
splitArr.map(toStockPrice)
}
Output:
println(makeCaseClass(array))
//ArraySeq(
// StockPrice(1,AA,1/7/2011,15.82,16.72,15.78,16.42,2.39655616E8,3.79267,0.0,0.0,16.71,15.97,-4.42849,26.0,0.182704),
// StockPrice(1,AA,1/14/2011,16.71,16.71,15.64,15.97,2.42963398E8,-4.42849,1.380223028,2.39655616E8,16.19,15.79,-2.47066,19.0,0.187852),
// StockPrice(1,AA,1/21/2011,16.19,16.38,15.6,15.79,1.38428495E8,-2.47066,-43.02495926,2.42963398E8,15.87,16.13,1.63831,12.0,0.189994),
// StockPrice(1,AA,1/28/2011,15.87,16.63,15.82,16.13,1.51379173E8,1.63831,9.355500109,1.38428495E8,16.18,17.14,5.93325,5.0,0.185989)
//)
Edit:
To explain the a :: b :: c ..... bit - this is a way of assigning names to items in a List or Seq, given you know the List's size.
val ls = List(1, 2, 3)
val a :: b :: c :: Nil = List(1, 2, 3)
println(a == ls.head) // true
println(b == ls(1)) // true
println(c == ls(2)) // true
Note that the Nil is important because it signifies the last element of the List being Nil. Without it, c would be equal to List(3) as the rest of any List is assigned to the last value in your definition.
You can use this in pattern matching as I have in order to do something with the results:
val ls = List(1, "b", true)
ls match {
case a :: b :: c if c == true => println("this will not be printed")
case a :: b :: c :: Nil if c == true => println(s"this will get printed because c == $c")
} // not exhaustive but you get the point
You can also use it if you know what you want each element in the List to be, like this:
val personCharacteristics = List("James", 26, "blue", 6, 85.4, "brown")
val name :: age :: eyeColour :: otherCharacteristics = personCharacteristics
println(s"Name: $name; Age: $age; Eye colour: $eyeColour")
// Name: James; Age: 26; Eye colour: blue
Obviously these examples are pretty trivial and not exactly what you'd see as a professional Scala developer (at least I don't), but it's a handy thing to be aware of as I do still use this :: syntax at work sometimes.

Sentiment140 Preprocessing

I have been trying to do some preprocessing on the Sentiment140 database on Kaggle: https://www.kaggle.com/kazanova/sentiment140
The code I'm using is this:
import os
from nltk.stem.lancaster import LancasterStemmer
from nltk.tokenize import RegexpTokenizer
Base_location = ''
dataset_location = os.path.join(Base_location, 'Sentiment140.csv')
corpus = []
labels = []
# Parse tweets and sentiments
with open(dataset_location, 'r', encoding='latin-1') as df:
for i, line in enumerate(df):
parts = line.strip().split(',')
# Sentiment (0 = Negative, 1 = Positive)
labels.append(str(parts[0].strip()))
# Tweet
tweet = parts[5].strip()
if tweet.startswith('"'):
tweet = tweet[1:]
if tweet.endswith('"'):
tweet = tweet[::-1]
corpus.append(tweet.strip().lower())
print('Corpus size: {}'.format(len(corpus)))
# Tokenize and stem
tkr = RegexpTokenizer('[a-zA-Z0-9#]+')
stemmer = LancasterStemmer()
tokenized_corpus = []
for i, tweet in enumerate(corpus):
tokens = [stemmer.stem(t) for t in tkr.tokenize(tweet) if not t.startswith('#')]
tokenized_corpus.append(tokens)
print(tokenized_corpus)
However, I keep getting this error:
TypeError: '_io.TextIOWrapper' object is not subscriptable
Can anyone help me understand how to solve the issue?
Thanks in advance
TL;DR
To read .csv or structured datasets, use pandas https://pandas.pydata.org/ or any other dataframe libraries.
In Long:
Instead of doing:
Base_location = ''
dataset_location = os.path.join(Base_location, 'Sentiment140.csv')
corpus = []
labels = []
# Parse tweets and sentiments
with open(dataset_location, 'r', encoding='latin-1') as df:
for i, line in enumerate(df):
parts = line.strip().split(',')
# Sentiment (0 = Negative, 1 = Positive)
labels.append(str(parts[0].strip()))
# Tweet
tweet = parts[5].strip()
if tweet.startswith('"'):
tweet = tweet[1:]
if tweet.endswith('"'):
tweet = tweet[::-1]
corpus.append(tweet.strip().lower())
You could simply read the .csv file with pandas, e.g.
import pandas as pd
corpus = pd.read_csv('training.1600000.processed.noemoticon.csv', encoding='latin-1')
Then use the .apply() function to process the tweets:
"""
Columns
====
target: the polarity of the tweet (0 = negative, 2 = neutral, 4 = positive)
ids: The id of the tweet ( 2087)
date: the date of the tweet (Sat May 16 23:58:44 UTC 2009)
flag: The query (lyx). If there is no query, then this value is NO_QUERY.
user: the user that tweeted (robotickilldozr)
text: the text of the tweet (Lyx is cool)
"""
from nltk.stem.lancaster import LancasterStemmer
from nltk.tokenize import RegexpTokenizer
import pandas as pd
df = pd.read_csv('training.1600000.processed.noemoticon.csv',
header=None,
names=['target', 'ids', 'date', 'flag', 'user', 'text'],
encoding='latin-1')
tokenizer = RegexpTokenizer('[a-zA-Z0-9#]+')
stemmer = LancasterStemmer()
def process_tweet(tweet):
return [stemmer.stem(token) if not token.startswith('#') else token
for token in tokenizer.tokenize(tweet)]
# 1. Cast the column type to string
# 2. Lowercase it
# 3. Iterate throw each row and get the output from process_tweet()
# 4. # 3. Keep in a new column call `tokenized_text`
df['tokenized_text']= df['text'].str.lower().apply(process_tweet)

Haskell parse a string into array of arrays

I Need to parse a String into Array of Arrays. String is splited in Arrays by "\n" char. And each of that Array is splited by "," or ";" signs.
Example: 4;5\n6,7 -----> [[4,5][6,7]]
import qualified Text.Parsec as P (char,runP,noneOf,many,(<|>),eof)
import Text.ParserCombinators.Parsec
import Text.Parsec.String
import Text.Parsec.Char
import Text.PrettyPrint.HughesPJ
import Data.Maybe
newtype CSV = CSV [Row] deriving (Show,Eq)
type Row = [String]
parseCSV :: Parser CSV
parseCSV = do
return $ CSV (endBy (sepBy (many (noneOf ",\n")) (Text.Parsec.Char.char ',')) (Text.Parsec.Char.char '\n'))
runParsec :: Parser a -> String -> Maybe a
runParsec parser input = case P.runP parser () "" input of
Left _ -> Nothing
Right a -> Just a
But when I try to run the Code I get error because of wrong data types
Here is a solution that parses runParsec parseCSV "4;5\n6,7\n" into Just (CSV [["4","5"],["6","7"]]).
parseCSV :: Parser CSV
parseCSV = CSV <$> csv
where
csv = row `endBy` char '\n'
row = many (noneOf ",;\n") `sepBy` oneOf ",;"

Resources