H2OGeneralizedLinearEstimator ERROR - h2o

import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.deeplearning import H2ODeepLearningEstimator
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
h2o.init()
inputFile = h2o.import_file("SQLBlocked.csv")
inputFile['cat'] = inputFile['cat'].asfactor()
inputFile['entityN'] = inputFile['entityN'].asfactor()
inputFile['expectedT'] = inputFile['expectedT'].asfactor()
inputFile['u_play'] = inputFile['u_play'].asfactor()
inputFile['sub'] = inputFile['sub'].asfactor()
predictors = ["attempts", "cat", "entityN", "expectedT", "u_play", "sub"]
response1 = ['count.value']
inputFile.types
model = H2OGeneralizedLinearEstimator()
model.train(predictors, response1, training_frame = inputFile)
I am getting the following error:
H2OTypeError: Argument y should be a None | integer | string, got list ['count.value']

You are passing the response as the list ['count.value'], and that is the problem. You just need to pass the response as the string 'count.value', like below:
response1 = 'count.value'
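For completeness, a minimal sketch of the corrected call, reusing the frame and predictor list defined above (x and y are the named parameters of train):

response1 = 'count.value'
model = H2OGeneralizedLinearEstimator()
model.train(x=predictors, y=response1, training_frame=inputFile)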


How to get all installed font path?

How to get all installed font path with pywin32?
I can only find a way with a registry key, but I would prefer to use GDI or DirectWrite directly.
Edit:
I am not sure, but from what I can see, here is how it would maybe be possible with DirectWrite:
Create a factory: https://learn.microsoft.com/en-us/windows/win32/api/dwrite/nf-dwrite-dwritecreatefactory
GetSystemFontCollection: https://learn.microsoft.com/en-us/windows/win32/api/dwrite/nf-dwrite-idwritefactory-getsystemfontcollection
Do a for loop with GetFontFamilyCount: https://learn.microsoft.com/en-us/windows/win32/api/dwrite/nf-dwrite-idwritefontcollection-getfontfamilycount
GetFontFamily: https://learn.microsoft.com/en-us/windows/win32/api/dwrite/nf-dwrite-idwritefontcollection-getfontfamily
GetMatchingFonts (the weight, stretch, and style params can be anything; they seem to only change the order of the returned list): https://learn.microsoft.com/en-us/windows/win32/api/dwrite/nf-dwrite-idwritefontfamily-getmatchingfonts
Do a for loop with GetFontCount: https://learn.microsoft.com/en-us/windows/win32/api/dwrite/nf-dwrite-idwritefontlist-getfontcount
GetFont: https://learn.microsoft.com/en-us/windows/win32/api/dwrite/nf-dwrite-idwritefontlist-getfont
CreateFontFace: https://learn.microsoft.com/en-us/windows/win32/api/dwrite/nf-dwrite-idwritefont-createfontface
GetFiles: https://learn.microsoft.com/en-us/windows/win32/api/dwrite/nf-dwrite-idwritefontface-getfiles
GetReferenceKey: https://learn.microsoft.com/en-us/windows/win32/api/dwrite/nf-dwrite-idwritefontfile-getreferencekey
Again DWriteCreateFactory, but with the uuid of IDWriteLocalFontFileLoader
GetFilePathFromKey: https://learn.microsoft.com/en-us/windows/win32/directwrite/idwritelocalfontfileloader-getfilepathfromkey
I found a solution using the DirectWrite API.
This code depends on the pyglet library.
import sys
import time
from ctypes import byref, c_uint32, create_unicode_buffer

from pyglet.font.directwrite import (
    DWriteCreateFactory,
    DWRITE_FACTORY_TYPE_ISOLATED,
    IDWriteFactory,
    IDWriteFont,
    IDWriteFontCollection,
    IDWriteFontFace,
    IDWriteFontFamily,
    IDWriteFontFile,
    IDWriteFontFileLoader,
    IDWriteLocalFontFileLoader,
    IID_IDWriteFactory,
    IID_IDWriteLocalFontFileLoader,
)
from pyglet.libs.win32.types import c_void_p
from typing import List


def get_fonts_filepath() -> List[str]:
    """Return a list of the file paths of all installed fonts."""
    write_factory = IDWriteFactory()
    DWriteCreateFactory(
        DWRITE_FACTORY_TYPE_ISOLATED, IID_IDWriteFactory, byref(write_factory)
    )
    fonts_path = set()
    sys_collection = IDWriteFontCollection()
    write_factory.GetSystemFontCollection(byref(sys_collection), 0)
    collection_count = sys_collection.GetFontFamilyCount()
    for i in range(collection_count):
        family = IDWriteFontFamily()
        sys_collection.GetFontFamily(i, byref(family))
        font_count = family.GetFontCount()
        for j in range(font_count):
            font = IDWriteFont()
            family.GetFont(j, byref(font))
            font_face = IDWriteFontFace()
            font.CreateFontFace(byref(font_face))
            # First call gets the file count, second call fills the array
            file_ct = c_uint32()
            font_face.GetFiles(byref(file_ct), None)
            font_files = (IDWriteFontFile * file_ct.value)()
            font_face.GetFiles(byref(file_ct), font_files)
            pff = font_files[0]
            key_data = c_void_p()
            ff_key_size = c_uint32()
            pff.GetReferenceKey(byref(key_data), byref(ff_key_size))
            loader = IDWriteFontFileLoader()
            pff.GetLoader(byref(loader))
            try:
                local_loader = IDWriteLocalFontFileLoader()
                loader.QueryInterface(
                    IID_IDWriteLocalFontFileLoader, byref(local_loader)
                )
            except OSError:  # E_NOTIMPL: the font is not backed by a local file
                font.Release()
                font_face.Release()
                loader.Release()
                pff.Release()
                continue
            path_len = c_uint32()
            local_loader.GetFilePathLengthFromKey(
                key_data, ff_key_size, byref(path_len)
            )
            buffer = create_unicode_buffer(path_len.value + 1)
            local_loader.GetFilePathFromKey(key_data, ff_key_size, buffer, len(buffer))
            font.Release()
            font_face.Release()
            loader.Release()
            local_loader.Release()
            pff.Release()
            fonts_path.add(buffer.value)
        family.Release()
    sys_collection.Release()
    write_factory.Release()
    return list(fonts_path)


def main():
    start = time.time()
    fonts_path_dwrite = get_fonts_filepath()
    print(time.time() - start)
    print(fonts_path_dwrite)


if __name__ == "__main__":
    sys.exit(main())

ValueError: could not broadcast input array from shape (50,) into shape (1,)

Why am I getting the following error when I run the code below?
ValueError: could not broadcast input array from shape (50,) into shape (1,)
from gensim.test.utils import datapath, get_tmpfile
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
path = '/Users/welcome/Documents/Project/'
glove_file = datapath(path +'glove.6B.50d.txt')
tmp_file = get_tmpfile(path + "test_word2vec.txt")
_ = glove2word2vec(glove_file, tmp_file)
glovemodel = KeyedVectors.load_word2vec_format(tmp_file)
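A hedged suggestion rather than a confirmed fix: if you are on gensim 4.x, the glove2word2vec conversion step is deprecated and can be skipped, since load_word2vec_format accepts no_header=True to read the headerless GloVe text format directly. A minimal sketch, assuming the same file path as above:

from gensim.models import KeyedVectors

# Assumes gensim >= 4.0: read the GloVe text file directly; no_header=True
# tells the loader the file has no "count dimensions" header line.
glovemodel = KeyedVectors.load_word2vec_format(
    '/Users/welcome/Documents/Project/glove.6B.50d.txt',
    binary=False,
    no_header=True,
)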

how to add symbols to the multiple stock data

# I have scraped data; below is my code. Now I want to add a column of
# symbols to the respective company data. Please guide me on how the symbol
# can be added to the respective firm's data.
# Code below:
from time import sleep
import pandas as pd
import os
import numpy as np
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
browser = webdriver.Chrome(ChromeDriverManager().install())
symbols = ['FATIMA', 'SSGC', 'FCCL', 'ISL', 'KEL', 'NCL', 'DGKC', 'SNGP',
           'NML', 'ENGRO', 'HUMNL', 'CHCC', 'ATRL', 'HUBC', 'ASTL', 'PIBTL',
           'OGDC', 'EFERT', 'FFC', 'NCPL', 'KTML', 'PSO', 'LUCK', 'SEARL',
           'KOHC', 'ABOT', 'AICL', 'HASCOL', 'PTC', 'KAPCO', 'PIOC', 'POL',
           'SHEL', 'GHGL', 'HCAR', 'DCR', 'BWCL', 'MTL', 'GLAXO', 'PKGS',
           'SHFA', 'MARI', 'ICI', 'ACPL', 'PSMC', 'SPWL', 'THALL', 'BNWM',
           'EFUG', 'GADT', 'AABS']
company = 1
for ThisSymbol in symbols:
    # Get first symbol from the above python list
    company = 2
    # In the URL, make the symbol a variable
    url = 'http://www.scstrade.com/stockscreening/SS_CompanySnapShotYF.aspx?symbol=' + ThisSymbol
    browser.get(url)
    sleep(2)
    # The command below gets all the contents from the url
    html = browser.execute_script("return document.documentElement.outerHTML")
    # Supply the contents to Beautiful Soup and tell it to parse the text as HTML
    soup = BeautifulSoup(html, "html.parser")
    for rn in range(0, 9):
        plist = []
        r = soup.find_all('tr')[rn]
        # Condition: if first row, then th, otherwise td
        if (rn == 0):
            celltag = 'th'
        else:
            celltag = 'td'
        # Now use the celltag instead of using fixed td or th
        col = r.find_all(celltag)
        print()
        if col[i] == 0:
            print("")
        else:
            for i in range(0, 4):
                cell = col[i].text
                clean = cell.replace('\xa0 ', '')
                clean = clean.replace(' ', '')
                plist.append(clean)
        # If first row, create df, otherwise add to it
        if (rn == 0):
            df = pd.DataFrame(plist)
        else:
            df2 = pd.DataFrame(plist)
            colname = 'y' + str(2019 - rn)
            df[colname] = df2
    if (company == 1):
        dft = df.T
        # Get header column
        head = dft.iloc[0]
        # Exclude first row from the data
        dft = dft[1:]
        dft.columns = head
        dft = dft.reset_index()
        # Assign headers
        dft = dft.drop(['index'], axis='columns')
    else:
        dft2 = df.T
        # Get header column
        head = dft2.iloc[0]
        # Exclude first row from the data
        dft2 = dft2[1:]
        dft2.columns = head
        dft2 = dft2.reset_index()
        # Assign headers
        dft2 = dft2.drop(['index'], axis='columns')
        dft['Symbol'] = ThisSymbol
        dft = dft.append(dft2, sort=['Year', 'Symbol'])
    company = company + 1
dft
My output looks like this; I want a Symbol column for each respective firm's data. I have added
dft['Symbol'] = ThisSymbol
but it adds just the first company from the list to all companies' data.
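A hedged sketch of one common pattern for this (not a confirmed fix for the exact code above): assign the Symbol column inside the loop right after each company's frame is built, collect the frames in a list, and concatenate once at the end. frames is a hypothetical accumulator name:

frames = []
for ThisSymbol in symbols:
    # ... scrape and build this company's transposed frame dft2 as above ...
    dft2['Symbol'] = ThisSymbol  # tag every row of this firm's data
    frames.append(dft2)
# One combined frame with the correct symbol on each row
dft = pd.concat(frames, ignore_index=True)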

AttributeError: 'H2OFrame' object has no attribute 'lower' when converting confusion matrix to data frame

I am trying to convert the confusion matrix to a Python 2D list so I can access its components, but I am getting an error when converting it to a data frame.
import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator
import pandas as pd
h2o.init()
training_file = "AirlinesTrain.csv"
train = h2o.import_file(training_file)
response_col = "IsDepDelayed"
distribution = "multinomial"
project_name = "airlines"
problem_type = "binary-classification"
predictors = train.columns
gbm = H2OGradientBoostingEstimator(nfolds=3,
                                   distribution=distribution)
gbm.train(x=predictors,
          y=response_col,
          training_frame=train)
print("gbm.confusion_matrix(train).as_data_frame()")
print(gbm.confusion_matrix(train).as_data_frame())#This errors AttributeError: 'H2OFrame' object has no attribute 'lower'
NOTE: if I use the cars dataset, there are no errors:
cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
cars["cylinders"] = cars["cylinders"].asfactor()
#r = cars[0].runif()
#train = cars[r > .2]
#valid = cars[r <= .2]
train=cars
response_col = "cylinders"
distribution = "multinomial"
predictors = ["displacement","power","weight","acceleration","year"]
I ran into this same issue. It seems there may be something wrong in the docs, as they mention you can pass an H2OFrame:
https://docs.h2o.ai/h2o/latest-stable/h2o-docs/performance-and-prediction.html
However, I think if you pass train=True it will work:
print(gbm.confusion_matrix(train=True).as_data_frame())
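As a follow-up sketch, assuming the gbm model trained above: once the matrix comes back as a pandas DataFrame, its components can be read with ordinary pandas indexing.

# train=True asks for the confusion matrix from the stored training metrics,
# instead of passing an H2OFrame to be scored.
cm = gbm.confusion_matrix(train=True).as_data_frame()
print(cm.iloc[0, 0])  # a single component of the matrix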

Defining URL list for crawler, syntax issues

I'm currently running the following code:
import requests
from bs4 import BeautifulSoup
from urlparse import urljoin

def hltv_match_list(max_offset):
    offset = 0
    while offset < max_offset:
        url = 'http://www.hltv.org/?pageid=188&offset=' + str(offset)
        base = "http://www.hltv.org/"
        soup = BeautifulSoup(requests.get("http://www.hltv.org/?pageid=188&offset=0").content, 'html.parser')
        cont = soup.select("div.covMainBoxContent a[href*=matchid=]")
        href = urljoin(base, (a["href"] for a in cont))
        # print([urljoin(base, a["href"]) for a in cont])
        get_hltv_match_data(href)
        offset += 50

def get_hltv_match_data(matchid_url):
    source_code = requests.get(matchid_url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, 'html.parser')
    for teamid in soup.findAll("div.covSmallHeadline a[href*=teamid=]"):
        print teamid.string

hltv_match_list(5)
Errors:
File "C:/Users/mdupo/PycharmProjects/HLTVCrawler/Crawler.py", line 12, in hltv_match_list
href = urljoin(base, (a["href"] for a in cont))
File "C:\Python27\lib\urlparse.py", line 261, in urljoin
urlparse(url, bscheme, allow_fragments)
File "C:\Python27\lib\urlparse.py", line 143, in urlparse
tuple = urlsplit(url, scheme, allow_fragments)
File "C:\Python27\lib\urlparse.py", line 182, in urlsplit
i = url.find(':')
AttributeError: 'generator' object has no attribute 'find'
Process finished with exit code 1
I think I'm having trouble with the href = urljoin(base, (a["href"] for a in cont)) part, as I'm trying to create a URL list I can feed into get_hltv_match_data to then capture various items within that page. Am I going about this wrong?
Cheers
You need to join each href as per your commented code:
urls = [urljoin(base,a["href"]) for a in cont]
You are trying to join the base url to a generator, i.e. (a["href"] for a in cont), which makes no sense.
You should also be passing url to requests, or you are going to be requesting the same page over and over:
soup = BeautifulSoup(requests.get(url).content, 'html.parser')
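Putting both fixes together, a sketch of the corrected loop (keeping the rest of the original Python 2 code unchanged):

def hltv_match_list(max_offset):
    offset = 0
    base = "http://www.hltv.org/"
    while offset < max_offset:
        url = 'http://www.hltv.org/?pageid=188&offset=' + str(offset)
        # Request the current page rather than the hard-coded offset=0 page
        soup = BeautifulSoup(requests.get(url).content, 'html.parser')
        cont = soup.select("div.covMainBoxContent a[href*=matchid=]")
        # Join each href individually to get a list of absolute URLs
        urls = [urljoin(base, a["href"]) for a in cont]
        for match_url in urls:
            get_hltv_match_data(match_url)
        offset += 50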
