PySide2 QTextEdit: set up Markdown CSS in Maya

I want to display a Markdown-formatted view in Maya. The following code is my attempt:
from PySide2 import QtWidgets, QtGui, QtWebEngineWidgets
message = """
```python
import pymel.core as pm
print("Hello World")
```
# Test Message
## Test Message
"""
# Build a simple dialog with a web view and an OK button.
dialog = QtWidgets.QDialog()
layout = QtWidgets.QVBoxLayout(dialog)
web_engine = QtWebEngineWidgets.QWebEngineView()
p_btn = QtWidgets.QPushButton("Ok")
layout.addWidget(web_engine)
layout.addWidget(p_btn)

# Convert the Markdown to HTML and hand it to the web view.
doc = QtGui.QTextDocument()
doc.setMarkdown(message)
web_engine.setHtml(doc.toHtml())

p_btn.clicked.connect(dialog.accept)
dialog.setWindowTitle("Notes")
dialog.exec_()
However, the code block does not render properly in the result. I have been trying to add Markdown CSS to the HTML, but have made no progress. I need your help.
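One possible direction, sketched below: QTextDocument.toHtml() returns a complete HTML document, so a stylesheet can be spliced into its <head> before it is handed to QWebEngineView.setHtml(). The pre/code selectors are only a guess at what the generated markup uses and may need adjusting after inspecting doc.toHtml():
from PySide2 import QtGui

CODE_CSS = """
pre, code {
    font-family: Consolas, 'Courier New', monospace;
    background-color: #2b2b2b;
    color: #f8f8f2;
    padding: 8px;
    border-radius: 4px;
}
"""

def markdown_to_styled_html(markdown_text, css=CODE_CSS):
    # Convert the Markdown to HTML with QTextDocument, then splice the
    # stylesheet into the <head> of the document it produces.
    doc = QtGui.QTextDocument()
    doc.setMarkdown(markdown_text)
    html = doc.toHtml()
    return html.replace("<head>", "<head><style>%s</style>" % css, 1)

# web_engine.setHtml(markdown_to_styled_html(message))
Calling web_engine.setHtml() with the wrapped string instead of doc.toHtml() directly should let the web view apply the stylesheet.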

Related

Unable to get Selenium Code Working to POST data to Court (PACER) Practice/Training Site

I have been trying to create a script to do the following while I have access to a training web site I am using for electronic case filings. This requires the following steps:
1. Go to the following site: https://ecf-train.nvb.uscourts.gov/
2. Click the HTML link on that site: "District of Nevada Train Database - Document Filing System"
3. This redirects me to the login page, where I POST my login credentials (username/pw/testclientcode/flag=1): https://train-login.uscourts.gov/csologin/login.jsf?pscCourtId=NVTBK&appurl=https://ecf-train.nvb.uscourts.gov/cgi-bin/login.pl
4. The "flag" checks a "redaction" box and is then supposed to land me on the main access page, where I select what I want to e-file in the testing system (this is all approved by the Court for testing, FYI). Once logged in successfully, I click the "Bankruptcy" header link, which brings me to the next target page (https://ecf-train.nvb.uscourts.gov/cgi-bin/DisplayMenu.pl?BankruptcyEvents&id=1227536), where I can select the link for "Case Upload".
I am new to Python, but I believe I took all the correct steps to download the Chrome driver and install Selenium using "pip install selenium". However, my code is not finding the webdriver "PATH", which points to a file saved directly on my "C:" drive.
Any help getting this to work is HUGELY appreciated. Here is the code I have been using, with the errors as well:
CODE:
import requests
from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import sys
import os
import json
import time
# pass in header info which is "Content-Type:application/json"
url = "https://ecf-train.nvb.uscourts.gov/"
# payload contains the header information for Content-Type and Accept
payload = {"Content-Type":"application/json", "Accept":"application/json"}
r = requests.post(url, data=payload)
pacer_target = requests.get(url)
pacer_target.text
print(r.text)
# defining the api-endpoint
API_ENDPOINT = url
requests.get(API_ENDPOINT)
# data to be sent to the api - I removed my userid and pw from this post
data = {
"loginId":"XXXXXXXXX",
"password":"XXXXXXXXX",
"clientCode":"anycode",
"redactFlag":"1"}
# sending post request and saving response as response object
r = requests.post(url = API_ENDPOINT, data = data)
print(r.text, "STEP 1 - logged into main Traing screen!!")
# Point Selenium at the chromedriver executable saved on the C:\ drive
PATH = r'C:\chromedriver.exe'
driver = webdriver.Chrome(service=Service(PATH))
# driver = webdriver.Chrome()  # alternative if chromedriver is on the system PATH
continue_link = driver.find_element(By.PARTIAL_LINK_TEXT, 'cgi-bin/login')
continue_link.click()
# get response from the clicked URL and print the response
response = requests.get(url)
print(response.text, "this is step 2 - just clicked the main login button!!!!!")
# sending post request and saving response as response object
r = requests.post(url = API_ENDPOINT, data = data)
# extracting response text
pastebin_url = r.text
print("The pastebin URL is:%s"%pastebin_url)

Click on load more button using selenium not working properly in python3.7

As I am scraping, the page is dynamic, with a 'Load More' button, so I used Selenium for that.
The first problem is that the button is only clicked once; it never clicks 'Load More' again after the first time.
The second problem is that it only scrapes the articles that appear before the first 'Load More' click, not the ones loaded after it.
The third problem is that it scrapes every article twice.
The fourth problem is that I only want the date, but it gives me the author and place along with the date.
import time
import requests
from bs4 import BeautifulSoup
from bs4.element import Tag
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
base = "https://indianexpress.com"
browser = webdriver.Safari(executable_path='/usr/bin/safaridriver')
wait = WebDriverWait(browser, 10)
browser.get('https://indianexpress.com/?s=cybersecurity')
while True:
    try:
        time.sleep(6)
        show_more = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, 'Load More')))
        show_more.click()
    except Exception as e:
        print(e)
        break

soup = BeautifulSoup(browser.page_source, 'lxml')
search_results = soup.find('div', {'id': 'ie-infinite-scroll'})
links = search_results.find_all('a')

for link in links:
    link_url = link['href']
    response = requests.get(link_url)
    sauce = BeautifulSoup(response.text, 'html.parser')
    dateTag = sauce.find('div', {'class': 'm-story-meta__credit'})
    titleTag = sauce.find('h1', {'class': 'm-story-header__title'})
    contentTag = ' '.join([item.get_text(strip=True) for item in sauce.select("[class^='o-story-content__main a-wysiwyg'] p")])
    date = None
    title = None
    content = None
    if isinstance(dateTag, Tag):
        date = dateTag.get_text().strip()
    if isinstance(titleTag, Tag):
        title = titleTag.get_text().strip()
    print(f'{date}\n {title}\n {contentTag}\n')
    time.sleep(3)
There is no error in this code, but it needs refinement. What should I do to solve the above-mentioned problems?
Thanks.
That is because you are not waiting for the new content. While the new content is still loading, you are trying to click the 'Load More' button again.
Error message:
Message: Element <a class="m-featured-link m-featured-link--centered ie-load-more" href="#"> is not clickable at point (467,417) because another element <div class="o-listing__load-more m-loading"> obscures it
My solution:
while True:
    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(@class, 'ie-load-more')]")))
        browser.find_element_by_xpath("//a[contains(@class, 'ie-load-more')]").click()
        wait.until(EC.visibility_of_element_located((By.XPATH, "//div[@class='o-listing__load-more']")))
    except Exception as e:
        print(e)
        break
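That takes care of the clicking. For the third problem (articles scraped twice), one simple option, sketched here with the variable names from the question, is to deduplicate the hrefs before requesting them:
seen = set()
unique_links = []
for link in search_results.find_all('a'):
    href = link.get('href')
    # Skip anchors without an href and anything already collected.
    if href and href not in seen:
        seen.add(href)
        unique_links.append(href)
Looping over unique_links instead of links should avoid fetching the same article twice.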

How to get links using selenium and scrape using beautifulsoup?

I want to collect articles from this particular website. Earlier I was using only BeautifulSoup, but it was not grabbing the links, so I tried Selenium and wrote this code. It outputs 'None'. I have never used Selenium before, so I don't know much about it. What should I change in this code to make it work and give the desired results?
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
base = 'https://metro.co.uk'
url = 'https://metro.co.uk/search/#gsc.tab=0&gsc.q=cybersecurity&gsc.sort=date&gsc.page=7'
browser = webdriver.Safari(executable_path='/usr/bin/safaridriver')
wait = WebDriverWait(browser, 10)
browser.get(url)
link = browser.find_elements_by_class_name('gs-title')
for links in link:
    links.get_attribute('href')

soup = BeautifulSoup(browser.page_source, 'lxml')
date = soup.find('span', {'class': 'post-date'})
title = soup.find('h1', {'class': 'headline'})
content = soup.find('div', {'class': 'article-body'})
print(date)
print(title)
print(content)
time.sleep(3)
browser.close()
I want to collect the date, title, and content from all the articles on this page and on the other pages as well (pages 7 to 18).
Thank you.
Instead of using Selenium to get the anchors, I tried to extract the page source first with the help of Selenium and then used Beautiful Soup on it.
So, to put it in perspective:
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
base = 'https://metro.co.uk'
url = 'https://metro.co.uk/search/#gsc.tab=0&gsc.q=cybersecurity&gsc.sort=date&gsc.page=7'
browser = webdriver.Safari(executable_path='/usr/bin/safaridriver')
#wait = WebDriverWait(browser, 10) #Not actually required
browser.get(url)
soup = BeautifulSoup(browser.page_source, 'html.parser') #Get the Page Source
anchors = soup.find_all("a", class_ = "gs-title") #Now find the anchors
for anchor in anchors:
    browser.get(anchor['href'])  # Connect to the news link, and extract its page source
    sub_soup = BeautifulSoup(browser.page_source, 'html.parser')
    date = sub_soup.find('span', {'class': 'post-date'})
    title = sub_soup.find('h1', {'class': 'post-title'})  # Note that the class attribute for the heading is 'post-title' and not 'headline'
    content = sub_soup.find('div', {'class': 'article-body'})
    print([date.string, title.string, content.string])
    #time.sleep(3)  # Even this I don't believe is required
browser.close()
With this modification, I believe you can get your required contents.
You can use the same API the page uses. Alter the parameters to get all pages of results:
import requests
import json
import re
r = requests.get('https://cse.google.com/cse/element/v1?rsz=filtered_cse&num=10&hl=en&source=gcsc&gss=.uk&start=60&cselibv=5d7bf4891789cfae&cx=012545676297898659090:wk87ya_pczq&q=cybersecurity&safe=off&cse_tok=AKaTTZjKIBzl-5fANH8dQ8f78cv2:1560500563340&filter=0&sort=date&exp=csqr,4229469&callback=google.search.cse.api3732')
p = re.compile(r'api3732\((.*)\);', re.DOTALL)
data = json.loads(p.findall(r.text)[0])
links = [item['clicktrackUrl'] for item in data['results']]
print(links)
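For example, one way to walk pages 7 to 18 is to vary the start parameter (ten results per page, so start runs 60, 70, ... 170). This is only a sketch: the cse_tok value is copied verbatim from the URL above and is session-bound, so it will likely need to be refreshed from a live page load:
import requests
import json
import re

p = re.compile(r'api3732\((.*)\);', re.DOTALL)
# start=60 corresponds to page 7; pages 7 to 18 give start values 60, 70, ... 170.
base_url = ('https://cse.google.com/cse/element/v1?rsz=filtered_cse&num=10&hl=en&source=gcsc'
            '&gss=.uk&start={start}&cselibv=5d7bf4891789cfae&cx=012545676297898659090:wk87ya_pczq'
            '&q=cybersecurity&safe=off&cse_tok=AKaTTZjKIBzl-5fANH8dQ8f78cv2:1560500563340'
            '&filter=0&sort=date&exp=csqr,4229469&callback=google.search.cse.api3732')

all_links = []
for start in range(60, 180, 10):
    r = requests.get(base_url.format(start=start))
    data = json.loads(p.findall(r.text)[0])
    all_links.extend(item['clicktrackUrl'] for item in data['results'])
print(all_links)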

<class 'google.appengine.runtime.DeadlineExceededError'>: how to get around?

OK guys, I am having tons of problems getting my working dev server to a working production server :). I have a task that goes through and requests URLs, then collects and updates data. It takes 30 minutes to run.
I uploaded it to the production server, but when I go to the URL of its corresponding .py script, appname.appspot.com/tasks/rrs, after 30 seconds I get google.appengine.runtime.DeadlineExceededError. Is there any way to get around this? Is there a 30-second deadline per page? The script works fine on the development server: I go to the URL and the associated .py script runs until completion.
import time
import random
import string
import cPickle
from StringIO import StringIO
try:
    import json
except ImportError:
    import simplejson as json
import urllib
import pprint
import datetime
import sys
sys.path.append("C:\Program Files (x86)\Google\google_appengine")
sys.path.append("C:\Program Files (x86)\Google\google_appengine\lib\yaml\lib")
sys.path.append("C:\Program Files (x86)\Google\google_appengine\lib\webob")
from google.appengine.api import users
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.ext import db
class SR(db.Model):
    name = db.StringProperty()
    title = db.StringProperty()
    url = db.StringProperty()
##request url and returns JSON_data
def overview(page):
    u = urllib.urlopen(page)
    bytes = StringIO(u.read())
    ##print bytes
    u.close()
    try:
        JSON_data = json.load(bytes)
        return JSON_data
    except ValueError, e:
        print e, " Couldn't get .json for %s" % page
        return None
##specific code to parse particular JSON data and append new SR objects to the given url list
def parse_json(JSON_data, lists):
    sr = SR()
    sr.name = ##data gathered
    sr.title = ##data gathered
    sr.url = ##data gathered
    lists.append(sr)
    return lists
## I want to be able to request, let's say, 500 pages without timing out
page = 'someurlpage.com'  ##starting url
url_list = []
for z in range(0, 500):
    page = 'someurlpage.com/%s' % z
    JSON_data = overview(page)  ##get json data for a given url page
    url_list = parse_json(JSON_data, url_list)  ##parse the json data and append class objects to a given list
db.put(url_list)##finally add object to gae database
Yes, App Engine imposes a 30-second deadline. One way around it might be a try/except on DeadlineExceededError, putting the rest of the work in a task queue.
But you can't make your requests run for a longer period.
You can also try Bulkupdate.
Example:
class Todo(db.Model):
    page = db.StringProperty()

class BulkPageParser(bulkupdate.BulkUpdater):
    def get_query(self):
        return Todo.all()

    def handle_entity(self, entity):
        JSON_data = overview(entity.page)
        db.put(parse_json(JSON_data, []))
        entity.delete()

# Put this in your view code:
for i in range(500):
    Todo(page='someurlpage.com/%s' % i).put()
job = BulkPageParser()
job.start()
OK, so if I am dynamically adding links as I parse the pages, I believe I would add them to the Todo queue like so:
def handle_entity(self, entity):
    JSON_data = overview(entity.page)
    data_gathered, new_links = parse_json(JSON_data, [])  ##like earlier, returns a list of sr objects, and now also a list of new links/pages to go to
    db.put(data_gathered)
    for link in new_links:
        Todo(page=link).put()
    entity.delete()
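For completeness, here is a minimal sketch of the other route mentioned at the top of the answer (catching DeadlineExceededError and handing the remainder to a task queue via the deferred library); overview, parse_json, and db are the names from the question, and the exact split point is only illustrative:
from google.appengine.ext import deferred
from google.appengine.runtime import DeadlineExceededError

def process_pages(start=0, end=500):
    url_list = []
    try:
        for z in range(start, end):
            page = 'someurlpage.com/%s' % z
            JSON_data = overview(page)
            url_list = parse_json(JSON_data, url_list)
    except DeadlineExceededError:
        # Save what has been collected so far and re-queue the rest as a
        # background task, which gets a longer deadline than a user request.
        db.put(url_list)
        deferred.defer(process_pages, z, end)
        return
    db.put(url_list)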

Crawling: Extract all text and links(href and ng-href) from AngularJs website and Crawl

After a lot of struggle trying to crawl an AngularJS page behind single sign-on, I have put together this code. The code runs fine: it logs in, opens the desired page, and scrapes it, but I am not getting all the links and text present on the website loaded by Angular. My XPath seems to be correct.
It is also not crawling the links that get extracted. What do I need to change in my code to extract all the text present on the website and on the subsequent webpages?
import scrapy
from scrapy import signals
from scrapy.http import TextResponse
from scrapy.xlib.pydispatch import dispatcher
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from ps_crawler.items import PsCrawlerItem
import time
from selenium.webdriver.common.keys import Keys
class SISSpider(scrapy.Spider):
    name = "SIS"
    allowed_domains = ["domain.com"]
    start_urls = ["https://domain.com/login?"]

    def __init__(self):
        self.driver = webdriver.Chrome()
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_closed(self, spider):
        self.driver.close()

    def parse(self, response):
        # selenium part of the job
        self.driver.get("https://domain.com/login?")
        time.sleep(5)
        self.driver.find_element_by_xpath('//*[@id="Login"]/div[2]/div[1]/div[2]/form/div[1]/input').send_keys("ssasdad")
        self.driver.find_element_by_xpath('//*[@id="Login"]/div[2]/div[1]/div[2]/form/div[2]/input').send_keys("")
        #self.driver.find_element_by_xpath('//*[@id="login"]').click()
        more_btn = WebDriverWait(self.driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, '//*[@id="login"]'))
        )
        time.sleep(5)
        more_btn.click()
        time.sleep(5)
        self.driver.execute_script("window.open('https://domain.com/#/admin','_blank');")
        time.sleep(10)
        window_now = self.driver.window_handles[1]
        self.driver.switch_to_window(window_now)
        ## stop when we reach the desired page
        #if self.driver.current_url.endswith('page=20'):
        #    break
        # now scrapy should do the job
        time.sleep(10)
        response = TextResponse(url=self.driver.current_url, body=self.driver.page_source, encoding='utf-8')
        time.sleep(10)
        for post in response.xpath('//div'):
            item = PsCrawlerItem()
            print post.xpath('a/span/text()').extract(), post.xpath('a/@href').extract(), post.xpath('a/@ng-href').extract()
You just need to tweak your XPath a little bit, as follows. Hope this solves the problem:
for post in response.xpath('//body'):
    print post.xpath('//text()').extract(), post.xpath('//a//@href').extract()
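And to actually crawl the links that get extracted rather than just print them, one option (a sketch, assuming the sub-pages should go through the same parse callback) is to yield them back to Scrapy from inside parse():
# Inside parse(), after building the TextResponse:
for href in response.xpath('//a/@href | //a/@ng-href').extract():
    if href:
        yield scrapy.Request(response.urljoin(href), callback=self.parse)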
