Scrape a web page behind a login
from requests_html import HTMLSession
# Use one session for every request so that cookies set by the login
# response are carried over to the requests that follow.
session = HTMLSession()

# Submit the login form. The field names ("username", "password") are
# presumably the ones login.php expects -- adjust them for the real site.
login_page = session.post(
    "https://example.com/login.php",
    data={
        "username": "myles",
        "password": "areallygoodpassword",
    },
)

# `ok` is False for 4xx/5xx responses; stop here rather than going on to
# scrape with a session that never authenticated.
# NOTE(review): a 2xx status alone may not prove the credentials were
# accepted -- confirm how this site reports a failed login.
if not login_page.ok:
    raise RuntimeError(
        f"Login request failed with HTTP status {login_page.status_code}"
    )
# Fetch the protected page through the same session, additionally passing
# the cookies that came back on the login response.
secret_page = session.get(
    "https://example.com/admin/index.php",
    cookies=login_page.cookies,
)

# The status flag on a response is `ok`; the previous `pk` spelling was a
# typo that raised AttributeError instead of checking the status.
if not secret_page.ok:
    raise RuntimeError(
        f"Protected page request failed with HTTP status {secret_page.status_code}"
    )
from pyquery import PyQuery as pq
# Sample definition list: each <dt> label is followed by exactly one <dd>
# value. The last <dd> is now closed with </dd> (it previously ended with a
# second opening <dd>, leaving the markup malformed).
doc = pq("""<dl>
<dt>First name</dt>
<dd>Dolores</dd>
<dt>Last name</dt>
<dd>Abernathy</dd>
<dt>ID number</dt>
<dd>CH465517080</dd>
<dt>Status</dt>
<dd>Conscious</dd>
<dt>Park</dt>
<dd>Westworld</dd>
<dt>Narrative Role</dt>
<dd>Rancher's daughter</dd>
</dl>""")
# Build a {label: value} mapping from the definition list.
data = {}

# Passing the SAME iterator to zip() twice makes it pull two consecutive
# elements per step, i.e. (dt, dd) pairs. This assumes find() yields the
# elements in document order and that every <dt> is immediately followed by
# one <dd>; a trailing unpaired element is silently dropped by zip().
elements = iter(doc.find("dt, dd"))
for dt_el, dd_el in zip(elements, elements):
    data[dt_el.text] = dd_el.text