from bs4 import BeautifulSoup
import requests
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
# Define the main URL and base URL
# Scrape targets, left blank here as placeholders to fill in before running.
#   main_url - listing page that is downloaded and searched for job tiles.
#   base_url - prefix concatenated in front of each tile's href to build the
#              job link (presumably the site origin, if hrefs are relative).
main_url: str = ""
base_url: str = ""
# Function to get latest start date for a job URL
def get_latest_start_date(job_url, timeout=30):
    """Download a job page and return the value under "Latest start date".

    The page is searched for ``div.job-page__subsection`` elements. The first
    one whose ``<h3>`` heading reads exactly "Latest start date" supplies the
    value, taken as the last line of that subsection's text.

    Args:
        job_url: URL of the job detail page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection, freezing the whole scrape.

    Returns:
        The stripped text of the latest start date, or "N/A" when the page
        has no matching subsection.

    Raises:
        requests.exceptions.RequestException: If the download fails or the
            timeout elapses.
    """
    response = requests.get(job_url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")

    for subsection in soup.find_all("div", class_="job-page__subsection"):
        heading = subsection.find("h3")
        if heading is not None and heading.text.strip() == "Latest start date":
            # The heading comes first in the subsection, so the value is the
            # last line of the subsection's text.
            return subsection.text.strip().split("\n")[-1].strip()
    return "N/A"
# Function to get housing information for a job URL
def get_housing_info(job_url, timeout=30):
    """Download a job page and return its "Housing information" paragraph.

    Looks for an ``<h2>`` whose string is exactly "Housing information" and
    returns the text of the next ``<p>`` element that follows it.

    Args:
        job_url: URL of the job detail page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection, freezing the whole scrape.

    Returns:
        The stripped paragraph text, or "N/A" when the heading or the
        following paragraph is missing.

    Raises:
        requests.exceptions.RequestException: If the download fails or the
            timeout elapses.
    """
    response = requests.get(job_url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")

    # Explicit None checks keep the "missing section -> N/A" fallback without
    # a blanket `except AttributeError` that could hide unrelated mistakes.
    heading = soup.find("h2", string="Housing information")
    if heading is None:
        return "N/A"
    paragraph = heading.find_next("p")
    if paragraph is None:
        return "N/A"
    return paragraph.text.strip()
# Function to save job information to a text file
def save_job_info_to_file(job_data, path="job_info.txt"):
    """Write one labelled record per job to a UTF-8 text file.

    Each job becomes eleven ``Label: value`` lines followed by a line of 50
    dashes. Any existing file at ``path`` is overwritten.

    Args:
        job_data: Iterable of dicts, each holding the keys ``job_number``,
            ``job_title``, ``company_info``, ``job_type``, ``start_date``,
            ``hourly_pay``, ``language_requirement``, ``positions_available``,
            ``job_link``, ``latest_start_date`` and ``housing_info``.
        path: Destination file. Defaults to "job_info.txt" in the current
            working directory.

    Raises:
        KeyError: If a job dict lacks one of the keys above. The report is
            assembled before the file is opened, so a malformed record does
            not truncate an existing report.
        OSError: If the file cannot be opened or written.
    """
    # (label written to the file, key in the job dict), in output order.
    report_fields = (
        ("Job Number", "job_number"),
        ("Job Title", "job_title"),
        ("Company Info", "company_info"),
        ("Job Type", "job_type"),
        ("Start Date", "start_date"),
        ("Hourly Pay", "hourly_pay"),
        ("Language Requirement", "language_requirement"),
        ("Positions Available", "positions_available"),
        ("Job Link", "job_link"),
        ("Latest Start Date", "latest_start_date"),
        ("Housing Information", "housing_info"),
    )
    separator = "-" * 50 + "\n"

    lines = []
    for job in job_data:
        lines.extend(f"{label}: {job[key]}\n" for label, key in report_fields)
        lines.append(separator)

    # Scraped text often contains non-ASCII characters (accents, currency
    # signs). An explicit UTF-8 encoding avoids UnicodeEncodeError on
    # platforms whose default encoding cannot represent them.
    with open(path, "w", encoding="utf-8") as file:
        file.writelines(lines)
# Function to send email with attachment
def send_email_with_attachment(subject, body, attachment_path, from_email,
                               to_email, password,
                               smtp_host='smtp-mail.outlook.com', smtp_port=587):
    """Send a plain-text email with one file attached over SMTP with STARTTLS.

    Args:
        subject: Value of the Subject header.
        body: Plain-text message body.
        attachment_path: Path of the file to attach. It is read as bytes and
            sent as a base64-encoded ``application/octet-stream`` part named
            after the path's final component.
        from_email: Sender address, also used as the SMTP login name.
        to_email: Recipient address.
        password: Password for the SMTP login.
        smtp_host: SMTP server to connect to. Defaults to Outlook's server.
        smtp_port: SMTP server port. Defaults to 587.

    Raises:
        OSError: If the attachment cannot be read or the server is
            unreachable.
        smtplib.SMTPException: If STARTTLS, login, or sending fails.
    """
    # Function-scope import so this block does not depend on a file-level one.
    import os

    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))

    with open(attachment_path, 'rb') as attachment:
        payload = attachment.read()

    attachment_part = MIMEBase('application', 'octet-stream')
    attachment_part.set_payload(payload)
    # Base64 keeps arbitrary bytes safe for transport in a text-based message.
    encoders.encode_base64(attachment_part)
    # Passing the name through the `filename` keyword lets the email package
    # quote or encode it, so names with spaces or non-ASCII characters survive.
    # basename() uses the platform's path rules instead of splitting on "/".
    attachment_part.add_header(
        'Content-Disposition',
        'attachment',
        filename=os.path.basename(attachment_path),
    )
    msg.attach(attachment_part)

    # The context manager closes the connection even when STARTTLS, login or
    # sending raises, so a failed send does not leak the socket.
    with smtplib.SMTP(smtp_host, smtp_port) as smtp_server:
        smtp_server.starttls()
        smtp_server.login(from_email, password)
        smtp_server.sendmail(from_email, to_email, msg.as_string())
# Send a GET request to the main URL and parse the HTML content
# The timeout keeps a stalled server from hanging the whole run.
response = requests.get(main_url, timeout=30)
soup = BeautifulSoup(response.text, "html.parser")

# Only tiles that carry an href can be followed to a detail page. The href is
# read from the tile itself so every link stays paired with its own job;
# looking it up by position in a second, separately filtered list goes out of
# step as soon as one tile has no href.
job_tiles = soup.find_all("a", class_="job-tile", href=True)

# Collected records, one dict per job tile.
job_data = []
for index, job_tile in enumerate(job_tiles, start=1):
    job_title = job_tile.find("h3").text.strip()
    company_info = job_tile.find("div", class_="job-tile__title").find("p").text.strip()

    # Look the info box up once; its <p> children are read by position 0-4.
    info_paragraphs = job_tile.find("div", class_="job-tile__info").find_all("p")
    job_type = info_paragraphs[0].text.strip()
    start_date = info_paragraphs[1].text.strip()
    hourly_pay = info_paragraphs[2].text.strip()
    language_requirement = info_paragraphs[3].text.strip()
    positions_available = info_paragraphs[4].text.strip()

    job_link = base_url + job_tile["href"]

    # NOTE: each helper downloads the job page itself, so every job costs two
    # requests for the same URL.
    latest_start_date = get_latest_start_date(job_link)
    housing_info = get_housing_info(job_link)

    job_data.append({
        "job_number": index,
        "job_title": job_title,
        "company_info": company_info,
        "job_type": job_type,
        "start_date": start_date,
        "hourly_pay": hourly_pay,
        "language_requirement": language_requirement,
        "positions_available": positions_available,
        "job_link": job_link,
        "latest_start_date": latest_start_date,
        "housing_info": housing_info,
    })

# Save job information to a text file
save_job_info_to_file(job_data)

# Email configuration
# NOTE(review): avoid committing a real password here; consider reading it
# from the environment or a secrets store instead.
from_email = ""  # Your email address
to_email = ""  # Recipient's email address
password = ""  # Your email password
subject = "Job Information"
body = "Please look job informations."

# Send email with attachment
attachment_path = "job_info.txt"
send_email_with_attachment(subject, body, attachment_path, from_email, to_email, password)

# Print the scraped job data, one "Label: value" line per field, followed by
# a line of dashes after each job.
printed_fields = (
    ("Job Number", "job_number"),
    ("Job Title", "job_title"),
    ("Company Info", "company_info"),
    ("Job Type", "job_type"),
    ("Start Date", "start_date"),
    ("Hourly Pay", "hourly_pay"),
    ("Language Requirement", "language_requirement"),
    ("Positions Available", "positions_available"),
    ("Job Link", "job_link"),
    ("Latest Start Date", "latest_start_date"),
    ("Housing Information", "housing_info"),
)
for job in job_data:
    for label, key in printed_fields:
        print(f"{label}:", job[key])
    print("-" * 50)
#End of code.