I am creating a project that scrapes Indeed's website. It was working fine, but when I ran it today, without my having made any changes, it no longer returns the entire page of results: it now only displays the first result, repeated once for every listing. Can someone help me correct this?
# Question script, restored to runnable form: one statement per line, the
# unbalanced parenthesis on `findAll` closed, and the "\n" escapes put back in
# the output string. The scraping logic is otherwise left exactly as posted,
# because its behavior is what the question is about.
from tkinter import *
import random
import urllib.request
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import pandas as pd
import requests

# Request to an unrelated site; `html_text` and `jobs` are not used again and
# `soup` is rebound further down.
html_text = requests.get('https://www.ign.com/').text
soup = BeautifulSoup(html_text, 'lxml')
jobs = soup.find('section',class_='right')
#print(html_text)

# Open the Indeed search results in Chrome and parse the rendered page source.
driver = webdriver.Chrome(executable_path='/Users/Miscellaneous/PycharmProjects/RecursivePractice/chromedriver')
url= "https://www.indeed.com/jobs?q=developer&l=Westbury%2C%20NY&vjk=0b0cbe29e5f86422"
driver.maximize_window()
driver.get(url)
time.sleep(5)  # fixed 5-second pause between navigation and reading the source
content = driver.page_source.encode('utf-8').strip()
soup = BeautifulSoup(content,"html.parser")

# One <a class="tapItem"> element per result on the page.
officials = soup.findAll("a",{"class":"tapItem"})
for official in officials:
    # NOTE: every lookup below is made on `soup` (the whole page), not on
    # `official` (the current result). `find` returns the first match in the
    # tree it is called on, so each pass prints the same first listing -- the
    # duplicated output described in the question.
    jobTitle = soup.find('h2',{'class': 'jobTitle'}).text
    # NOTE(review): class is spelled 'comapny_location'; confirm against the
    # page markup. The element itself (not its text) is what gets printed.
    companyName = soup.find('div',{'class': 'comapny_location'})
    location = soup.find('div',{'class': 'companyLocation'}).text
    salary = soup.find('div',{'class': 'salary-snippet'})
    actualSalary = salary.find('span').text
    summary = soup.find('div',{'class': 'job-snippet'}).text
    print('Title: ' + str(jobTitle) + '\nCompany Name: ' + str(companyName) + '\nLocation: ' + str(location) + '\nSalary: ' + str(actualSalary) + "\nSummary: " + str(summary))
    #print(str(official))
    print(' ')

driver.quit()
Answers:
Thank you for visiting the Q&A section on Magenaut. Please note that not all of the answers may solve your issue immediately, so please treat them as suggestions. If you found the post helpful (or not), leave a comment and I'll get back to you as soon as possible.
Method 1
Try this
# Scrape one page of Indeed search results and print title, company, location,
# salary and summary for every listing on it.
from tkinter import *
import random
import urllib.request
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import pandas as pd
import requests


def _text_or_null(tag):
    """Return the text of *tag*, or "NULL" when the lookup found no element."""
    return tag.text if tag is not None else "NULL"


# Request to an unrelated site carried over from the question; `html_text` and
# `jobs` are not used again and `soup` is rebound below.
html_text = requests.get('https://www.ign.com/').text
soup = BeautifulSoup(html_text, 'lxml')
jobs = soup.find('section', class_='right')

# NOTE(review): newer Selenium releases configure the driver path through a
# Service object instead of `executable_path` -- confirm against the installed
# version.
driver = webdriver.Chrome(executable_path='/Users/Miscellaneous/PycharmProjects/RecursivePractice/chromedriver')
url = "https://www.indeed.com/jobs?q=developer&l=Westbury%2C%20NY&vjk=0b0cbe29e5f86422"

try:
    driver.maximize_window()
    driver.get(url)
    time.sleep(5)  # fixed 5-second pause between navigation and reading the source
    content = driver.page_source.encode('utf-8').strip()
    soup = BeautifulSoup(content, "html.parser")

    # One <a class="tapItem"> element per result on the page.
    officials = soup.findAll("a", {"class": "tapItem"})
    for official in officials:
        # Search inside the current card only. Searching the whole page
        # returns the first listing every time, and pairing separate
        # page-wide lists by index drifts as soon as one card lacks a field.
        jobTitle = _text_or_null(official.find('h2', {'class': 'jobTitle'}))
        # NOTE(review): class is spelled 'comapny_location'; if the page uses a
        # different spelling this always yields "NULL" -- confirm against the
        # live markup.
        companyName = _text_or_null(official.find('div', {'class': 'comapny_location'}))
        location = _text_or_null(official.find('div', {'class': 'companyLocation'}))

        # Prefer the <span> inside the salary block; fall back to the block's
        # own text, then to "NULL" when the card shows no salary at all.
        salary = official.find('div', {'class': 'salary-snippet'})
        salary_span = salary.find('span') if salary is not None else None
        actualSalary = _text_or_null(salary_span if salary_span is not None else salary)

        summary = _text_or_null(official.find('div', {'class': 'job-snippet'}))

        print(
            f"Title: {jobTitle}"
            f"\nCompany Name: {companyName}"
            f"\nLocation: {location}"
            f"\nSalary: {actualSalary}"
            f"\nSummary: {summary}"
        )
        print(' ')
finally:
    # Always close the browser, even if loading or parsing fails.
    driver.quit()
All methods were sourced from stackoverflow.com or stackexchange.com and are licensed under CC BY-SA 2.5, CC BY-SA 3.0 and CC BY-SA 4.0.