Making multiple requests at the same time with the requests module and writing the output to Excel (Python)

Hi this is quite a complicated question
From this script:

# Sequential scraper: for every ticker, download the Yahoo Finance profile and
# key-statistics pages, pull three fields out of the JSON embedded in each
# page and write one spreadsheet row per ticker.
workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

stock = []  # here goes a list of 2000+ stock tickers as strings
sector = []
peg_ratio = []
foward_eps = []

# The URL templates, request headers and marker regex are identical for every
# ticker, so they are built once instead of on each loop iteration.
url_profile = 'https://finance.yahoo.com/quote/{}/profile?p={}'
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"}
# Matches the text " -- Data -- " (any single whitespace character in each gap)
# that identifies the <script> element carrying the page data.
pattern = re.compile(r'\s--\sData\s--\s')

for idx, ticker in enumerate(stock):
    # --- profile page: industry ---
    response = requests.get(url_profile.format(ticker, ticker), headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    # The slice starts two characters before the first "context" and drops the
    # last 12 characters (presumably the surrounding JavaScript), leaving JSON.
    start = script_data.find("context") - 2
    json_data = json.loads(script_data[start:-12])

    # A missing key or a null intermediate value is recorded as "Error"
    # instead of aborting the whole run.
    try:
        sector.append(json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile']['industry'])
    except (KeyError, TypeError):
        sector.append("Error")

    # --- key-statistics page: PEG ratio and forward EPS ---
    response = requests.get(url_stats.format(ticker, ticker), headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    start = script_data.find("context") - 2
    json_data = json.loads(script_data[start:-12])

    try:
        peg_ratio.append(
            json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['pegRatio'][
                'fmt'])
    except (KeyError, TypeError):
        peg_ratio.append("Error")
    try:
        foward_eps.append(
            json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['forwardEps'][
                'fmt'])
    except (KeyError, TypeError):
        foward_eps.append("Error")

    # Spreadsheet rows are 1-based, list indices are 0-based.
    worksheet.write("A" + str(idx + 1), ticker)
    worksheet.write("B" + str(idx + 1), sector[idx])
    worksheet.write("C" + str(idx + 1), foward_eps[idx])
    worksheet.write("D" + str(idx + 1), peg_ratio[idx])
workbook.close()

The code by itself does what it is supposed to do (getting the data: forward EPS, PEG ratio and sector, and pasting them into an Excel file), but the issue is that it takes a lot of time and the list stock is quite long (2531 elements). Is there a way to make this code more efficient or faster?

I have attempted to follow the instructions from this video: https://www.youtube.com/watch?v=nFn4_nA_yk8
But I still need to write the information for every single stock to the Excel file. Is there a way I can optimize all of this?
Maybe by sending multiple requests at the same time and writing the data to the Excel file at a later time?
The only end goal is to make the whole process as fast as possible.
Thanks in advance (if you need any other information, leave a comment and I will answer as soon as possible).

Contents hide

Answers:

Method 1

Answers:

Thank you for visiting the Q&A section on Magenaut. Please note that all the answers may not help you solve the issue immediately. So please treat them as advisements. If you found the post helpful (or not), leave a comment & I’ll get back to you as soon as possible.

Method 1

First you have to put the code in a function:

# --- globals ---

# URL templates; both placeholders receive the ticker symbol.
url_profile = 'https://finance.yahoo.com/quote/{}/profile?p={}'
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'

# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
}

# Matches the text " -- Data -- " (any single whitespace character in each
# gap); process() uses it to pick the <script> element that holds the page data.
pattern = re.compile(r'\s--\sData\s--\s')

# --- functions ---

def _load_page_data(url):
    """Download *url* and return the JSON object embedded in its data script.

    Returns an empty dict when the page contains no ``<script>`` element whose
    text matches the module-level ``pattern``, so field lookups on the result
    fall back to "Error" instead of raising AttributeError.
    """
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script = soup.find('script', text=pattern)
    if script is None:
        return {}
    script_data = script.contents[0]
    # The slice starts two characters before the first "context" and drops the
    # last 12 characters (presumably the surrounding JavaScript), leaving JSON.
    start = script_data.find("context") - 2
    return json.loads(script_data[start:-12])


def _summary_field(data, *keys):
    """Return the QuoteSummaryStore value found by following *keys*, or "Error".

    A missing key or a non-subscriptable intermediate value (e.g. ``None``)
    yields the string "Error".
    """
    try:
        value = data['context']['dispatcher']['stores']['QuoteSummaryStore']
        for key in keys:
            value = value[key]
    except (KeyError, TypeError):
        return "Error"
    return value


def process(number, stock_name):
    """Fetch industry, forward EPS and PEG ratio for one ticker.

    Args:
        number: Key under which the row is stored in the global ``results``
            dict (the callers pass the 1-based spreadsheet row).
        stock_name: Ticker symbol inserted into both page URLs.

    Returns:
        The tuple ``(stock_name, sector, foward_eps, foward_eps, peg_ratio)``.
        ``foward_eps`` occupies indices 2 and 3 and ``peg_ratio`` index 4;
        the shape is kept as-is so existing callers keep working. Any field
        that cannot be found is the string "Error".

    Side effects:
        Prints a progress line and stores the same tuple in
        ``results[number]`` so that plain threads, which cannot return a
        value, can still hand the row back.
    """
    # The newline is part of the string (with end='') so the whole line goes
    # out in a single write -- presumably to keep output from concurrent
    # threads from interleaving.
    print(f'{number} {stock_name}\n', end='', flush=True)

    profile = _load_page_data(url_profile.format(stock_name, stock_name))
    sector = _summary_field(profile, 'assetProfile', 'industry')

    stats = _load_page_data(url_stats.format(stock_name, stock_name))
    peg_ratio = _summary_field(stats, 'defaultKeyStatistics', 'pegRatio', 'fmt')
    foward_eps = _summary_field(stats, 'defaultKeyStatistics', 'forwardEps', 'fmt')

    row = (stock_name, sector, foward_eps, foward_eps, peg_ratio)

    # return data - for thread
    results[number] = row

    # return data - for normal execution
    return row

Next, you can run it the old way:

# Sequential run: call process() for one ticker after another, write each row
# to the worksheet as soon as it is available, and print the elapsed seconds.
stock = ['AAPL', 'GOOG', 'TESL', 'MSFT', 'AAPL', 'GOOG', 'TESL', 'MSFT']

_start = time.time()

# process() also stores every row here; it must exist before the first call.
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

for number, stock_name in enumerate(stock, 1):

    data = process(number, stock_name)

    # data is (stock_name, sector, foward_eps, foward_eps, peg_ratio):
    # foward_eps appears twice, so peg_ratio sits at index 4, not 3.
    worksheet.write(f"A{number}", data[0])  # stock_name
    worksheet.write(f"B{number}", data[1])  # sector
    worksheet.write(f"C{number}", data[2])  # foward_eps
    worksheet.write(f"D{number}", data[4])  # peg_ratio

workbook.close()

_end = time.time()

print(_end - _start)

and this gives me time ~15s, (but sometimes even ~32s)

And now you can use threading to run the same function with different values at the same time.

Because a thread can’t return a result directly, I use the global dictionary results for this (threads share memory).

# Threaded run: start one thread per ticker, wait for all of them, then write
# the rows collected in ``results`` and print the elapsed seconds.
stock = ['AAPL', 'GOOG', 'TESL', 'MSFT', 'AAPL', 'GOOG', 'TESL', 'MSFT']

_start = time.time()

threads = []
# Filled by process(): row number -> result tuple.
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

# start all threads
for number, stock_name in enumerate(stock, 1):
    t = threading.Thread(target=process, args=(number, stock_name))
    t.start()
    threads.append(t)

# wait for end of all threads
for t in threads:
    t.join()

# use results
for number, data in results.items():
    # data is (stock_name, sector, foward_eps, foward_eps, peg_ratio):
    # foward_eps appears twice, so peg_ratio sits at index 4, not 3.
    worksheet.write(f"A{number}", data[0])  # stock_name
    worksheet.write(f"B{number}", data[1])  # sector
    worksheet.write(f"C{number}", data[2])  # foward_eps
    worksheet.write(f"D{number}", data[4])  # peg_ratio

workbook.close()

_end = time.time()

print(_end - _start)

And this gives me time ~6s

For more stocks it would be better to use a thread pool (multiprocessing.pool.ThreadPool) so that only a few threads run at the same time, because running 2000+ threads at once is not a good idea.

Full working code

import requests
import time
import xlsxwriter
import re
from bs4 import BeautifulSoup
import json
import threading

# --- globals ---

# URL templates; both placeholders receive the ticker symbol.
url_profile = 'https://finance.yahoo.com/quote/{}/profile?p={}'
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'

# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
}

# Matches the text " -- Data -- " (any single whitespace character in each
# gap); process() uses it to pick the <script> element that holds the page data.
pattern = re.compile(r'\s--\sData\s--\s')

# --- functions ---

def _load_page_data(url):
    """Download *url* and return the JSON object embedded in its data script.

    Returns an empty dict when the page contains no ``<script>`` element whose
    text matches the module-level ``pattern``, so field lookups on the result
    fall back to "Error" instead of raising AttributeError.
    """
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script = soup.find('script', text=pattern)
    if script is None:
        return {}
    script_data = script.contents[0]
    # The slice starts two characters before the first "context" and drops the
    # last 12 characters (presumably the surrounding JavaScript), leaving JSON.
    start = script_data.find("context") - 2
    return json.loads(script_data[start:-12])


def _summary_field(data, *keys):
    """Return the QuoteSummaryStore value found by following *keys*, or "Error".

    A missing key or a non-subscriptable intermediate value (e.g. ``None``)
    yields the string "Error".
    """
    try:
        value = data['context']['dispatcher']['stores']['QuoteSummaryStore']
        for key in keys:
            value = value[key]
    except (KeyError, TypeError):
        return "Error"
    return value


def process(number, stock_name):
    """Fetch industry, forward EPS and PEG ratio for one ticker.

    Args:
        number: Key under which the row is stored in the global ``results``
            dict (the callers pass the 1-based spreadsheet row).
        stock_name: Ticker symbol inserted into both page URLs.

    Returns:
        The tuple ``(stock_name, sector, foward_eps, foward_eps, peg_ratio)``.
        ``foward_eps`` occupies indices 2 and 3 and ``peg_ratio`` index 4;
        the shape is kept as-is so existing callers keep working. Any field
        that cannot be found is the string "Error".

    Side effects:
        Prints a progress line and stores the same tuple in
        ``results[number]`` so that plain threads, which cannot return a
        value, can still hand the row back.
    """
    # The newline is part of the string (with end='') so the whole line goes
    # out in a single write -- presumably to keep output from concurrent
    # threads from interleaving.
    print(f'{number} {stock_name}\n', end='', flush=True)

    profile = _load_page_data(url_profile.format(stock_name, stock_name))
    sector = _summary_field(profile, 'assetProfile', 'industry')

    stats = _load_page_data(url_stats.format(stock_name, stock_name))
    peg_ratio = _summary_field(stats, 'defaultKeyStatistics', 'pegRatio', 'fmt')
    foward_eps = _summary_field(stats, 'defaultKeyStatistics', 'forwardEps', 'fmt')

    row = (stock_name, sector, foward_eps, foward_eps, peg_ratio)

    # return data - for thread
    results[number] = row

    # return data - for normal execution
    return row

# --- main ---

# Sample tickers shared by both timing runs.
stock = [
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
]

# --- old version ---
# Sequential run: one ticker after another, each row written as it arrives.

_start = time.time()

# process() also stores every row here; it must exist before the first call.
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

for number, stock_name in enumerate(stock, 1):
    data = process(number, stock_name)
    # data is (stock_name, sector, foward_eps, foward_eps, peg_ratio):
    # foward_eps appears twice, so peg_ratio sits at index 4, not 3.
    worksheet.write(f"A{number}", data[0])  # stock_name
    worksheet.write(f"B{number}", data[1])  # sector
    worksheet.write(f"C{number}", data[2])  # foward_eps
    worksheet.write(f"D{number}", data[4])  # peg_ratio

workbook.close()

_end = time.time()

print(_end - _start)

# --- new version ---

# Threaded run: one thread per ticker; rows are written only after every
# thread has finished, then the elapsed seconds are printed.
_start = time.time()

threads = []
# Filled by process(): row number -> result tuple.
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

# start all threads
for number, stock_name in enumerate(stock, 1):
    t = threading.Thread(target=process, args=(number, stock_name))
    t.start()
    threads.append(t)

# wait for end of all threads
for t in threads:
    t.join()

# use results
for number, data in results.items():
    # data is (stock_name, sector, foward_eps, foward_eps, peg_ratio):
    # foward_eps appears twice, so peg_ratio sits at index 4, not 3.
    worksheet.write(f"A{number}", data[0])  # stock_name
    worksheet.write(f"B{number}", data[1])  # sector
    worksheet.write(f"C{number}", data[2])  # foward_eps
    worksheet.write(f"D{number}", data[4])  # peg_ratio

workbook.close()

_end = time.time()

print(_end - _start)

Version with Pool

import requests
import time
import xlsxwriter
import re
from bs4 import BeautifulSoup
import json
import threading
import threading
from multiprocessing.pool import ThreadPool

# --- globals ---

# URL templates; both placeholders receive the ticker symbol.
url_profile = 'https://finance.yahoo.com/quote/{}/profile?p={}'
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'

# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
}

# Matches the text " -- Data -- " (any single whitespace character in each
# gap); process() uses it to pick the <script> element that holds the page data.
pattern = re.compile(r'\s--\sData\s--\s')

# --- functions ---

def _load_page_data(url):
    """Download *url* and return the JSON object embedded in its data script.

    Returns an empty dict when the page contains no ``<script>`` element whose
    text matches the module-level ``pattern``, so field lookups on the result
    fall back to "Error" instead of raising AttributeError.
    """
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script = soup.find('script', text=pattern)
    if script is None:
        return {}
    script_data = script.contents[0]
    # The slice starts two characters before the first "context" and drops the
    # last 12 characters (presumably the surrounding JavaScript), leaving JSON.
    start = script_data.find("context") - 2
    return json.loads(script_data[start:-12])


def _summary_field(data, *keys):
    """Return the QuoteSummaryStore value found by following *keys*, or "Error".

    A missing key or a non-subscriptable intermediate value (e.g. ``None``)
    yields the string "Error".
    """
    try:
        value = data['context']['dispatcher']['stores']['QuoteSummaryStore']
        for key in keys:
            value = value[key]
    except (KeyError, TypeError):
        return "Error"
    return value


def process(number, stock_name):
    """Fetch industry, forward EPS and PEG ratio for one ticker.

    Args:
        number: Key under which the row is stored in the global ``results``
            dict (the callers pass the 1-based spreadsheet row).
        stock_name: Ticker symbol inserted into both page URLs.

    Returns:
        The tuple ``(stock_name, sector, foward_eps, foward_eps, peg_ratio)``.
        ``foward_eps`` occupies indices 2 and 3 and ``peg_ratio`` index 4;
        the shape is kept as-is so existing callers keep working. Any field
        that cannot be found is the string "Error".

    Side effects:
        Prints a progress line and stores the same tuple in
        ``results[number]`` so that plain threads, which cannot return a
        value, can still hand the row back.
    """
    # The newline is part of the string (with end='') so the whole line goes
    # out in a single write -- presumably to keep output from concurrent
    # threads from interleaving.
    print(f'{number} {stock_name}\n', end='', flush=True)

    profile = _load_page_data(url_profile.format(stock_name, stock_name))
    sector = _summary_field(profile, 'assetProfile', 'industry')

    stats = _load_page_data(url_stats.format(stock_name, stock_name))
    peg_ratio = _summary_field(stats, 'defaultKeyStatistics', 'pegRatio', 'fmt')
    foward_eps = _summary_field(stats, 'defaultKeyStatistics', 'forwardEps', 'fmt')

    row = (stock_name, sector, foward_eps, foward_eps, peg_ratio)

    # return data - for thread
    results[number] = row

    # return data - for normal execution
    return row

# --- main ---

# Pool run: at most 10 worker threads call process() concurrently; the rows
# come back in input order and are written once all of them are available.
stock = [
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
]

_start = time.time()

# process() also stores every row here; it must exist before the first call.
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

with ThreadPool(processes=10) as pool:
    # starmap unpacks each (number, stock_name) pair into process() and
    # blocks until every call has returned.
    pool_results = pool.starmap(process, enumerate(stock, 1))
    for number, data in enumerate(pool_results, 1):
        # data is (stock_name, sector, foward_eps, foward_eps, peg_ratio):
        # foward_eps appears twice, so peg_ratio sits at index 4, not 3.
        worksheet.write(f"A{number}", data[0])  # stock_name
        worksheet.write(f"B{number}", data[1])  # sector
        worksheet.write(f"C{number}", data[2])  # foward_eps
        worksheet.write(f"D{number}", data[4])  # peg_ratio

workbook.close()

_end = time.time()

print(_end - _start)

All methods was sourced from stackoverflow.com or stackexchange.com, is licensed under cc by-sa 2.5, cc by-sa 3.0 and cc by-sa 4.0

0 0 votes

Article Rating