# Scrape Motley Fool earnings-call transcripts into parallel result lists.
#
# Pass 1 walks the paginated transcript index and collects article links.
# Pass 2 downloads each selected article and appends the extracted fields to
# the module-level lists below (one entry per transcript, except where noted).

_SITE_ROOT = 'https://www.fool.com'
_INDEX_URL = _SITE_ROOT + '/earnings-call-transcripts/?page='
_INDEX_PAGES = range(1, 677)  # index pages 1..676 inclusive
_REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server hangs the crawl
_QA_HEADINGS = ('Questions & Answers:', 'Questions and Answers:')

callParticipantsList = []
preparedRemarksContentList = []
questionAnswerContentList = []
companyNameList = []
tickerList = []
dateList = []
timeList = []
quarterList = []

listofurls = []
newlistofurls = []


def _fetch_soup(url):
    """Download *url* and return its body parsed by BeautifulSoup (lxml)."""
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    return BeautifulSoup(response.text, "lxml")


def _extract_quarter(soup):
    """Return the two-character quarter label from the page's fourth <p>.

    The label is the first two characters of the markup found between the
    first ``<br/>`` and the ``<span id="date">`` element of that paragraph.
    """
    markup = str(soup.find_all('p')[3])
    start = markup.find("<br/>")
    end = markup.find('<span id="date">')
    return markup[start:end].replace("<br/>", "")[0:2]


def _extract_call_participants(article):
    """Pair every <strong> text with every <em> text after the last <h2>.

    Returns a list of ``(name, role)`` tuples; pairing is positional, so it
    stops at the shorter of the two sequences.
    """
    last_heading = article.find_all('h2')[-1]
    names = [tag.text for tag in last_heading.find_all_next('strong')]
    roles = [tag.text for tag in last_heading.find_all_next('em')]
    return list(zip(names, roles))


def _extract_question_answers(article):
    """Return the Q&A section as ``(speaker, text)`` pairs, or ``0`` if absent.

    The section starts at the first <h2> whose text is one of ``_QA_HEADINGS``
    and runs up to (not including) the last paragraph containing
    ``'Duration:'``.  When no such paragraph exists, every following paragraph
    is kept.  Paragraphs are assumed to alternate speaker line / spoken text.
    """
    qa_heading = next(
        (h for h in article.find_all('h2') if h.text in _QA_HEADINGS),
        None,
    )
    if qa_heading is None:
        return 0

    paragraphs = [p.text for p in qa_heading.find_all_next('p')]

    # Default to "keep everything" so a missing marker can neither raise nor
    # reuse a cut-off computed for a previous transcript.
    cutoff = len(paragraphs)
    for position, text in enumerate(paragraphs):
        if 'Duration:' in text:
            cutoff = position

    body = paragraphs[:cutoff]
    return list(zip(body[0::2], body[1::2]))


def _extract_prepared_remarks(article):
    """Return paragraphs 4.. up to the second 'Operator' paragraph, or None.

    ``None`` means fewer than two paragraphs mention 'Operator', so the end
    of the prepared remarks cannot be located.
    """
    paragraphs = [p.text for p in article.find_all('p')[4:]]
    # enumerate() yields the real position of each match; list.index() would
    # return the first position for every paragraph with identical text.
    operator_positions = [
        position for position, text in enumerate(paragraphs)
        if 'Operator' in text
    ]
    if len(operator_positions) < 2:
        return None
    return paragraphs[:operator_positions[1]]


# Pass 1: collect article links from every index page.
for page_number in _INDEX_PAGES:
    index_soup = _fetch_soup(_INDEX_URL + str(page_number))
    listing = index_soup.find(
        class_="content-block listed-articles recent-articles m-np"
    )
    for anchor in listing.find_all('a', href=True):
        listofurls.append(_SITE_ROOT + str(anchor['href']))

# Keep every other link.
# NOTE(review): presumably each article is linked twice in the listing; confirm.
newlistofurls.extend(listofurls[::2])

# Pass 2: download each transcript and extract its fields.
for transcript_url in newlistofurls:
    soup = _fetch_soup(transcript_url)
    article = soup.find(class_='article-content')
    header = article.find_all('p')[1]

    companyNameList.append(header.find('strong').text)
    tickerList.append(
        header.find(class_='ticker').text.replace(')', "").replace('(', "")
    )
    dateList.append(header.find(id='date').text)
    timeList.append(header.find(id='time').text)
    quarterList.append(_extract_quarter(soup))

    # Each entry is a one-element list wrapping the list of pairs; this
    # nesting is kept as-is because later code may rely on the shape.
    callParticipantsList.append([_extract_call_participants(article)])

    questionAnswerContentList.append(_extract_question_answers(article))

    prepared_remarks = _extract_prepared_remarks(article)
    if prepared_remarks is not None:
        preparedRemarksContentList.append(prepared_remarks)
    # NOTE(review): nothing is appended when the boundary is not found, so
    # preparedRemarksContentList can end up shorter than the other lists.