The goal of this document is to scrape eBay listing data to get "street" prices for computer parts.

from bs4 import BeautifulSoup
import ssl
import urllib.request, urllib.parse
import pandas as pd
import time
# GPUs, CPUs, and motherboards to search for.
# Each entry is used verbatim as an eBay search term, so every list should
# contain each term exactly once (a duplicate is scraped twice and produces a
# duplicate row in the summary table).
GPUs=['RTX 4090', 'RTX 4080', 'RX 7900 XTX', 'RTX 4070 Ti', 'RTX 3090 Ti',
      'RX 6950 XT', 'RX 7900 XT', 'RTX 3080 Ti', 'RTX 3090', 'RTX 3080',
      'RX 6900', 'RX 6800', 'RTX 3070', 'RTX 3070 Ti', 'RTX 2080 Ti',
      'RX 6800 XT', 'RX 6750 XT', 'RTX 3060 Ti', 'RX 6700 XT', 'RTX 2080 super',
      'RTX 2080', 'GTX 1080 Ti', 'RTX 2070 Super', 'RX 6650 XT', 'RTX 3060',
      'RX 5700 XT', 'RTX 2060 Super', 'RX 6600 XT', 'RTX 2070', 'RTX 2060',
      'GTX 1080', 'RX 6600', 'RX 5700', 'GTX 1070 Ti', 'GTX 980 Ti',
      'RX 5600 XT', 'GTX 1070', 'GTX 1660 Super', 'GTX 1660', 'RTX 3050',
      'GTX 1650 Super','GTX 1060','RX 6500 XT', 'GTX 760 Ti', 'GTX 750 Ti',
      'GTX 1050']
CPUs= ['Ryzen 5 3600', 'Ryzen 5 5500', 'Ryzen 5 5600', 'Ryzen 5 4600G',
       'Ryzen 7 2700X', 'Ryzen 5 3600X', 'Ryzen 7 4700', 'Ryzen 7 5700',
       'Ryzen 5 5600X', 'Ryzen 7 5800', 'Ryzen 5 7600X', 'Ryzen 5 7600',
       'Ryzen 9 5900', 'Ryzen 9 3950X']
Motherboards= ['b450', 'b550', 'b460', 'b560']

Things to do:

  1. Make a loop that goes through all the items in the GPUs list.
  2. For each item in the loop, replace the spaces with a URL-safe separator (+ or %20).
  3. Do a search for those terms and make a dataframe of the results, similar to before.
  4. Put the name, min, max, mean, and sd (standard deviation) in a master dataframe.
# Summary table: one row per GPU search term. The three price columns start
# out empty and are filled in by the loop below.
master_df = pd.DataFrame(columns=['name', 'min price', 'mean price', 'max price'])
master_df['name']=GPUs
# Example of a complete search URL:
# 'https://www.ebay.com/sch/27386/i.html?_from=R40&_nkw=RX%206950%20XT&LH_ItemCondition=1500%7C2000%7C2500%7C3000%7C1000&LH_Sold=1&LH_Complete=1&_ipg=240&rt=nc&_udlo=50'
# The URL-encoded search term is inserted between these two pieces.
urlp1="https://www.ebay.com/sch/27386/i.html?_from=R40&_nkw="
urlp2="&LH_ItemCondition=1500%7C2000%7C2500%7C3000%7C1000&LH_Sold=1&LH_Complete=1&_ipg=240&rt=nc&_udlo=50"
for j, gpu in enumerate(GPUs):
  # quote() encodes spaces as %20 (as before) and also escapes any other
  # characters that are not safe in a query string.
  searchstr = urllib.parse.quote(gpu)
  url = urlp1 + searchstr + urlp2
  # Close the HTTP response deterministically instead of leaving it to the GC.
  with urllib.request.urlopen(url) as response:
    html = response.read()
  soup = BeautifulSoup(html, 'html.parser')
  # Each matching div is treated as one listing card.
  main_data = soup.find_all('div', class_="s-item__info clearfix")

  names = []
  prices = []
  for line in main_data:
    price_tag = line.find("span", class_="s-item__price")
    title_tag = line.select_one(".s-item__title span")
    # find()/select_one() return None when the element is missing; skip such
    # cards instead of crashing on .get_text()/.text.
    if price_tag is None or title_tag is None:
      continue
    price_text = price_tag.get_text()
    # Skip price ranges ("$10.00 to $20.00"): they are not a single price.
    if "to" in price_text:
      continue
    names.append(title_tag.text)
    # "$1,234.56" -> 1234.56
    prices.append(float(price_text.replace("$", "").replace(",", "")))

  df = pd.DataFrame({
    "name": names,
    "price": prices
  })
  # Discard the first parsed row (presumably eBay's placeholder card rather
  # than a real listing -- TODO confirm). iloc[1:] keeps the same rows as
  # drop(0) but does not raise when the search returned nothing.
  df = df.iloc[1:]

  # Optional throttle between requests (disabled): time.sleep(0.1)

  # Write with .loc: chained assignment (master_df[col][j] = ...) may write to
  # a temporary copy and is a silent no-op under pandas Copy-on-Write.
  # An empty result leaves NaN in all three columns.
  master_df.loc[j, 'min price'] = df['price'].min()
  master_df.loc[j, 'mean price'] = df['price'].mean()
  master_df.loc[j, 'max price'] = df['price'].max()
print(master_df)
              name min price   mean price max price
0         RTX 4090     390.0  1608.450625    4000.0
1         RTX 4080    174.45   893.395083   1836.47
2      RX 7900 XTX    348.99   801.369958    5400.0
3      RTX 4070 Ti      50.0   649.909623    1240.8
4      RTX 3090 Ti     79.99   748.848667    1775.0
5       RX 6950 XT    448.89   568.597829     849.0
6       RX 7900 XT     430.0   682.216642    978.02
7      RTX 3080 Ti     250.0   612.698042    2565.0
8         RTX 3090     79.99   748.997667    1775.0
9         RTX 3080     250.0   494.421208   1117.99
10         RX 6900      50.0   521.672083    1300.0
11         RX 6800     127.5    409.69125   1429.99
12        RTX 3070     199.0     321.0765    689.75
13     RTX 3070 Ti     199.0   321.713833    895.41
14     RTX 2080 Ti     59.95   345.211375     965.0
15      RX 6800 XT     127.5   436.225083   1429.99
16      RX 6750 XT     255.0   318.173854    498.29
17     RTX 3060 Ti     156.5   248.399167    474.18
18      RX 6700 XT      99.0    271.81725    856.99
19  RTX 2080 super     59.95   233.068125    479.95
20        RTX 2080     59.95     297.0785     965.0
21     GTX 1080 Ti      61.0   183.697875     465.0
22  RTX 2070 Super     59.95   198.630667    374.99
23      RX 6650 XT     100.0   206.616396    679.99
24        RTX 3060      56.0   240.172917     500.0
25      RX 5700 XT     70.11   139.833417     400.0
26  RTX 2060 Super      99.0    165.87525    274.99
27      RX 6600 XT     110.0   178.047125    1360.0
28        RTX 2070     59.95   185.863125    586.01
29        RTX 2060      82.0   157.454792     570.0
30        GTX 1080      61.0   154.951833     426.5
31         RX 6600     100.0    168.43725    1360.0
32         RX 5700      65.0      134.244    299.99
33     GTX 1070 Ti      50.0   106.886875     210.0
34        RTX 2060      82.0   157.683375     570.0
35      GTX 980 Ti      50.0     94.57392    379.99
36      RX 5600 XT      60.0   109.589458     600.0
37        GTX 1070      58.8    99.379038    495.95
38  GTX 1660 Super      53.0     104.8985    199.99
39        GTX 1660      54.0     103.3435     200.0
40        GTX 3050     100.0   248.365263     495.0
41  GTX 1650 Super      50.0    93.561833    306.77
42        GTX 1060      50.0    78.154603    537.32
43      RX 6500 XT      50.0    95.285455    264.42
44      GTX 760 Ti      50.0    72.979793     465.0
45      GTX 750 Ti      50.0       70.686    239.72
46        GTX 1050      50.0    79.620167    251.16

CPUs

# Summary table: one row per CPU search term. The three price columns start
# out empty and are filled in by the loop below.
cpu_master_df = pd.DataFrame(columns=['name', 'min price', 'mean price', 'max price'])
cpu_master_df['name']=CPUs
# Example of a complete search URL:
# 'https://www.ebay.com/sch/i.html?_from=R40&_nkw=ryzen+5+3600&_sacat=164&LH_Sold=1&LH_Complete=1&rt=nc&LH_ItemCondition=1000%7C1500%7C2500%7C3000&_ipg=240'
# The URL-encoded search term is inserted between these two pieces.
urlp1="https://www.ebay.com/sch/i.html?_from=R40&_nkw="
urlp2="&_sacat=164&LH_Sold=1&LH_Complete=1&rt=nc&LH_ItemCondition=1000%7C1500%7C2500%7C3000&_ipg=240"
for j, cpu in enumerate(CPUs):
  # quote_plus() encodes spaces as "+" (as before) and also escapes any other
  # characters that are not safe in a query string.
  searchstr = urllib.parse.quote_plus(cpu)
  url = urlp1 + searchstr + urlp2
  # Close the HTTP response deterministically instead of leaving it to the GC.
  with urllib.request.urlopen(url) as response:
    html = response.read()
  soup = BeautifulSoup(html, 'html.parser')
  # Each matching div is treated as one listing card.
  main_data = soup.find_all('div', class_="s-item__info clearfix")

  names = []
  prices = []
  for line in main_data:
    price_tag = line.find("span", class_="s-item__price")
    title_tag = line.select_one(".s-item__title span")
    # find()/select_one() return None when the element is missing; skip such
    # cards instead of crashing on .get_text()/.text.
    if price_tag is None or title_tag is None:
      continue
    price_text = price_tag.get_text()
    # Skip price ranges ("$10.00 to $20.00"): they are not a single price.
    if "to" in price_text:
      continue
    names.append(title_tag.text)
    # "$1,234.56" -> 1234.56
    prices.append(float(price_text.replace("$", "").replace(",", "")))

  df = pd.DataFrame({
    "name": names,
    "price": prices
  })
  # Discard the first parsed row (presumably eBay's placeholder card rather
  # than a real listing -- TODO confirm). iloc[1:] keeps the same rows as
  # drop(0) but does not raise when the search returned nothing.
  df = df.iloc[1:]

  # Optional throttle between requests (disabled): time.sleep(0.1)

  # Write with .loc: chained assignment (cpu_master_df[col][j] = ...) may
  # write to a temporary copy and is a silent no-op under pandas
  # Copy-on-Write. An empty result leaves NaN in all three columns.
  cpu_master_df.loc[j, 'min price'] = df['price'].min()
  cpu_master_df.loc[j, 'mean price'] = df['price'].mean()
  cpu_master_df.loc[j, 'max price'] = df['price'].max()

print(cpu_master_df)
             name min price  mean price max price
0    Ryzen 5 3600      33.0   70.748213     241.5
1    Ryzen 5 5500      50.0   83.893776     184.0
2    Ryzen 5 5600      1.04  122.680212     325.0
3   Ryzen 5 4600G     13.11   69.023745    143.56
4   Ryzen 7 2700X      27.0   68.579731     157.5
5   Ryzen 5 3600X      35.0   75.843067    229.95
6    Ryzen 7 4700      21.5  144.200732    371.55
7    Ryzen 7 5700     10.99  148.918792     285.0
8   Ryzen 5 5600X     29.99  132.423983     325.0
9    Ryzen 7 5800       5.0  205.142255     500.0
10  Ryzen 5 7600X     89.99  197.200753    284.99
11   Ryzen 5 7600     89.99  195.722596    284.99
12   Ryzen 9 5900     150.0  250.934034    531.24
13  Ryzen 9 3950X     150.0  282.228861     500.0

Motherboards

# Summary table: one row per motherboard search term. The three price columns
# start out empty and are filled in by the loop below.
moth_master_df = pd.DataFrame(columns=['name', 'min price', 'mean price', 'max price'])
moth_master_df['name']= Motherboards
# Example of a complete search URL:
# 'https://www.ebay.com/sch/i.html?_from=R40&_nkw=b450%20&_sacat=1244&LH_TitleDesc=0&LH_Sold=1&LH_Complete=1&LH_ItemCondition=1000%7C1500%7C2500%7C3000&_ipg=240&rt=nc&_udlo=30'
# The URL-encoded search term is inserted between these two pieces. Note that
# urlp2 starts with "%20", so every search term is sent with a trailing space.
urlp1="https://www.ebay.com/sch/i.html?_from=R40&_nkw="
urlp2="%20&_sacat=1244&LH_TitleDesc=0&LH_Sold=1&LH_Complete=1&LH_ItemCondition=1000%7C1500%7C2500%7C3000&_ipg=240&rt=nc&_udlo=30"
for j, moth in enumerate(Motherboards):
  # quote_plus() encodes spaces as "+" (as before) and also escapes any other
  # characters that are not safe in a query string.
  searchstr = urllib.parse.quote_plus(moth)
  url = urlp1 + searchstr + urlp2
  # Close the HTTP response deterministically instead of leaving it to the GC.
  with urllib.request.urlopen(url) as response:
    html = response.read()
  soup = BeautifulSoup(html, 'html.parser')
  # Each matching div is treated as one listing card.
  main_data = soup.find_all('div', class_="s-item__info clearfix")

  names = []
  prices = []
  for line in main_data:
    price_tag = line.find("span", class_="s-item__price")
    title_tag = line.select_one(".s-item__title span")
    # find()/select_one() return None when the element is missing; skip such
    # cards instead of crashing on .get_text()/.text.
    if price_tag is None or title_tag is None:
      continue
    price_text = price_tag.get_text()
    # Skip price ranges ("$10.00 to $20.00"): they are not a single price.
    if "to" in price_text:
      continue
    names.append(title_tag.text)
    # "$1,234.56" -> 1234.56
    prices.append(float(price_text.replace("$", "").replace(",", "")))

  df = pd.DataFrame({
    "name": names,
    "price": prices
  })
  # Discard the first parsed row (presumably eBay's placeholder card rather
  # than a real listing -- TODO confirm). iloc[1:] keeps the same rows as
  # drop(0) but does not raise when the search returned nothing.
  df = df.iloc[1:]

  # Optional throttle between requests (disabled): time.sleep(0.1)

  # Write with .loc: chained assignment (moth_master_df[col][j] = ...) may
  # write to a temporary copy and is a silent no-op under pandas
  # Copy-on-Write. An empty result leaves NaN in all three columns.
  moth_master_df.loc[j, 'min price'] = df['price'].min()
  moth_master_df.loc[j, 'mean price'] = df['price'].mean()
  moth_master_df.loc[j, 'max price'] = df['price'].max()

print(moth_master_df)
   name min price  mean price max price
0  b450      31.0   79.164603    349.99
1  b550     34.09  105.349874     371.0
2  b460      30.0   87.572527    321.98
3  b560      33.0   90.912958    291.03