CoderCastrov logo
CoderCastrov
Программирование

Как сохранить данные парсинга в CSV и проанализировать их

Как сохранить данные парсинга в CSV и проанализировать их
просмотров
4 мин чтения
#Программирование

16/11/22

Здесь я практикуюсь в сохранении данных парсинга в CSV и их анализе. Сначала мы сохраняем результаты парсинга в файл CSV.

Шаг 1

from bs4 import BeautifulSoup
import requests
import pandas as pd

def convertR(rtg):
    """Translate a star-rating class list into its digit, as a string.

    ``rtg`` is compared against ``['star-rating', <word>]`` for the words
    One, Two, Three and Four, in that order; the first match yields
    '1'..'4' respectively. Any other value falls through to '5'.
    """
    for digit, word in enumerate(('One', 'Two', 'Three', 'Four'), start=1):
        if rtg == ['star-rating', word]:
            return str(digit)
    # Everything that is not one of the four lists above is reported as '5'.
    return '5'

# Scrape catalogue pages 1-4 of books.toscrape.com and store the image path,
# title, rating and price of every listed book in Data_Buku.csv.
try:
    print('')
    print('Начинаем парсинг...')
    print('')
    data = []
    for page in range(1, 5):
        # A timeout prevents the script from hanging forever on a stalled
        # connection; raise_for_status() turns an HTTP error response into
        # an exception instead of letting an error page be parsed.
        html = requests.get(
            'http://books.toscrape.com/catalogue/page-' + str(page) + '.html',
            timeout=30,
        )
        html.raise_for_status()
        html_soup = BeautifulSoup(html.content, 'html.parser')
        data_all = html_soup.find_all('article', class_='product_pod')
        print('-------------------------------')
        print('~ ~ ~ Парсинг страницы №' + str(page) + ' ~ ~ ~')
        print('-------------------------------')
        for b in data_all:
            # Cover image: the (relative) src of the thumbnail <img>.
            dataGambar = b.find('img', class_='thumbnail').get('src')

            # Title: taken from the title attribute of the link inside <h3>.
            dataJudul = b.find('h3').find('a').get('title')

            # Rating: the class list of the first <p> in the article is
            # handed to convertR, which maps it to a digit string.
            dataRating = b.find('p').get('class')
            Rating = convertR(dataRating)

            dataHarga = b.find('p', class_='price_color').text

            data.append({
                'Gambar': dataGambar,
                'Judul': dataJudul,
                'Rating': Rating,
                # Strip the currency sign so only the number is stored.
                'Harga': dataHarga.replace('£', ''),
            })
    df = pd.DataFrame(data)
    df.to_csv('Data_Buku.csv', encoding='utf-8')
    print('Парсинг успешно завершен...')

except Exception as err:
    # Report the failure instead of discarding it silently, so a network or
    # markup problem is visible to whoever runs the script.
    print('Ошибка при парсинге: ' + str(err))

Запустите приведенный выше код.

В результате получится следующий файл CSV:

,Gambar,Judul,Rating,Harga
0,../media/cache/2c/da/2cdad67c44b002e7ead0cc35693c0e8b.jpg,A Light in the Attic,3,51.77
1,../media/cache/26/0c/260c6ae16bce31c8f8c95daddd9f4a1c.jpg,Tipping the Velvet,1,53.74
2,../media/cache/3e/ef/3eef99c9d9adef34639f510662022830.jpg,Soumission,1,50.10
3,../media/cache/32/51/3251cf3a3412f53f339e42cac2134093.jpg,Sharp Objects,4,47.82
4,../media/cache/be/a5/bea5697f2534a2f86a3ef27b5a8c12a6.jpg,Sapiens: A Brief History of Humankind,5,54.23
5,../media/cache/68/33/68339b4c9bc034267e1da611ab3b34f8.jpg,The Requiem Red,1,22.65
6,../media/cache/92/27/92274a95b7c251fea59a2b8a78275ab4.jpg,The Dirty Little Secrets of Getting Your Dream Job,4,33.34
7,../media/cache/3d/54/3d54940e57e662c4dd1f3ff00c78cc64.jpg,"The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull",3,17.93
8,../media/cache/66/88/66883b91f6804b2323c8369331cb7dd1.jpg,The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics,4,22.60
9,../media/cache/58/46/5846057e28022268153beff6d352b06c.jpg,The Black Maria,1,52.15
10,../media/cache/be/f4/bef44da28c98f905a3ebec0b87be8530.jpg,"Starving Hearts (Triangular Trade Trilogy, #1)",2,13.99
11,../media/cache/10/48/1048f63d3b5061cd2f424d20b3f9b666.jpg,Shakespeare's Sonnets,4,20.66
12,../media/cache/5b/88/5b88c52633f53cacf162c15f4f823153.jpg,Set Me Free,5,17.46
13,../media/cache/94/b1/94b1b8b244bce9677c2f29ccc890d4d2.jpg,Scott Pilgrim's Precious Little Life (Scott Pilgrim #1),5,52.29
14,../media/cache/81/c4/81c4a973364e17d01f217e1188253d5e.jpg,Rip it Up and Start Again,5,35.02
15,../media/cache/54/60/54607fe8945897cdcced0044103b10b6.jpg,"Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991",3,57.25
16,../media/cache/55/33/553310a7162dfbc2c6d19a84da0df9e1.jpg,Olio,1,23.88
17,../media/cache/09/a3/09a3aef48557576e1a85ba7efea8ecb7.jpg,Mesaerion: The Best Science Fiction Stories 1800-1849,1,37.59
18,../media/cache/0b/bc/0bbcd0a6f4bcd81ccb1049a52736406e.jpg,Libertarianism for Beginners,2,51.33
19,../media/cache/27/a5/27a53d0bb95bdd88288eaf66c9230d7e.jpg,It's Only the Himalayas,2,45.17
20,../media/cache/5d/72/5d72709c6a7a9584a4d1cf07648bfce1.jpg,In Her Wake,1,12.84
21,../media/cache/5c/c8/5cc8e107246cb478960d4f0aba1e1c8e.jpg,How Music Works,2,37.32
22,../media/cache/9f/59/9f59f01fa916a7bb8f0b28a4012179a4.jpg,"Foolproof Preserving: A Guide to Small Batch Jams, Jellies, Pickles, Condiments, and More: A Foolproof Guide to Making Small Batch Jams, Jellies, Pickles, Condiments, and More",3,30.52
23,../media/cache/9c/2e/9c2e0eb8866b8e3f3b768994fd3d1c1a.jpg,Chase Me (Paris Nights #2),5,25.27
24,../media/cache/44/cc/44ccc99c8f82c33d4f9d2afa4ef25787.jpg,Black Dust,5,34.53
25,../media/cache/af/6e/af6e796160fe63e0cf19d44395c7ddf2.jpg,Birdsong: A Story in Pictures,3,54.64
26,../media/cache/ef/0b/ef0bed08de4e083dba5e20fdb98d9c36.jpg,America's Cradle of Quarterbacks: Western Pennsylvania's Football Factory from Johnny Unitas to Joe Montana,3,22.50
27,../media/cache/d6/da/d6da0371958068bbaf39ea9c174275cd.jpg,Aladdin and His Wonderful Lamp,3,53.13
28,../media/cache/2e/98/2e98c332bf8563b584784971541c4445.jpg,Worlds Elsewhere: Journeys Around Shakespeare’s Globe,5,40.30
29,../media/cache/a5/41/a5416b9646aaa7287baa287ec2590270.jpg,Wall and Piece,4,44.18
30,../media/cache/0f/7e/0f7ee69495c0df1d35723f012624a9f8.jpg,The Four Agreements: A Practical Guide to Personal Freedom,5,17.66
31,../media/cache/38/c5/38c56fba316c07305643a8065269594e.jpg,The Five Love Languages: How to Express Heartfelt Commitment to Your Mate,3,31.05
32,../media/cache/5d/7e/5d7ecde8e81513eba8a64c9fe000744b.jpg,The Elephant Tree,5,23.82
33,../media/cache/cf/bb/cfbb5e62715c6d888fd07794c9bab5d6.jpg,The Bear and the Piano,1,36.89
34,../media/cache/65/71/6571919836ec51ed54f0050c31d8a0cd.jpg,Sophie's World,5,15.94
35,../media/cache/12/53/1253c21c5ef3c6d075c5fa3f5fecee6a.jpg,Penny Maybe,3,33.29
36,../media/cache/f5/88/f5889d038f5d8e949b494d147c2dcf54.jpg,Maude (1883-1993):She Grew Up with the country,2,18.02
37,../media/cache/23/85/238570a1c284e730dbc737a7e631ae2b.jpg,"In a Dark, Dark Wood",1,19.63
38,../media/cache/e1/5c/e15c289ba58cea38519e1281e859f0c1.jpg,Behind Closed Doors,4,52.22
39,../media/cache/e9/20/e9203b733126c4a0832a1c7885dc27cf.jpg,You can't bury them all: Poems,2,33.63
40,../media/cache/72/41/72417db983862010ef0c1a25de98c7d7.jpg,Slow States of Collapse: Poems,3,57.31
41,../media/cache/cb/bd/cbbdb0222ee8a0f6ab61657412a15794.jpg,Reasons to Stay Alive,2,26.41
42,../media/cache/9d/05/9d0533bae1578846d728a82913b95c26.jpg,Private Paris (Private #10),5,47.61
43,../media/cache/9c/46/9c463c7631c82401160fd3b554b8f0e1.jpg,#HigherSelfie: Wake Up Your Life. Free Your Soul. Find Your Tribe.,5,23.11
44,../media/cache/24/e2/24e2f5c9d325c4004d8190c054da86dd.jpg,Without Borders (Wanderlove #1),2,45.07
45,../media/cache/08/04/08044269fc197645268a6197c57e6173.jpg,When We Collided,1,31.77
46,../media/cache/5f/15/5f152afdbc42356ecba02f61058a7e5b.jpg,"We Love You, Charlie Freeman",5,50.27
47,../media/cache/f9/3b/f93b4a650f03a5d21f2436d7813f42c2.jpg,Untitled Collection: Sabbath Poems 2014,4,14.27
48,../media/cache/41/a2/41a20f35adf0caea24f208dc01ad7681.jpg,"Unseen City: The Majesty of Pigeons, the Discreet Charm of Snails & Other Wonders of the Urban Wilderness",4,44.18
49,../media/cache/76/8e/768ea5924ac1ef6297c2be9959c796c2.jpg,Unicorn Tracks,3,18.78
50,../media/cache/4a/3b/4a3b055f9e378a95fedbef55e7bab7ce.jpg,"Unbound: How Eight Technologies Made Us Human, Transformed Society, and Brought Our World to the Brink",1,25.52
51,../media/cache/36/df/36df4caaf1420b1183a8235355d39e69.jpg,Tsubasa: WoRLD CHRoNiCLE 2 (Tsubasa WoRLD CHRoNiCLE #2),1,16.28
52,../media/cache/03/86/038650c9e7517b4baf2a423cd8eed38f.jpg,Throwing Rocks at the Google Bus: How Growth Became the Enemy of Prosperity,3,31.12
53,../media/cache/c4/dd/c4ddd9ced89966b0602ec85e00cd5b61.jpg,This One Summer,4,19.49
54,../media/cache/c4/0a/c40a64f59e7487b1a80a049f6ceb2ba5.jpg,Thirst,5,17.27
55,../media/cache/33/e5/33e507172541628acfd421503196b578.jpg,The Torch Is Passed: A Harding Family Story,1,19.09
56,../media/cache/c4/a2/c4a2a1a026c67bcceb5a411c724d7d0c.jpg,The Secret of Dreadwillow Carse,1,56.13
57,../media/cache/b7/f4/b7f4843dbe062d44be1ffcfa16b2faa4.jpg,"The Pioneer Woman Cooks: Dinnertime: Comfort Classics, Freezer Food, 16-Minute Meals, and Other Delicious Ways to Solve Supper!",1,56.41
58,../media/cache/89/b8/89b850edb01851a91f64ba114b96acb6.jpg,The Past Never Ends,4,56.50
59,../media/cache/5d/7f/5d7f496cdf5e5962a73ecdcc1505c1d5.jpg,The Natural History of Us (The Fine Art of Pretending #2),3,45.22
60,../media/cache/f4/79/f479de5f305c2ac0512702cf7155bb74.jpg,The Nameless City (The Nameless City #1),4,38.16
61,../media/cache/dc/44/dc44f8e2aebac48ca8553814d9b021a8.jpg,The Murder That Never Was (Forensic Instincts #5),3,54.11
62,../media/cache/d4/8d/d48d5122a15347e9fe2b15ad354d69bf.jpg,The Most Perfect Thing: Inside (and Outside) a Bird's Egg,4,42.96
63,../media/cache/f8/6d/f86d08178e3788563ac17be5aefd29f0.jpg,"The Mindfulness and Acceptance Workbook for Anxiety: A Guide to Breaking Free from Anxiety, Phobias, and Worry Using Acceptance and Commitment Therapy",4,23.89
64,../media/cache/95/64/95647d6a526bf54120b9445e124794e1.jpg,The Life-Changing Magic of Tidying Up: The Japanese Art of Decluttering and Organizing,3,16.77
65,../media/cache/75/dc/75dce2f5949b407161f37f0af249b018.jpg,"The Inefficiency Assassin: Time Management Tactics for Working Smarter, Not Longer",5,20.59
66,../media/cache/64/15/641570cd7e7aded53c7d33d78a9629f1.jpg,The Gutsy Girl: Escapades for Your Life of Epic Adventure,1,37.13
67,../media/cache/2e/23/2e236e23ad52aa74505f224f6552eda8.jpg,The Electric Pencil: Drawings from Inside State Hospital No. 3,1,56.06
68,../media/cache/71/df/71df730cf38c232ee58a2e407135f055.jpg,The Death of Humanity: and the Case for Life,4,58.11
69,../media/cache/ee/3e/ee3e219d23e73ba71c79b700f183aaed.jpg,"The Bulletproof Diet: Lose up to a Pound a Day, Reclaim Energy and Focus, Upgrade Your Life",3,49.05
70,../media/cache/70/fa/70fa6c0437d9c97dbeada6bd32bf9d2c.jpg,The Art Forger,3,40.76
71,../media/cache/2d/4e/2d4e358712e6c9f1d3bdd78d1a16e5a8.jpg,The Age of Genius: The Seventeenth Century and the Birth of the Modern Mind,1,19.73
72,../media/cache/96/db/96db61bb53930c560fb4c1c62b583816.jpg,The Activist's Tao Te Ching: Ancient Advice for a Modern Revolution,5,32.24
73,../media/cache/f3/4f/f34ffb24cc21c9f9f52dad4fd8f3ac21.jpg,Spark Joy: An Illustrated Master Class on the Art of Organizing and Tidying Up,4,41.83
74,../media/cache/69/85/69852567cf97264a1442cbc882c84903.jpg,Soul Reader,2,39.58
75,../media/cache/da/df/dadfac66a89774b46b10225362724c83.jpg,Security,2,39.25
76,../media/cache/43/ae/43aee83ebb31e2122a7215e413770e5c.jpg,"Saga, Volume 6 (Saga (Collected Editions) #6)",3,25.02
77,../media/cache/e1/ea/e1ea6cb36e62ae6dc7b805f68ab9a700.jpg,"Saga, Volume 5 (Saga (Collected Editions) #5)",2,51.04
78,../media/cache/97/f8/97f8debeeaaece9603267653076e760f.jpg,Reskilling America: Learning to Labor in the Twenty-First Century,2,19.83
79,../media/cache/f3/ef/f3efd43ae0fa85d9b325d5e8783e7af5.jpg,"Rat Queens, Vol. 3: Demons (Rat Queens (Collected Editions) #11-15)",3,50.40

Шаг 2

Выведем результаты анализа данных, например самые часто встречающиеся названия книг или цены на книги.

import pandas as pd

# Load the CSV produced by the scraping step. The name must match the one
# passed to to_csv() exactly ('Data_Buku.csv'); on a case-sensitive file
# system 'data_buku.csv' would not be found.
df = pd.read_csv('Data_Buku.csv')
# Count how often each title occurs. to_string() has to be *called* to get
# the full table as text; printing the bare attribute only shows the repr of
# the bound method.
print(df['Judul'].value_counts().to_string())

Результат

Последний шаг анализа данных

Отображение анализа данных в графическом виде

import pandas as pd
import matplotlib.pyplot as plt

# Draw a horizontal bar chart of how many books fall under each rating value
# found in Data_Buku.csv.
plt.rcParams["figure.autolayout"] = True
df = pd.read_csv('Data_Buku.csv')
fig, ax = plt.subplots()
# Same pipeline as a single chained expression, split into named steps.
sorted_ratings = df['Rating'].sort_values(ascending=True)
rating_counts = sorted_ratings.value_counts()
rating_counts.plot(ax=ax, kind='barh')
plt.show()

Результат анализа

Спасибо за чтение! Надеюсь, статья была полезной.


Переведи введенный markdown код с английского на русский. Сохрани оригинальную разметку markdown и изображения на своих местах. Вместо слов скрэпинг/скрапинг/скрапер/скрэпер используй парсинг/парсер/парсить. Возвращай только markdown код, без дополнительного текста