hi guys
I am learning web scraping on a few different types of sites, and I cannot figure out how to get only one word from a string.
returned:
Basically, I would like to get the pieces separately: 3 Bedrooms, 3 Bathrooms, Apartment, etc.
Also, I've got another question:
I am also scraping a different site, and there I could use the .text attribute ... but now I had to use get_text(). Why is that?
I am learning web scraping on a few different types of sites, and I cannot figure out how to get only one word from a string.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
"""Fetch the PadMapper Calgary listings page and print the first listing.

For the first listing card found on the page, three lines are printed:
the price text, the info text (bedrooms / bathrooms / type / area), and
the header text (address).
"""
import re

from bs4 import BeautifulSoup
import pandas as pd
import requests

# Search-results page being scraped; also sent as the Referer header.
URL = 'https://www.padmapper.com/apartments/calgary-ab?property-categories=apartment,condo&sort=-price&lease-term=long'

headers = {
    'Accept': 'application/json',
    'Referer': URL,
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
    'Sec-Fetch-Mode': 'cors',
    'Content-Type': 'application/json',
}

# Request body: a JSON array holding two JSON-encoded event strings.
# NOTE(review): both entries are named LIST_ITEM_IMPRESSION and look like
# browser telemetry captured alongside the page load; the GET below probably
# does not need a body at all -- confirm before removing. The value is kept
# unchanged; the two adjacent literals are concatenated by the parser.
data = (
    '["{\\"sessionId\\":\\"uawg5o3gkk\\",\\"uniqueId\\":\\"2t383bqgq9\\",\\"viewportHeight\\":937,\\"viewportWidth\\":1920,\\"url\\":\\"https://www.padmapper.com/apartments/calgary-ab?property-categories=apartment,condo&sort=-price&lease-term=long\\",\\"site\\":2,\\"timestamp\\":1567001142402,\\"datetime\\":1567001142,\\"abTests\\":{},\\"filters\\":{\\"availableAfter\\":null,\\"availableBefore\\":null,\\"bedrooms\\":[false,false,false,false,false,false],\\"buildingAmenities\\":[],\\"cats\\":null,\\"customMaxPrice\\":false,\\"dogs\\":null,\\"encodedFilters\\":null,\\"excludeAirbnb\\":null,\\"external\\":true,\\"features\\":[],\\"feeds\\":null,\\"floorplans\\":null,\\"hasImages\\":null,\\"isZappable\\":null,\\"keywords\\":null,\\"listingAmenities\\":[],\\"liveByPolygons\\":[],\\"longTerm\\":true,\\"maxLat\\":null,\\"maxLng\\":null,\\"maxDays\\":null,\\"maxPrice\\":5000,\\"maxPricePerBedroom\\":null,\\"maxSquareFeet\\":null,\\"minBathrooms\\":null,\\"minLat\\":null,\\"minLng\\":null,\\"minPrice\\":0,\\"minSquareFeet\\":null,\\"mmFrom\\":null,\\"neighborhoodIds\\":[],\\"noFees\\":null,\\"pad\\":null,\\"polygons\\":[],\\"propertyCategories\\":{\\"apartment\\":true,\\"condo\\":true,\\"house\\":null,\\"room\\":null,\\"other\\":null},\\"q\\":null,\\"qs\\":null,\\"incomeRestricted\\":null,\\"section8\\":null,\\"shortTerm\\":false,\\"sort\\":[\\"-price\\"],\\"sources\\":{\\"apartmentSearch\\":null,\\"airbnb\\":null,\\"homesuite\\":null,\\"forRent\\":null,\\"other\\":null},\\"transits\\":{},\\"url\\":null},\\"location\\":{\\"box\\":{\\"minLng\\":null,\\"maxLng\\":null,\\"minLat\\":null,\\"maxLat\\":null},\\"lat\\":51.0340114880156,\\"lng\\":-114.069383877148,\\"url\\":\\"calgary-ab\\",\\"zoom\\":12,\\"cityName\\":\\"Calgary\\",\\"stateName\\":\\"AB\\",\\"fromMapInteraction\\":false,\\"preventLocationUpdate\\":false},\\"eventAttributes\\":{\\"visiblePropertyIds\\":[\\"l-36076268\\"],\\"loadedPropertyIds\\":[\\"l-36076268\\",\\"l-37665902\\",\\"l-37450465\\",\\"b-914783:p-398495\\",\\"l-36648834\\",\\"b-913871:p-397953\\",\\"b-918153:p-400087\\",\\"b-914844:p-369364\\",\\"l-32346757\\",\\"l-37723485\\",\\"l-37711257\\",\\"b-924031:p-403550\\",\\"l-37735461\\",\\"l-37735462\\",\\"l-26362122\\",\\"l-37711255\\",\\"b-926407:p-404370\\",\\"l-19309641\\",\\"l-24207390\\",\\"l-37628863\\"]},\\"name\\":\\"LIST_ITEM_IMPRESSION\\",\\"versionNumber\\":\\"3.8.0\\"}",'
    '"{\\"sessionId\\":\\"uawg5o3gkk\\",\\"uniqueId\\":\\"2t383bqgq9\\",\\"viewportHeight\\":937,\\"viewportWidth\\":979,\\"url\\":\\"https://www.padmapper.com/apartments/calgary-ab?property-categories=apartment,condo&sort=-price&lease-term=long\\",\\"site\\":2,\\"timestamp\\":1567001173065,\\"datetime\\":1567001173,\\"abTests\\":{},\\"filters\\":{\\"availableAfter\\":null,\\"availableBefore\\":null,\\"bedrooms\\":[false,false,false,false,false,false],\\"buildingAmenities\\":[],\\"cats\\":null,\\"customMaxPrice\\":false,\\"dogs\\":null,\\"encodedFilters\\":null,\\"excludeAirbnb\\":null,\\"external\\":true,\\"features\\":[],\\"feeds\\":null,\\"floorplans\\":null,\\"hasImages\\":null,\\"isZappable\\":null,\\"keywords\\":null,\\"listingAmenities\\":[],\\"liveByPolygons\\":[],\\"longTerm\\":true,\\"maxLat\\":null,\\"maxLng\\":null,\\"maxDays\\":null,\\"maxPrice\\":5000,\\"maxPricePerBedroom\\":null,\\"maxSquareFeet\\":null,\\"minBathrooms\\":null,\\"minLat\\":null,\\"minLng\\":null,\\"minPrice\\":0,\\"minSquareFeet\\":null,\\"mmFrom\\":null,\\"neighborhoodIds\\":[],\\"noFees\\":null,\\"pad\\":null,\\"polygons\\":[],\\"propertyCategories\\":{\\"apartment\\":true,\\"condo\\":true,\\"house\\":null,\\"room\\":null,\\"other\\":null},\\"q\\":null,\\"qs\\":null,\\"incomeRestricted\\":null,\\"section8\\":null,\\"shortTerm\\":false,\\"sort\\":[\\"-price\\"],\\"sources\\":{\\"apartmentSearch\\":null,\\"airbnb\\":null,\\"homesuite\\":null,\\"forRent\\":null,\\"other\\":null},\\"transits\\":{},\\"url\\":null},\\"location\\":{\\"box\\":{\\"minLng\\":null,\\"maxLng\\":null,\\"minLat\\":null,\\"maxLat\\":null},\\"lat\\":51.0340114880156,\\"lng\\":-114.069383877148,\\"url\\":\\"calgary-ab\\",\\"zoom\\":12,\\"cityName\\":\\"Calgary\\",\\"stateName\\":\\"AB\\",\\"fromMapInteraction\\":false,\\"preventLocationUpdate\\":false},\\"eventAttributes\\":{\\"visiblePropertyIds\\":[\\"l-36076268\\"],\\"loadedPropertyIds\\":[\\"l-36076268\\",\\"l-37665902\\",\\"l-37450465\\",\\"b-914783:p-398495\\",\\"l-36648834\\",\\"b-913871:p-397953\\",\\"b-918153:p-400087\\",\\"b-914844:p-369364\\",\\"l-32346757\\",\\"l-37723485\\",\\"l-37711257\\",\\"b-924031:p-403550\\",\\"l-37735461\\",\\"l-37735462\\",\\"l-26362122\\",\\"l-37711255\\",\\"b-926407:p-404370\\",\\"l-19309641\\",\\"l-24207390\\",\\"l-37628863\\"]},\\"name\\":\\"LIST_ITEM_IMPRESSION\\",\\"versionNumber\\":\\"3.8.0\\"}"]'
)

# Exact class-attribute string of one listing card in the fetched HTML.
LISTING_CLASS = 'ListItemFull_noGutterRow__2UIM- ListItemFull_listItemFull__1k2w4'


def _text_or_blank(parent, tag, css_class):
    """Return the text of the first `tag` with `css_class` under `parent`.

    `find()` returns None when nothing matches; in that case an empty
    string is returned instead of raising AttributeError, so every listing
    still produces the same number of output lines.
    """
    node = parent.find(tag, {'class': css_class})
    # `node.text` would give the same result: on a bs4 Tag, `.text` is a
    # property that returns get_text() called with no arguments.
    return node.get_text() if node is not None else ''


# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(URL, headers=headers, data=data, timeout=30)
# Fail loudly on 4xx/5xx rather than parsing an error page and printing nothing.
r.raise_for_status()
content = r.text
soup = BeautifulSoup(content, 'html.parser')

for section in soup.find_all(class_=LISTING_CLASS):
    print(_text_or_blank(section, 'span', 'ListItemFull_text__1DqPn'))
    print(_text_or_blank(section, 'div', 'ListItemFull_info__3qO0i'))
    print(_text_or_blank(section, 'a', 'ListItemFull_headerText__3OoiC'))
    # Only the first listing is processed; drop this `break` to print all.
    break
1 2 3 |
$ 6 , 500 3 Bedrooms · 3 Bathrooms Apartment · Downtown Calgary, Calgary 110 7 St SW #1201 |
Also, I've got another question:
I am also scraping a different site, and there I could use the .text attribute ... but now I had to use get_text(). Why is that?