Aug-28-2019, 04:58 PM
hi guys
I am learning web scraping on a few different types of sites, and I cannot figure out how to get only one word from a string.
I have also got another question:
I am also scraping a different site, and there I could use the .text attribute, but here I had to use get_text(). Why is that?
I am learning web scraping on a few different types of sites, and I cannot figure out how to get only one word from a string.
"""Print the price, info line and address of the first PadMapper listing.

Fetches the Calgary apartment/condo search page, parses it with
BeautifulSoup and prints three fields of the first listing card found.
"""
import re
import sys

import pandas as pd
import requests
from bs4 import BeautifulSoup

# The same URL is used as the request target, the Referer header and the
# "url" field of each event in the request body.
SEARCH_URL = (
    'https://www.padmapper.com/apartments/calgary-ab'
    '?property-categories=apartment,condo&sort=-price&lease-term=long'
)

# Seconds to wait for the server before giving up; without a timeout
# requests can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

headers = {
    'Accept': 'application/json',
    'Referer': SEARCH_URL,
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
    'Sec-Fetch-Mode': 'cors',
    'Content-Type': 'application/json',
}

# Fragments of one JSON-encoded event. The two events sent in the body are
# identical except for viewport width, timestamp and datetime, so the shared
# text is written once. The quotes are backslash-escaped because each event
# is itself embedded as a string inside the outer JSON array.
_EVENT_HEAD = (
    '{\\"sessionId\\":\\"uawg5o3gkk\\",\\"uniqueId\\":\\"2t383bqgq9\\",'
    '\\"viewportHeight\\":937,\\"viewportWidth\\":'
)
_EVENT_URL_AND_SITE = ',\\"url\\":\\"' + SEARCH_URL + '\\",\\"site\\":2,\\"timestamp\\":'
_EVENT_TAIL = (
    ',\\"abTests\\":{},'
    '\\"filters\\":{\\"availableAfter\\":null,\\"availableBefore\\":null,'
    '\\"bedrooms\\":[false,false,false,false,false,false],'
    '\\"buildingAmenities\\":[],\\"cats\\":null,\\"customMaxPrice\\":false,'
    '\\"dogs\\":null,\\"encodedFilters\\":null,\\"excludeAirbnb\\":null,'
    '\\"external\\":true,\\"features\\":[],\\"feeds\\":null,'
    '\\"floorplans\\":null,\\"hasImages\\":null,\\"isZappable\\":null,'
    '\\"keywords\\":null,\\"listingAmenities\\":[],\\"liveByPolygons\\":[],'
    '\\"longTerm\\":true,\\"maxLat\\":null,\\"maxLng\\":null,'
    '\\"maxDays\\":null,\\"maxPrice\\":5000,\\"maxPricePerBedroom\\":null,'
    '\\"maxSquareFeet\\":null,\\"minBathrooms\\":null,\\"minLat\\":null,'
    '\\"minLng\\":null,\\"minPrice\\":0,\\"minSquareFeet\\":null,'
    '\\"mmFrom\\":null,\\"neighborhoodIds\\":[],\\"noFees\\":null,'
    '\\"pad\\":null,\\"polygons\\":[],'
    '\\"propertyCategories\\":{\\"apartment\\":true,\\"condo\\":true,'
    '\\"house\\":null,\\"room\\":null,\\"other\\":null},'
    '\\"q\\":null,\\"qs\\":null,\\"incomeRestricted\\":null,'
    '\\"section8\\":null,\\"shortTerm\\":false,\\"sort\\":[\\"-price\\"],'
    '\\"sources\\":{\\"apartmentSearch\\":null,\\"airbnb\\":null,'
    '\\"homesuite\\":null,\\"forRent\\":null,\\"other\\":null},'
    '\\"transits\\":{},\\"url\\":null},'
    '\\"location\\":{\\"box\\":{\\"minLng\\":null,\\"maxLng\\":null,'
    '\\"minLat\\":null,\\"maxLat\\":null},'
    '\\"lat\\":51.0340114880156,\\"lng\\":-114.069383877148,'
    '\\"url\\":\\"calgary-ab\\",\\"zoom\\":12,\\"cityName\\":\\"Calgary\\",'
    '\\"stateName\\":\\"AB\\",\\"fromMapInteraction\\":false,'
    '\\"preventLocationUpdate\\":false},'
    '\\"eventAttributes\\":{\\"visiblePropertyIds\\":[\\"l-36076268\\"],'
    '\\"loadedPropertyIds\\":[\\"l-36076268\\",\\"l-37665902\\",'
    '\\"l-37450465\\",\\"b-914783:p-398495\\",\\"l-36648834\\",'
    '\\"b-913871:p-397953\\",\\"b-918153:p-400087\\",'
    '\\"b-914844:p-369364\\",\\"l-32346757\\",\\"l-37723485\\",'
    '\\"l-37711257\\",\\"b-924031:p-403550\\",\\"l-37735461\\",'
    '\\"l-37735462\\",\\"l-26362122\\",\\"l-37711255\\",'
    '\\"b-926407:p-404370\\",\\"l-19309641\\",\\"l-24207390\\",'
    '\\"l-37628863\\"]},'
    '\\"name\\":\\"LIST_ITEM_IMPRESSION\\",\\"versionNumber\\":\\"3.8.0\\"}'
)


def _impression_event(viewport_width, timestamp_ms, datetime_s):
    """Return one escaped event string for the request body."""
    return (
        _EVENT_HEAD + str(viewport_width)
        + _EVENT_URL_AND_SITE + str(timestamp_ms)
        + ',\\"datetime\\":' + str(datetime_s)
        + _EVENT_TAIL
    )


# Request body: a JSON array of two JSON-encoded event strings, rebuilt to
# the same text as the original single-line literal.
# NOTE(review): the events are named LIST_ITEM_IMPRESSION, which looks like
# browser telemetry captured from dev tools rather than something the HTML
# listing page needs, and it is sent as the body of a GET request. Confirm
# whether it can be dropped.
data = '[' + ','.join(
    '"' + event + '"'
    for event in (
        _impression_event(1920, 1567001142402, 1567001142),
        _impression_event(979, 1567001173065, 1567001173),
    )
) + ']'

# (tag name, CSS class) of each field printed per listing, in print order.
LISTING_FIELDS = (
    ('span', 'ListItemFull_text__1DqPn'),
    ('div', 'ListItemFull_info__3qO0i'),
    ('a', 'ListItemFull_headerText__3OoiC'),
)

r = requests.get(SEARCH_URL, headers=headers, data=data, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were listings.
r.raise_for_status()
content = r.text
soup = BeautifulSoup(content, 'html.parser')

for section in soup.find_all(class_='ListItemFull_noGutterRow__2UIM- ListItemFull_listItemFull__1k2w4'):
    for tag_name, css_class in LISTING_FIELDS:
        node = section.find(tag_name, {'class': css_class})
        if node is None:
            # find() returns None when nothing matches; report the gap
            # instead of crashing with AttributeError on .get_text().
            print('missing <%s class="%s">' % (tag_name, css_class), file=sys.stderr)
            continue
        print(node.get_text())
    # Only the first listing is printed, as in the original script.
    break

# returned:
$6,500 3 Bedrooms · 3 Bathrooms Apartment · Downtown Calgary, Calgary 110 7 St SW #1201
Basically, I would like to get the parts separately: 3 Bedrooms, 3 Bathrooms, Apartment, etc.
I have also got another question:
I am also scraping a different site, and there I could use the .text attribute, but here I had to use get_text(). Why is that?