Python Forum

Full Version: scraping in a text/javascript
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hi everyone,
Sorry for my bad English, but I would like to know how to extract a number from a text/javascript script block without using the Selenium library or something similar, maybe with bs4.
Thanks for your help, and have a nice rest of your day :)
P.S.: This is the JS, and I want to extract the ID '1651'.

<script type="text/javascript">
        // Product IDs treated as disabled. The option loop further down looks up
        // each option's first product ID in this array.
        // NOTE(review): the leading -1 is presumably a placeholder so that the
        // comma-prefixed IDs after it always form a valid array literal, even
        // when the list is empty - confirm against the page generator.
        var spConfigDisabledProducts = [-1
        , '294716', '294718', '294719', '294720', '294721', '294723', '294725', '294726', '294727', '294729', '294730', '294731', '294732', '294733', '294734', '294735', '294736'        ];
        // Product configuration handed to Product.Config (defined outside this
        // script). The argument is a single object literal written in JSON
        // syntax: attributes["959"].options is an array of
        // {id, label, price, oldPrice, products} records, e.g. id "1651" has
        // label "30" and products ["294724"].
        var spConfig = new Product.Config({"attributes":{"959":{"id":"959","code":"aw_taglia","label":"Taglia","options":[{"id":"1644","label":"26.5","price":"0","oldPrice":"0","products":["294716"]},{"id":"1645","label":"27","price":"0","oldPrice":"0","products":["294718"]},{"id":"1646","label":"27.5","price":"0","oldPrice":"0","products":["294719"]},{"id":"1647","label":"28","price":"0","oldPrice":"0","products":["294720"]},{"id":"1648","label":"28.5","price":"0","oldPrice":"0","products":["294721"]},{"id":"1649","label":"29","price":"0","oldPrice":"0","products":["294722"]},{"id":"1650","label":"29.5","price":"0","oldPrice":"0","products":["294723"]},{"id":"1651","label":"30","price":"0","oldPrice":"0","products":["294724"]},{"id":"1652","label":"30.5","price":"0","oldPrice":"0","products":["294725"]},{"id":"1653","label":"31","price":"0","oldPrice":"0","products":["294726"]},{"id":"1654","label":"31.5","price":"0","oldPrice":"0","products":["294727"]},{"id":"1655","label":"32","price":"0","oldPrice":"0","products":["294728"]},{"id":"1656","label":"32.5","price":"0","oldPrice":"0","products":["294729"]},{"id":"1657","label":"33","price":"0","oldPrice":"0","products":["294730"]},{"id":"1658","label":"33.5","price":"0","oldPrice":"0","products":["294731"]},{"id":"1659","label":"34","price":"0","oldPrice":"0","products":["294732"]},{"id":"1660","label":"34.5","price":"0","oldPrice":"0","products":["294733"]},{"id":"1661","label":"35","price":"0","oldPrice":"0","products":["294734"]},{"id":"1662","label":"35.5","price":"0","oldPrice":"0","products":["294735"]},{"id":"1663","label":"36","price":"0","oldPrice":"0","products":["294736"]}]}},"template":"#{price}\u00a0\u20ac","basePrice":"160","oldPrice":"160","productId":"294717","chooseText":"Seleziona","taxConfig":{"includeTax":true,"showIncludeTax":true,"showBothPrices":false,"defaultTax":0,"currentTax":0,"inclTaxTitle":"Incl. Tasse"}});
        // Flag every <option> under #attribute959 whose matching configuration
        // entry points at a disabled product.
        jQuery("#attribute959 option").each(function () {
            var option = jQuery(this);
            // The option's value attribute is compared against the option ids
            // in the configuration below.
            var id = option.attr('value');
            // NOTE(review): assumes Product.Config exposes the object it was
            // given as `.config` - confirm against its definition.
            jQuery.each(spConfig.config.attributes, function () {
                // Inside these callbacks `this` is the element being visited:
                // first an attribute record, then one of its option records.
                jQuery.each(this.options, function () {
                    if (this.id == id) {
                        // Only the first product ID of the option is checked
                        // against the disabled list.
                        if (spConfigDisabledProducts.indexOf(this.products[0]) >= 0) {
                            // Stored as jQuery data on the option; nothing in
                            // this script reads the flag back.
                            option.data('disabled', true);
                        }
                    }
                });
            });
        });
    </script>
I don't think BS4 has a way to parse the top-level element like that; it's not meant to do that. There is nothing stopping you from grabbing the HTML source and using a regex to find what you need in it, though. The exception is if the script element itself is modified by JavaScript; in that case you must use Selenium to even obtain the proper element.

Unless there is a way with BS4 that I am unaware of, I would probably parse spConfig out into proper JSON and then load it like a dictionary. I think that if you strip the surrounding "var spConfig = new Product.Config(" and the closing ");", what is left appears to be a dictionary/JSON. That would leave less room for error, and if they append to it, it would not break your script.