Python Forum
Need help executing a program
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Need help executing a program
#3
Thanks for the feedback. I'm using Windows, but I'm at work right now; I will follow these instructions as soon as I get home and let you know.

OK,

I followed the instructions and installed the Python interpreter. I then downloaded the Python code as a zip file, renamed it zip, and placed it on my C drive. I then opened the command line, changed the directory to c:\zip, and pasted the code below; it ran and then returned a number of messages, which I'll upload afterward. I'm not sure what to do next, though, nor do I know where to copy and paste the URL for the eText book.
#! /usr/bin/env python3
import urllib.parse
import tempfile
import json
import urllib.request
import hashlib
import os
import sys
import time
import re

from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import NameObject, DictionaryObject, ArrayObject, NumberObject

from multiprocessing.pool import ThreadPool

# Value substituted for the `language` field of bookmarkInfoUrl in main().
language = "en_US"
# Default role id used for the `userroleid` URL fields when the eText URL
# arrives in the "values=" form.
roletypeid = 2 # 3 for instructor

# Both patterns are matched against page labels in main() to decide which
# page-label style to emit; the lazily-matched `prefix` group captures any
# text that precedes the trailing number.
# arabicRegex: an optional prefix followed by one or more decimal digits.
arabicRegex = re.compile(r"^(?P<prefix>.*?)(\d+)$")
# romanRegex: an optional prefix followed by one or more roman numerals
# (case-insensitive). Each alternative forces at least one numeral character
# so the numeral part cannot match the empty string.
romanRegex = re.compile(r"^(?P<prefix>.*?)((?:(M{1,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})|M{0,4}(CM|C?D|D?C{1,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})|M{0,4}(CM|CD|D?C{0,3})(XC|X?L|L?X{1,3})(IX|IV|V?I{0,3})|M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|I?V|V?I{1,3})))+)$", re.IGNORECASE)

# Some interesting parts of the books js code:
#
# MD5_SECRET_KEY: "ipadsecuretext"
# Dear Pearson,
#   Please don't consider MD5 a "secure" algorithm by any means.
# Sincerely,
# Everybody that looks at your horrifying code
#
# UserRoleType: {
#     Student: 2,
#     Instructor: 3,
# }
# The above corresponds to the `roletypeid` GET parameter in a lot of the requests.
# Surprisingly, it's not checked at any point to see if, say, a student is impersonating
# a teacher, even though the API throws an error if it is omitted.
#
# Also, since it's there, a good TODO would be to download other types of media along with the PDF.
# Should be relatively simple.

# URL templates filled in with str.format() inside main(); every formatted URL
# is passed through hsidUrl() before it is requested.
# Book metadata; takes the book id as its single positional field.
# NOTE(review): this is the only template using plain http:// - confirm whether
# the endpoint also answers over https before changing it.
bookInfoUrl = "http://view.ebookplus.pearsoncmg.com/ebook/pdfplayer/getbookinfov2?bookid={}&outputformat=JSON"
# Per-page details (main() reads page order, page label and pdf path from the response).
pageInfoUrl = "https://view.ebookplus.pearsoncmg.com/ebook/pdfplayer/getpagedetails?userid={userid}&userroleid={userroleid}&bookid={bookid}&bookeditionid={bookeditionid}&authkey={authkey}"
# A single PDF page (or the cover when iscover is "Y").
pdfUrl = "https://view.ebookplus.pearsoncmg.com/ebook/pdfplayer/getpdfpage?globalbookid={bookid}&pdfpage={pdfpage}&iscover={iscover}&authkey={authkey}"
# Table-of-contents data that main() turns into PDF bookmarks.
bookmarkInfoUrl = "https://view.ebookplus.pearsoncmg.com/ebook/pdfplayer/getbaskettocinfo?userroleid={userroleid}&bookid={bookid}&language={language}&authkey={authkey}&bookeditionid={bookeditionid}&basket=all&scenarioid={scenarioid}&platformid=1001"

def hsidUrl(aUrl):
    """Return ``aUrl`` with its ``hsid`` query parameter appended.

    The hsid is the hex MD5 digest of the fixed key ``ipadsecuretext``
    followed by the URL with every ``https://`` rewritten to ``http://``.
    The returned URL itself keeps its original scheme.
    """
    httpForm = aUrl.replace("https://", "http://")
    payload = b"ipadsecuretext" + httpForm.encode("utf-8")
    return "{}&hsid={}".format(aUrl, hashlib.md5(payload).hexdigest())

def main(eTextUrl):
    """Download every page of an eText and assemble them into a single PDF.

    Steps, in order:
      1. Parse the query string of ``eTextUrl`` (either ordinary parameters or
         a single ``values=`` parameter holding ``name::value::name::value``).
      2. Fetch the book info and the per-page details as JSON.
      3. Download the cover and every page into a temporary directory,
         using a pool of 40 threads for the pages.
      4. Merge the pages, add bookmarks when the table-of-contents response
         can be parsed, and attach a /PageLabels entry to the document root.
      5. Write ``"<bookid> - <title>.pdf"`` (with "/" removed) in the
         current working directory.

    Args:
        eTextUrl: URL of the eText; only the part after the last "?" is used.

    Raises:
        KeyError: if the parsed URL lacks a key that is read below
            (``bookid``, ``userid``, ``sessionid``, ``scenario``), or a
            bookmark refers to a page label that was not downloaded.
        Errors raised by ``urllib.request`` or PyPDF2 are not caught here.
    """
    bookData = urllib.parse.parse_qs(eTextUrl.split("?")[-1])
    if (bookData.get("values", None)) is not None:
        # "values" packs alternating names and values separated by "::";
        # zipping one iterator with itself pairs them up two at a time.
        bookData = {
            itemName : [itemValue] for itemName, itemValue in
            zip(*[iter(bookData["values"][0].split("::"))]*2)
        }
        # A few fixes in terms of capitalization
        bookData["bookid"] = bookData["bookID"]
        bookData["userid"] = bookData["userID"]
        bookData["sessionid"] = bookData["sessionID"]

        # We'll default to the roletypeid for a student
        bookData["roletypeid"] = [roletypeid] # 3 for Instructor... the server doesn't care, though

    # A plain query-string URL may not carry roletypeid at all; fall back to the
    # module default instead of failing with KeyError further down.
    bookData.setdefault("roletypeid", [roletypeid])

    print("Downloading metadata and eText information...")

    bookInfoGetUrl = bookInfoUrl.format(bookData["bookid"][0])
    with urllib.request.urlopen(hsidUrl(bookInfoGetUrl)) as bookInfoRequest:
        str_response = bookInfoRequest.read().decode('utf-8')
        bookInfo = json.loads(str_response)
        bookInfo = bookInfo[0]['userBookTOList'][0]

    pageInfoGetUrl = pageInfoUrl.format(
        userid=bookData['userid'][0],
        userroleid=bookData['roletypeid'][0],
        bookid=bookData['bookid'][0],
        bookeditionid=bookInfo['bookEditionID'],
        authkey=bookData['sessionid'][0],
        )
    with urllib.request.urlopen(hsidUrl(pageInfoGetUrl)) as pageInfoRequest:
        pageInfo = json.loads(pageInfoRequest.read().decode('utf-8'))
        pageInfo = pageInfo[0]['pdfPlayerPageInfoTOList']

    def getPageUrl(pdfPage, isCover="N"):
        # Build the signed download URL for one page (or the cover).
        pdfPage = pdfPage.replace("/assets/","")
        getPage = pdfUrl.format(
            bookid=bookInfo['globalBookID'],
            pdfpage=pdfPage,
            iscover=isCover,
            authkey=bookData['sessionid'][0]
        )
        return hsidUrl(getPage)


    with tempfile.TemporaryDirectory() as pdfDownloadDir:
        # Use a temporary directory to download all the pdf files to
        # Maps page label (bookPageNumber) -> page order; filled in by download()
        pdfPageTable = {}

        # First, download the cover file
        urllib.request.urlretrieve(getPageUrl(bookInfo['pdfCoverArt'], isCover="Y"), os.path.join(pdfDownloadDir, "0000 - cover.pdf"))
        # Then, download all the individual pages for the e-book
        def download(pdfPage):
            pdfPageTable[pdfPage['bookPageNumber']] = pdfPage['pageOrder']
            # The zero-padded page order prefix makes the sorted directory
            # listing below come out in page order.
            savePath = os.path.join(pdfDownloadDir, "{:04} - {}.pdf".format(pdfPage['pageOrder'], pdfPage['bookPageNumber']))
            urllib.request.urlretrieve(getPageUrl(pdfPage['pdfPath']), savePath)

        print("Downloading pages to \"{}\"...".format(pdfDownloadDir))
        # 40 threads should download a book fairly quickly. The context manager
        # shuts the pool down once map() has returned, so no workers are leaked.
        with ThreadPool(40) as threadPool:
            threadPool.map(download, pageInfo)

        print("Assembling PDF...")

        # Begin to assemble the final PDF, first by adding all the pages
        fileMerger = PdfFileWriter()
        for pdfFile in sorted(os.listdir(pdfDownloadDir)):
            fileMerger.addPage(PdfFileReader(os.path.join(pdfDownloadDir, pdfFile)).getPage(0))

        # And then add all the bookmarks to the final PDF
        bookmarkInfoGetUrl = bookmarkInfoUrl.format(
            userroleid=bookData['roletypeid'][0],
            bookid=bookData['bookid'][0],
            language=language,
            authkey=bookData['sessionid'][0],
            bookeditionid=bookInfo['bookEditionID'],
            scenarioid=bookData['scenario'][0],
            )

        bookmarksExist = True

        with urllib.request.urlopen(hsidUrl(bookmarkInfoGetUrl)) as bookmarkInfoRequest:
            try:
                bookmarkInfo = json.loads(bookmarkInfoRequest.read().decode('utf-8'))
                bookmarkInfo = bookmarkInfo[0]['basketsInfoTOList'][0]
            except Exception:
                # Deliberately best-effort: any failure to read or parse the
                # table of contents just means the PDF is written without bookmarks.
                bookmarksExist = False

        def recursiveSetBookmarks(aDict, parent=None):
            # A single entry arrives as a dict, several as a list; normalise to a list.
            if isinstance(aDict, dict):
                aDict = [aDict]
            for bookmark in aDict:
                # These are the main bookmarks under this parent (or the whole document if parent is None)
                bookmarkName = bookmark['n'] # Name of the section
                pageNum = str(bookmark['lv']['content']) # First page (in the pdf's format)

                latestBookmark = fileMerger.addBookmark(bookmarkName, pdfPageTable[pageNum], parent)

                if 'be' in bookmark:
                    recursiveSetBookmarks(bookmark['be'], latestBookmark)

        if bookmarksExist:
            print("Adding bookmarks...")
            fileMerger.addBookmark("Cover", 0) # Add a bookmark to the cover at the beginning
            recursiveSetBookmarks(bookmarkInfo['document'][0]['bc']['b']['be'])
        else:
            # Print the id itself rather than the one-element list that holds it
            print("Bookmarks don't exist for ID {}".format(bookData['bookid'][0]))
        print("Fixing metadata...")
        # Hack to fix metadata and page numbers:
        # (page order, page label) pairs sorted by page order
        pdfPageLabelTable = [(v,k) for k,v in pdfPageTable.items()]
        pdfPageLabelTable = sorted(pdfPageLabelTable, key=(lambda x: int(x[0])))
        # /Nums is a flat array of alternating page indices and label dictionaries.
        # The "(cover)" prefix is written through NameObject so that it is emitted
        # verbatim as a parenthesised PDF string.
        labels = ArrayObject([
            NumberObject(0), DictionaryObject({NameObject("/P"): NameObject("(cover)")})
        ])
        lastMode = None
        lastPrefix = ""
        # Now we check to see the ranges where we have roman numerals or arabic numerals
        # The following code is not ideal for this, so I'd appreciate a PR with a better solution
        for pageNumber, pageLabel in pdfPageLabelTable:
            currMode = None
            prefix = ""
            style = DictionaryObject()
            arabicMatch = arabicRegex.match(pageLabel)
            if arabicMatch:
                currMode = "arabic"
                prefix = arabicMatch.group("prefix")
                style.update({NameObject("/S"): NameObject("/D")})
            else:
                romanMatch = romanRegex.match(pageLabel)
                if romanMatch:
                    currMode = "roman"
                    prefix = romanMatch.group("prefix")
                    style.update({NameObject("/S"): NameObject("/r")})
            # Only start a new label range when the numbering style or prefix changes
            if currMode != lastMode or prefix != lastPrefix:
                if prefix:
                    style.update({
                        NameObject("/P"): NameObject("({})".format(prefix))
                    })
                labels.extend([
                    NumberObject(pageNumber),
                    style,
                ])
                lastMode = currMode
                lastPrefix = prefix
        rootObj = fileMerger._root_object
        # Todo: Fix the weird page numbering bug
        pageLabels = DictionaryObject()
        pageLabels.update({
            NameObject("/Nums"): ArrayObject(labels)
        })
        rootObj.update({
            NameObject("/PageLabels"): pageLabels
        })

        print("Writing PDF...")
        with open("{} - {}.pdf".format(bookData['bookid'][0], bookInfo['title']).replace("/",""), "wb") as outFile:
            fileMerger.write(outFile)

# Script entry point: expects the eText URL as the first command-line argument.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # A missing argument is a usage error: report it on stderr and exit
        # with a non-zero status so calling scripts can detect the failure.
        print("Missing url of eText!", file=sys.stderr)
        sys.exit(1)
    main(sys.argv[1])
Below is the result of passing the code in the command line:
-------------------------------------------------------------
Output:
C:\zip> }) '})' is not recognized as an internal or external command, operable program or batch file. C:\zip> rootObj.update({ 'rootObj.update' is not recognized as an internal or external command, operable program or batch file. C:\zip> NameObject("/PageLabels"): pageLabels 'NameObject' is not recognized as an internal or external command, operable program or batch file. C:\zip> }) '})' is not recognized as an internal or external command, operable program or batch file. C:\zip> C:\zip> print("Writing PDF...") Can't find file (Writing PDF...) C:\zip> with open("{} - {}.pdf".format(bookData['bookid'][0], bookInfo['title']).replace("/",""), "wb") as outFile: 'with' is not recognized as an internal or external command, operable program or batch file. C:\zip> fileMerger.write(outFile) 'fileMerger.write' is not recognized as an internal or external command, operable program or batch file. C:\zip> C:\zip>if __name__ == '__main__': The syntax of the command is incorrect. C:\zip> if len(sys.argv) < 2: < was unexpected at this time. C:\zip> print("Missing url of eText!") Can't find file (Missing url of eText!) C:\zip> sys.exit(0) 'sys.exit' is not recognized as an internal or external command, operable program or batch file. C:\zip> main(sys.argv[1])
Reply


Messages In This Thread
Need help executing a program - by 1234kevind - Jun-28-2019, 01:32 AM
RE: Need help executing a program - by snippsat - Jun-28-2019, 02:11 PM
RE: Need help executing a program - by 1234kevind - Jun-28-2019, 02:51 PM

Possibly Related Threads…
Thread Author Replies Views Last Post
  Waiting for the user input while executing the program Lightningwalrus 3 12,378 Oct-24-2016, 05:49 PM
Last Post: Lightningwalrus

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020