Python Forum

Full Version: Questions regarding usage of pandas library
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
import pandas as pd
import matplotlib.pyplot as plt
# For each of the 800 input files HELLO-001 ... HELLO-800: read every line,
# pick nine specific rows, take the sixth comma-separated field of each row
# as P*, scale it to P, and write one workbook per row (PRESSURE1..9.xlsx).
#
# NOTE(review): every first_data_N / second_data_N list is re-created inside
# the file loop, and the to_excel calls are also inside the file loop. Each
# workbook is therefore overwritten on every iteration and ends up holding a
# single row (from the last file) rather than one row per file. To accumulate
# 800 rows, the lists must be created once before the loop and the workbooks
# written once after it.
for i in range(1, 801):
    file_name = f"C:/Users/pp2c20/Downloads/IP data/IP DATA PRESSURE/HELLO-{i:03d}"
    with open(file_name, 'r') as file:
    # Read all lines from the file
         lines = file.readlines()

         # Re-initialised for every file (see the NOTE at the top).
         first_data_1 = []
         second_data_1 = []
         
         # Underside of airfoil, coordinate (-0.499988,-0.00023067,-0.05)
         # lines[6:7] is a one-element slice: only the row at index 6 is read.
         for line in lines[6:7]:
             # Split the line into a list of comma-separated fields
             data_list = line.strip().split(",")

             # Field index 5 (the sixth field) is stored as P*; P is P* times
             # 106870.13 (presumably a reference pressure -- TODO confirm).
             first_data_1.append(float(data_list[5]))
             second_data_1.append((float(data_list[5]))*106870.13)

         # Underside of airfoil, coordinate (-0.499982,-0.000300852,-0.05)
         first_data_2 = []
         second_data_2 = []
         
         for line in lines[7:8]:
             # Split the line into a list of comma-separated fields
             data_list = line.strip().split(",")

             # Extract the sixth field, convert to float, and scale
             first_data_2.append(float(data_list[5]))
             second_data_2.append((float(data_list[5]))*106870.13)
             
         # Underside of airfoil, coordinate (-0.499974,-0.000375813,-0.05)
         first_data_3 = []
         second_data_3 = []
         
         for line in lines[8:9]:
             # Split the line into a list of comma-separated fields
             data_list = line.strip().split(",")

             # Extract the sixth field, convert to float, and scale
             first_data_3.append(float(data_list[5]))
             second_data_3.append((float(data_list[5]))*106870.13)    
             
         first_data_4 = []
         second_data_4 = []
         
         # Underside of airfoil, coordinate (0.49996,-0.00045414,-0.05)
         for line in lines[474:475]:
              # Split the line into a list of comma-separated fields
              data_list = line.strip().split(",")

              # Extract the sixth field, convert to float, and scale
              first_data_4.append(float(data_list[5]))
              second_data_4.append((float(data_list[5]))*106870.13) 
         # Underside of airfoil, coordinate (0.499972,-0.000374807,-0.05)
         first_data_5 = []
         second_data_5 = []
         
         for line in lines[475:476]:
              # Split the line into a list of comma-separated fields
              data_list = line.strip().split(",")

              # Extract the sixth field, convert to float, and scale
              first_data_5.append(float(data_list[5]))
              second_data_5.append((float(data_list[5]))*106870.13)
              
         # Underside of airfoil, coordinate (0.499984,-0.000300251,-0.05)
         first_data_6 = []
         second_data_6 = []
         
         for line in lines[476:477]:
               # Split the line into a list of comma-separated fields
               data_list = line.strip().split(",")

               # Extract the sixth field, convert to float, and scale
               first_data_6.append(float(data_list[5]))
               second_data_6.append((float(data_list[5]))*106870.13) 
         # Upper side of airfoil, coordinate (-0.499764,0.000889215,-0.05)
         first_data_7 = []
         second_data_7 = []
         
         for line in lines[493:494]:
                 # Split the line into a list of comma-separated fields
                 data_list = line.strip().split(",")

                 # Extract the sixth field, convert to float, and scale
                 first_data_7.append(float(data_list[5]))
                 second_data_7.append((float(data_list[5]))*106870.13)
         # Upper side of airfoil, coordinate (-0.499709,0.000981514,-0.05)
         first_data_8 = []
         second_data_8 = []
         
         # NOTE(review): the row at index 494 is skipped (493, then 495) --
         # confirm this is intentional.
         for line in lines[495:496]:
                 # Split the line into a list of comma-separated fields
                 data_list = line.strip().split(",")

                 # Extract the sixth field, convert to float, and scale
                 first_data_8.append(float(data_list[5]))
                 second_data_8.append((float(data_list[5]))*106870.13) 
         # Upper side of airfoil, coordinate (-0.499646,0.00107383,-0.05)
         first_data_9 = []
         second_data_9 = []
         
         for line in lines[496:497]:
                 # Split the line into a list of comma-separated fields
                 data_list = line.strip().split(",")

                 # Extract the sixth field, convert to float, and scale
                 first_data_9.append(float(data_list[5]))
                 second_data_9.append((float(data_list[5]))*106870.13)
         #print(first_data_1)

         # Create one two-column data frame (P*, P) for each sampled row
         df1 = pd.DataFrame({
             'P*': first_data_1,
             'P': second_data_1,
             
         })
         df2 = pd.DataFrame({
             'P*': first_data_2,
             'P': second_data_2,
             
         })
         df3 = pd.DataFrame({
             'P*': first_data_3,
             'P': second_data_3,
             
         })
         df4 = pd.DataFrame({
             'P*': first_data_4,
             'P': second_data_4,
             
             
         })
         df5 = pd.DataFrame({
             'P*': first_data_5,
             'P': second_data_5,
             
             
         })
         df6 = pd.DataFrame({
             'P*': first_data_6,
             'P': second_data_6,
             
             
             
         })
         df7 = pd.DataFrame({
             'P*': first_data_7,
             'P': second_data_7,
             
             
             
         })
         df8 = pd.DataFrame({
             'P*': first_data_8,
             'P': second_data_8,
             
         })
         df9 = pd.DataFrame({
             'P*': first_data_9,
             'P': second_data_9,
         
         })
         # Written on every pass of the file loop, so each workbook is
         # replaced 800 times and keeps only the last file's single row.
         df1.to_excel('C:/Users/pp2c20/Downloads/IP data/Pressure data/PRESSURE1.xlsx', index=False)
         df2.to_excel('C:/Users/pp2c20/Downloads/IP data/Pressure data/PRESSURE2.xlsx', index=False)
         df3.to_excel('C:/Users/pp2c20/Downloads/IP data/Pressure data/PRESSURE3.xlsx', index=False)
         df4.to_excel('C:/Users/pp2c20/Downloads/IP data/Pressure data/PRESSURE4.xlsx', index=False)
         df5.to_excel('C:/Users/pp2c20/Downloads/IP data/Pressure data/PRESSURE5.xlsx', index=False)
         df6.to_excel('C:/Users/pp2c20/Downloads/IP data/Pressure data/PRESSURE6.xlsx', index=False)
         df7.to_excel('C:/Users/pp2c20/Downloads/IP data/Pressure data/PRESSURE7.xlsx', index=False)
         df8.to_excel('C:/Users/pp2c20/Downloads/IP data/Pressure data/PRESSURE8.xlsx', index=False)
         df9.to_excel('C:/Users/pp2c20/Downloads/IP data/Pressure data/PRESSURE9.xlsx', index=False)
This is the code that I have used to extract the sixth value from several lines of a text file. This process is repeated for 800 files.

From the code above, when I used df1.to_excel('C:/Users/pp2c20/Downloads/IP data/Pressure data/PRESSURE1.xlsx', index=False), I was expecting to get 800 rows of data, but there is only one row. I think each iteration is overwriting the previous data instead of creating a new row for the new data. I am unsure how to solve this issue.
This question has nothing to do with Pandas. You have a logic problem with how you build your data lists.

You assign first_data_1 to an empty list each time you open a file. This prevents accumulating values in the list. You should initialize the list once, before you start reading files. After you open the file you only do append.

You repeat the same code over and over for each PRESSURE. This is very error prone. I would write the code like this:
import pandas as pd

# Zero-based indices (into file.readlines()) of the rows that contain the
# pressures we want.
indices = [6, 7, 8, 474, 475, 476, 493, 495, 496]
# One independent buffer per row index. Each buffer collects that row's
# pressure value from every file, so it ends up with one entry per file.
buffers = [[] for _ in indices]

# Read files one at a time
for i in range(1, 801):
    file_name = f"C:/Users/pp2c20/Downloads/IP data/IP DATA PRESSURE/HELLO-{i:03d}"
    with open(file_name, 'r') as file:
        lines = file.readlines()

    # Extract the sixth comma-separated field of each selected row.
    # Convert to float here: the fields are text, and the scaling below
    # needs a numeric column.
    for index, buffer in zip(indices, buffers):
        buffer.append(float(lines[index].strip().split(",")[5]))

# Convert each buffer to a dataframe, and write as a spreadsheet.
# This runs once, after all files are read, so each workbook gets every row.
for number, buffer in enumerate(buffers, start=1):
    df = pd.DataFrame(buffer, columns=["P*"])
    df["P"] = df["P*"] * 106870.13   # Do all the multiplications here
    df.to_excel(f"C:/Users/pp2c20/Downloads/IP data/Pressure data/PRESSURE{number}.xlsx", index=False)
Whenever you see yourself using variable names like first_data_1 and first_data_2, you should stop and think "Should this be a list?". Unless you are stopping at 2, the answer is YES! If you stop at two the answer is "Probably". Here I make a list of buffers so I can use indexing inside a loop instead of writing a separate block of code for each buffer. To control the loop, I have a list of file rows where I should look to get the pressure data. This lets me write:
Make list of empty pressure lists.  One for each line number
for file in pressure files:
    for each line number:
        read pressure value from file[line number]
        append pressure value to corresponding pressure list
After reading all the files I have a list of pressure lists. It is easy to write a loop to convert each list to a dataframe, and write the dataframe as an Excel file.
Output:
for each pressure list; create data frame from pressure data. compute second column. write dataframe as an excel file.
I don't understand the fourth line in your first set of code. May I know what "for _ in indices" does?
It is called a list comprehension. Run it and see what it does. Very handy things, comprehensions.
# Builds one new, independent empty list for every element of `indices`.
buffers = [[] for _ in indices]
print(buffers)
or running python from a command line.
Output:
> python
Python 3.10.7 (tags/v3.10.7:6cc6b13, Sep 5 2022, 14:08:36) [MSC v.1933 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> buffers = [[] for _ in range(3)]
>>> print(buffers)
[[], [], []]
>>> buffers[1].append(42)
>>> print(buffers)
[[], [42], []]
>>>
thank you very much
To be clear, this is a comprehension.
[[] for _ in indices]
This is just a for loop where I don't plan to use the index value.
for _ in indices
I could write it like this:
for i in indices
But static analysis tools rightfully complain about assigning values to a variable and then never using the variable. Using "_" as a placeholder for the index variable is a way of telling Python (and anyone reading the code) that you want a loop that executes some number of times and that is all.