import os
import warnings

import numpy as np
import pandas as pd
from nltk import word_tokenize


def _electricity_columns(columns):
    """Return the labels in *columns* whose tokens include 'Electricity'."""
    # A membership test yields each label at most once, even if the word
    # occurs several times in one header, so no column is summed twice.
    return [label for label in columns if 'Electricity' in word_tokenize(label)]


def func(i, j, directory_path="//data/residential", output_path='houseload.csv'):
    """Sum the 'Electricity' columns of a slice of CSV files into one series.

    The entries of ``directory_path`` are sorted by name and sliced with
    ``[i:j]``; entries that are not regular files are ignored.  For each
    remaining CSV, every column whose tokenised header contains the word
    ``'Electricity'`` is summed row-wise.  The per-file results are then
    added together row by row (aligned on the row index), written to
    ``output_path`` without the index, and returned.

    Args:
        i: Start of the slice applied to the sorted directory listing.
        j: End (exclusive) of that slice.
        directory_path: Directory holding the CSV files.
        output_path: Destination of the CSV containing the summed series.

    Returns:
        A 1-D ``numpy`` array with the summed values, one element per row.
        The array is empty when no file contributed any data.
    """
    # os.listdir returns names in arbitrary order; sorting makes the
    # [i:j] selection reproducible across runs and platforms.
    listing = sorted(os.listdir(directory_path))[i:j]
    candidate_paths = (os.path.join(directory_path, name) for name in listing)
    # Only regular files can be handed to read_csv; skip sub-directories.
    file_paths = [path for path in candidate_paths if os.path.isfile(path)]

    loads = []
    for file_path in file_paths:
        current_data = pd.read_csv(file_path, encoding='utf-8')
        columns = _electricity_columns(current_data.columns)
        if not columns:
            # Nothing to add from this file; report it instead of failing
            # with an opaque indexing error.
            warnings.warn(
                f"No 'Electricity' column found in {file_path}; file skipped",
                stacklevel=2,
            )
            continue
        household_load = current_data[columns].sum(axis=1)
        loads.append(household_load.rename("Household_load"))

    if loads:
        # Concatenate once after the loop rather than growing a frame on
        # every iteration.
        total = pd.concat(loads, axis=1).sum(axis=1)
    else:
        # Same empty result an empty DataFrame produces when nothing was read.
        total = pd.DataFrame().sum(axis=1)

    total.to_csv(output_path, index=False)
    return np.array(total).ravel()


if __name__ == '__main__':
    func(0, 38)