We depend on your feedback and ideas!
# ============================================================================
# This python script helps extracting information from a standard database in csv format
# then the data is written in the format that can be read by PfE
# ============================================================================
# ============================================================================
# Example of Standard Format in PfE ASCII .DAT File:
# This block explains the format of the ASCII .DAT file that the script generates.
# 100 "Calcite" : std number in PfE, standard name in ""
# "CaCO3" : the following six lines are just additional standard info
# Locality: WealGrey_Cornwall
# ID_EPMA: A01
# ID_EPMA2: CAL2_221
# BM: BM32890
# Reference: ChemLab_11076"
# -1 8 : flag to calculate as oxides (-1) or as elements (0),
# the number of elements in the standard
# 2.71 : standard density
# "Mg" "O" "Ca" "P" "Fe" "Mn" "Ti" "C" : list of elements in the standard in ""
# 1 1 1 2 1 1 1 1 : cations in oxide or 1 if element
# 1 0 1 5 1 1 2 2 : oxygens in oxide or 0 if element
# 0.082 47.960 37.450 0.017 0.024 0.996 0.022 12.000 : element wt.% in the standard
# "Carbonate" 0 0 "" "3" : standard type in "", -1 calculate formula/ 0 doesn't,
# number of cations per formula unit (cat p.f.u), formulas,
# and standard block in ""
# (Note: formula calculations are not included in this code, so these values are set to 0)
# ============================================================================
# ============================================================================
# Example of NHM Standard Database CSV Structure:
# The CSV structure should include the following columns but they can be changed accordingly:
# Name, Formula, Density, ID_EPMA, ID_PfE, ID_Old, BM, Type, Block, Locality, Reference, Mg... (and other elements)
# Example entry (for Calcite):
# Calcite,CaCO3,2.71,A01,100,CAL2_221,BM32890,Carbonate,3,WealGrey_Cornwall,ChemLab_11076,0.0816
# Example entry (for Chromium):
# Chromium,Cr,7.19,B01,101,PCR_117,Element,6,Synthetic,ChemLab
# When there is no reported information or value for an argument, it will be shown as empty (which is read as NaN).
# ============================================================================
# ============================================================================
#libraries needed to run the script. This is to creade dataframes
# ============================================================================
import pandas as pd
# ============================================================================
# Function: convert_ascii_std
# Purpose: Converts a single row of standard data into a PfE-readable ASCII .DAT format.
# Arguments:
# - row: The row containing standard information from the input CSV.
# - element_composition: A list of column names representing element compositions (weight %).
# - cat: A series representing the number of cations for each element.
# - oxy: A series representing the number of oxygens for each element.
# Returns:
# - A formatted string containing the .DAT content for a single standard entry.
# ============================================================================
def convert_ascii_std(row, element_composition, cat, oxy):
# --- Extract Standard Metadata ---
name = row["Name"] # Name of the standard
formula = row["Formula"] # Formula of the standard
density = row["Density"] # Density of the standard
id_pfe = row["ID_PfE"] # Standard number in PfE
id_EPMA = row["ID_EPMA"] # EPMA ID of the standard
id_Old = row["ID_Old"] # Old ID (if available)
BM = row["BM"] # Museum entry number (if applicable)
type = row["Type"] # Type of standard (e.g., carbonate, silicate, etc.)
block = row["Block"] # Block number in the database
locality = row["Locality"] # Locality or source of the standard
reference = row["Reference"] # Reference or source information
# --- Extract Element Composition Data ---
wt_Element = row[element_composition] # Weight percentages of elements in the standard
# Remove elements that have zero values in the weight percent (i.e., ignore them)
non_zero_elements = wt_Element[wt_Element > 0]
# --- Filter Cations and Oxygens for Non-Zero Elements ---
cat_clean = cat[wt_Element > 0] # Corresponding cation values for non-zero elements
oxy_clean = oxy[wt_Element > 0] # Corresponding oxygen values for non-zero elements
# --- Determine Flag for Oxides vs Elements ---
display_ox = -1 if "O" in row and row["O"] > 0 else 0 # Flag: -1 for oxides, 0 for elements
# --- Count Non-Zero Elements ---
elements_shown = len(non_zero_elements) # Number of elements with non-zero weight percentages
# --- Generate Element Symbols List ---
element_symbols = [f'"{elem}"' for elem in non_zero_elements.index] # List of element symbols for non-zero elements
# --- Generate Element Content Data ---
element_content = [f'{val:.3f}' for val in non_zero_elements] # Weight % of each element formatted to 3 decimals
cat_values = [f'{val}' for val in cat_clean] # Cation values
oxy_values = [f'{val}' for val in oxy_clean] # Oxygen values
# --- Construct ASCII .DAT Content ---
output = f'''
{id_pfe} "{name}"
"{formula}
Locality: {locality}
ID_EPMA: {id_EPMA}
ID_EPMA2: {id_Old}
BM: {BM}
Reference: {reference}"
{display_ox} {elements_shown}
{density:.2f}
'''
# Add the element symbols (e.g., "Mg", "Ca", etc.)
output += " " + " ".join(element_symbols) + "\n"
# Add the corresponding cations for the elements
output += " " + " ".join(cat_values) + "\n"
# Add the corresponding oxygens for the elements
output += " " + " ".join(oxy_values) + "\n"
# Add the weight % content for the elements
output += " " + " ".join(element_content) + "\n"
# Add additional standard details (e.g., type, formula, block)
output += f'"{type}" 0 0 "{formula}" "{block}"'
return output
# ============================================================================
# Function: process_csv
# Purpose: Reads an input CSV file and generates a single .DAT file for each standard.
# Arguments:
# - input_csv: Path to the input CSV file containing the standard information.
# - output_file: Path to the output .DAT file where the converted data will be saved.
# - catox_csv: Path to the CSV file containing cation and oxygen data for each element.
# ============================================================================
def process_csv(input_csv, output_file, catox_csv):
# --- Read Input CSV Files ---
df = pd.read_csv(input_csv) # Standard information CSV (contains name, formula, etc.)
dfcatoxy = pd.read_csv(catox_csv) # Cation and oxygen data CSV (contains element cations and oxygens)
# --- Identify Element Composition Columns ---
element_composition = df.columns[11:] # The columns containing the element composition data (adjust if needed)
# --- Extract Cation and Oxygen Data ---
cat = dfcatoxy.iloc
[o] # Cation values (first row of the cation/oxygen file)
oxy = dfcatoxy.iloc[1] # Oxygen values (second row of the cation/oxygen file)
# --- Open Output File for Writing ---
with open(output_file, 'w') as out_file:
# Iterate over each row in the DataFrame and convert to ASCII format
for idx, row in df.iterrows():
# Generate the .DAT content for the current row
dat_content = convert_ascii_std(row, element_composition, cat, oxy)
# Write the generated content to the output file
out_file.write(dat_content)
# --- Remove First Blank Line ---
# This step removes the first blank line that may cause issues when reading the file in PfE
with open(output_file, 'r') as file:
lines = file.readlines()
with open(output_file, 'w') as file:
file.writelines(lines[1:]) # Rewrite the file without the first blank line
print(f"Generated {output_file} with the first line removed")
# ============================================================================
# Main Execution Block
# Purpose: Process the input CSV and generate the output .DAT file for PfE.
# ============================================================================
# Define input and output file paths
input_csv = 'EPMA_Standards.csv' # Replace with the actual file path to your standard CSV
catox_csv = 'cat_ox.csv' # Replace with the actual file path to your cation/oxygen data CSV
output_file = 'standardsforPfE.dat' # Output file name for the converted .DAT file
# Process the CSV and generate the .DAT file
process_csv(input_csv, output_file, catox_csv)
Quote from: Jens Andersen on November 06, 2024, 06:56:12 AMThanks Jon, that's very helpful. I will proceed with that model for now, it sounds like a good way for me. When you load a file setup, does it automatically load the wavescans as well?
Quote from: Jens Andersen on November 06, 2024, 05:22:51 AMPerhaps I should do a single file with all of the standards and wavescans (but no sample data) and use that as the template for the different variant setups that I need to do (with "save as" to a different file name).