Source code for vaspparser.vasp.output

import contextlib
import os
import posixpath
import warnings
from typing import Callable, Optional, Sequence, Union

import numpy as np
from ase.atoms import Atoms

from vaspparser.dft.bader import Bader
from vaspparser.dft.waves.electronic import ElectronicStructure
from vaspparser.vasp.parser.oszicar import Oszicar
from vaspparser.vasp.parser.outcar import Outcar, OutcarCollectError
from vaspparser.vasp.procar import Procar
from vaspparser.vasp.structure import read_atoms, vasp_sorter
from vaspparser.vasp.vasprun import Vasprun as Vr
from vaspparser.vasp.vasprun import VasprunError, VasprunWarning
from vaspparser.vasp.volumetric_data import VaspVolumetricData


[docs] class Output: """ Handles the output from a VASP simulation. Attributes: electronic_structure: Gives the electronic structure of the system electrostatic_potential: Gives the electrostatic/local potential of the system charge_density: Gives the charge density of the system """
[docs] def __init__(self): self._structure = None self.outcar = Outcar() self.oszicar = Oszicar() self.generic_output = GenericOutput() self.description = ( "This contains all the output static from this particular vasp run" ) self.charge_density = VaspVolumetricData() self.electrostatic_potential = VaspVolumetricData() self.procar = Procar() self.electronic_structure = ElectronicStructure() self.vp_new = Vr()
@property def structure(self): """ Getter for the output structure """ return self._structure @structure.setter def structure(self, atoms: Atoms): """ Setter for the output structure """ self._structure = atoms
[docs] def collect( self, directory: str = os.getcwd(), sorted_indices: Optional[np.ndarray] = None, es_class=ElectronicStructure, ): """ Collects output from the working directory Args: directory (str): Path to the directory sorted_indices (np.array/None): """ if sorted_indices is None: sorted_indices = vasp_sorter(self.structure) files_present = os.listdir(directory) log_dict = {} vasprun_working, outcar_working = False, False if not ("OUTCAR" in files_present or "vasprun.xml" in files_present): raise OSError("Either the OUTCAR or vasprun.xml files need to be present") if "OSZICAR" in files_present: self.oszicar.from_file(filename=posixpath.join(directory, "OSZICAR")) if "OUTCAR" in files_present: try: self.outcar.from_file(filename=posixpath.join(directory, "OUTCAR")) outcar_working = True except OutcarCollectError as e: warnings.warn( f"OUTCAR present, but could not be parsed: {e}!", stacklevel=2 ) outcar_working = False if "vasprun.xml" in files_present: try: with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") self.vp_new.from_file( filename=posixpath.join(directory, "vasprun.xml") ) if any(isinstance(warn.category, VasprunWarning) for warn in w): warnings.warn( "vasprun.xml parsed but with some inconsistencies. " "Check vasp output to be sure", VasprunWarning, stacklevel=2, ) except VasprunError: warnings.warn( "Unable to parse the vasprun.xml file. Will attempt to get data from OUTCAR", stacklevel=2, ) else: # If parsing the vasprun file does not throw an error, then set to True vasprun_working = True if outcar_working: log_dict["temperature"] = self.outcar.parse_dict["temperatures"] log_dict["stresses"] = self.outcar.parse_dict["stresses"] log_dict["pressures"] = self.outcar.parse_dict["pressures"] log_dict["elastic_constants"] = self.outcar.parse_dict["elastic_constants"] self.generic_output.dft_log_dict["n_elect"] = self.outcar.parse_dict[ "n_elect" ] if len(self.outcar.parse_dict["magnetization"]) > 0: magnetization = np.array( self.outcar.parse_dict["magnetization"], dtype=object ) final_magmoms = np.array( self.outcar.parse_dict["final_magmoms"], dtype=object ) # magnetization[sorted_indices] = magnetization.copy() if len(final_magmoms) != 0: if len(final_magmoms.shape) == 3: final_magmoms[:, sorted_indices, :] = final_magmoms.copy() else: final_magmoms[:, sorted_indices] = final_magmoms.copy() self.generic_output.dft_log_dict["magnetization"] = ( magnetization.tolist() ) self.generic_output.dft_log_dict["final_magmoms"] = ( final_magmoms.tolist() ) self.generic_output.dft_log_dict["e_fermi_list"] = self.outcar.parse_dict[ "e_fermi_list" ] self.generic_output.dft_log_dict["vbm_list"] = self.outcar.parse_dict[ "vbm_list" ] self.generic_output.dft_log_dict["cbm_list"] = self.outcar.parse_dict[ "cbm_list" ] if vasprun_working: log_dict["forces"] = self.vp_new.vasprun_dict["forces"] log_dict["cells"] = self.vp_new.vasprun_dict["cells"] log_dict["volume"] = np.linalg.det(self.vp_new.vasprun_dict["cells"]) # The vasprun parser also returns the energies printed again after the final SCF cycle under the key # "total_energies", but due to a bug in the VASP output, the energies reported there are wrong in Vasp 5.*; # instead use the last energy from the scf cycle energies # BUG link: https://ww.vasp.at/forum/viewtopic.php?p=19242 try: # bug report is not specific to which Vasp5 versions are affected; be safe and workaround for all of # them is_vasp5 = self.vp_new.vasprun_dict["generator"]["version"].startswith( "5." ) except KeyError: # in case the parser didn't read the version info is_vasp5 = True if is_vasp5: log_dict["energy_pot"] = np.array( [e[-1] for e in self.vp_new.vasprun_dict["scf_fr_energies"]] ) else: # total energies refers here to the total energy of the electronic system, not the total system of # electrons plus (potentially) moving ions; hence this is the energy_pot log_dict["energy_pot"] = self.vp_new.vasprun_dict["total_fr_energies"] if "kinetic_energies" in self.vp_new.vasprun_dict: log_dict["energy_tot"] = ( log_dict["energy_pot"] + self.vp_new.vasprun_dict["kinetic_energies"] ) else: log_dict["energy_tot"] = log_dict["energy_pot"] log_dict["steps"] = np.arange(len(log_dict["energy_tot"])) log_dict["positions"] = self.vp_new.vasprun_dict["positions"] log_dict["forces"][:, sorted_indices] = log_dict["forces"].copy() log_dict["positions"][:, sorted_indices] = log_dict["positions"].copy() log_dict["positions"] = np.einsum( "nij,njk->nik", log_dict["positions"], log_dict["cells"] ) # log_dict["scf_energies"] = self.vp_new.vasprun_dict["scf_energies"] # log_dict["scf_dipole_moments"] = self.vp_new.vasprun_dict["scf_dipole_moments"] self.electronic_structure = self.vp_new.get_electronic_structure( es_class=es_class, ) if self.electronic_structure.grand_dos_matrix is not None: self.electronic_structure.grand_dos_matrix[ :, :, :, sorted_indices, : ] = self.electronic_structure.grand_dos_matrix[:, :, :, :, :].copy() if self.electronic_structure.resolved_densities is not None: self.electronic_structure.resolved_densities[ :, sorted_indices, :, : ] = self.electronic_structure.resolved_densities[:, :, :, :].copy() self.structure.positions = log_dict["positions"][-1] self.structure.set_cell(log_dict["cells"][-1]) self.generic_output.dft_log_dict["potentiostat_output"] = ( self.vp_new.get_potentiostat_output() ) valence_charges_orig = self.vp_new.get_valence_electrons_per_atom() valence_charges = valence_charges_orig.copy() valence_charges[sorted_indices] = valence_charges_orig self.generic_output.dft_log_dict["valence_charges"] = valence_charges elif outcar_working: # log_dict = self.outcar.parse_dict.copy() if len(self.outcar.parse_dict["energies"]) == 0: raise VaspCollectError("Error in parsing OUTCAR") log_dict["energy_tot"] = self.outcar.parse_dict["energies"] log_dict["temperature"] = self.outcar.parse_dict["temperatures"] log_dict["stresses"] = self.outcar.parse_dict["stresses"] log_dict["pressures"] = self.outcar.parse_dict["pressures"] log_dict["forces"] = self.outcar.parse_dict["forces"] log_dict["positions"] = self.outcar.parse_dict["positions"] log_dict["forces"][:, sorted_indices] = log_dict["forces"].copy() log_dict["positions"][:, sorted_indices] = log_dict["positions"].copy() if len(log_dict["positions"].shape) != 3 or log_dict["positions"].shape[ 1 ] != len(sorted_indices): raise VaspCollectError("Improper OUTCAR parsing") if len(log_dict["forces"].shape) != 3 or log_dict["forces"].shape[1] != len( sorted_indices ): raise VaspCollectError("Improper OUTCAR parsing") log_dict["time"] = self.outcar.parse_dict["time"] log_dict["steps"] = self.outcar.parse_dict["steps"] log_dict["cells"] = self.outcar.parse_dict["cells"] log_dict["volume"] = np.array( [np.linalg.det(cell) for cell in self.outcar.parse_dict["cells"]] ) self.generic_output.dft_log_dict["scf_energy_free"] = ( self.outcar.parse_dict["scf_energies"] ) self.generic_output.dft_log_dict["scf_dipole_mom"] = self.outcar.parse_dict[ "scf_dipole_moments" ] self.generic_output.dft_log_dict["n_elect"] = self.outcar.parse_dict[ "n_elect" ] self.generic_output.dft_log_dict["energy_int"] = self.outcar.parse_dict[ "energies_int" ] self.generic_output.dft_log_dict["energy_free"] = self.outcar.parse_dict[ "energies" ] self.generic_output.dft_log_dict["energy_zero"] = self.outcar.parse_dict[ "energies_zero" ] self.generic_output.dft_log_dict["energy_int"] = self.outcar.parse_dict[ "energies_int" ] if "PROCAR" in files_present: try: self.electronic_structure = self.procar.from_file( filename=posixpath.join(directory, "PROCAR") ) # Even the atom resolved values have to be sorted from the vasp atoms order to the Atoms order self.electronic_structure.grand_dos_matrix[ :, :, :, sorted_indices, : ] = self.electronic_structure.grand_dos_matrix[:, :, :, :, :].copy() try: self.electronic_structure.efermi = self.outcar.parse_dict[ "fermi_level" ] except KeyError: self.electronic_structure.efermi = self.vp_new.vasprun_dict[ "efermi" ] except ValueError: pass # important that we "reverse sort" the atoms in the vasp format into the atoms in the atoms class self.generic_output.log_dict = log_dict if vasprun_working: # self.dft_output.log_dict["parameters"] = self.vp_new.vasprun_dict["parameters"] self.generic_output.dft_log_dict["scf_dipole_mom"] = ( self.vp_new.vasprun_dict["scf_dipole_moments"] ) if len(self.generic_output.dft_log_dict["scf_dipole_mom"][0]) > 0: total_dipole_moments = np.array( [ dip[-1] for dip in self.generic_output.dft_log_dict["scf_dipole_mom"] ] ) self.generic_output.dft_log_dict["dipole_mom"] = total_dipole_moments self.generic_output.dft_log_dict["scf_energy_int"] = ( self.vp_new.vasprun_dict["scf_energies"] ) self.generic_output.dft_log_dict["scf_energy_free"] = ( self.vp_new.vasprun_dict["scf_fr_energies"] ) self.generic_output.dft_log_dict["scf_energy_zero"] = ( self.vp_new.vasprun_dict["scf_0_energies"] ) self.generic_output.dft_log_dict["energy_int"] = np.array( [ e_int[-1] for e_int in self.generic_output.dft_log_dict["scf_energy_int"] ] ) self.generic_output.dft_log_dict["energy_free"] = np.array( [ e_free[-1] for e_free in self.generic_output.dft_log_dict["scf_energy_free"] ] ) # Overwrite energy_free with much better precision from the OSZICAR file if "energy_pot" in self.oszicar.parse_dict and np.array_equal( self.generic_output.dft_log_dict["energy_free"], np.round(self.oszicar.parse_dict["energy_pot"], 8), ): self.generic_output.dft_log_dict["energy_free"] = ( self.oszicar.parse_dict["energy_pot"] ) self.generic_output.dft_log_dict["energy_zero"] = np.array( [ e_zero[-1] for e_zero in self.generic_output.dft_log_dict["scf_energy_zero"] ] ) self.generic_output.dft_log_dict["n_elect"] = float( self.vp_new.vasprun_dict["parameters"]["electronic"]["NELECT"] ) if "kinetic_energies" in self.vp_new.vasprun_dict: # scf_energy_kin is for backwards compatibility self.generic_output.dft_log_dict["scf_energy_kin"] = ( self.vp_new.vasprun_dict["kinetic_energies"] ) self.generic_output.dft_log_dict["energy_kin"] = ( self.vp_new.vasprun_dict["kinetic_energies"] ) if ( "LOCPOT" in files_present and os.stat(posixpath.join(directory, "LOCPOT")).st_size != 0 ): self.electrostatic_potential.from_file( filename=posixpath.join(directory, "LOCPOT"), normalize=False ) if ( "CHGCAR" in files_present and os.stat(posixpath.join(directory, "CHGCAR")).st_size != 0 ): self.charge_density.from_file( filename=posixpath.join(directory, "CHGCAR"), normalize=True ) self.generic_output.bands = self.electronic_structure
[docs] def to_dict(self) -> dict: """ Serialize the parsed VASP output into a nested dictionary. The returned dictionary contains the following top-level keys (when available): - ``"description"``: human-readable label for this output object - ``"generic"``: positions, forces, cells, energies, and DFT-specific quantities - ``"structure"``: final atomistic structure as a dictionary - ``"electrostatic_potential"``: LOCPOT data (total and optional diff) - ``"charge_density"``: CHGCAR data (total and optional spin diff) - ``"electronic_structure"``: k-points, eigenvalues, occupancies, and DOS data - ``"outcar"``: OUTCAR-specific quantities not available from vasprun.xml Returns: dict: Hierarchical dictionary of parsed VASP output """ output_dict = { "description": self.description, "generic": self.generic_output.to_dict(), } if self.structure is not None and type(self.structure) == Atoms: output_dict["structure"] = self.structure.todict() elif self.structure is not None: output_dict["structure"] = self.structure.to_dict() if self.electrostatic_potential.total_data is not None: output_dict["electrostatic_potential"] = ( self.electrostatic_potential.to_dict() ) if self.charge_density.total_data is not None: output_dict["charge_density"] = self.charge_density.to_dict() if len(self.electronic_structure.kpoint_list) > 0: output_dict["electronic_structure"] = self.electronic_structure.to_dict() if len(self.outcar.parse_dict.keys()) > 0: output_dict["outcar"] = self.outcar.to_dict_minimal() return output_dict
[docs] class GenericOutput: """ This class stores the generic output like different structures, energies and forces from a simulation in a highly generic format. Usually the user does not have to access this class. Attributes: log_dict (dict): A dictionary of all tags and values of generic data (positions, forces, etc) """
[docs] def __init__(self): self.log_dict = {} self.dft_log_dict = {} self.description = "generic_output contains generic output static" self._bands = ElectronicStructure()
@property def bands(self) -> ElectronicStructure: """ElectronicStructure: The electronic band structure object.""" return self._bands @bands.setter def bands(self, val: ElectronicStructure): self._bands = val
[docs] def to_dict(self) -> dict: """ Serialize the generic and DFT-specific output into a dictionary. The returned dictionary merges ``log_dict`` (generic quantities such as forces, positions, energies, cells) with a ``"dft"`` sub-dictionary containing DFT-specific quantities (magnetization, Fermi level lists, SCF energies, etc.) and optionally a ``"bands"`` key with the electronic structure. Returns: dict: Dictionary of all generic and DFT output quantities """ go_dict, dft_dict = {}, {} for key, val in self.log_dict.items(): go_dict[key] = val for key, val in self.dft_log_dict.items(): dft_dict[key] = val go_dict["dft"] = dft_dict if self.bands.eigenvalue_matrix is not None: go_dict["dft"]["bands"] = self.bands.to_dict() return go_dict
[docs] class VaspCollectError(ValueError): """Raised when VASP output files are present but cannot be parsed correctly.""" pass
[docs] def parse_vasp_output( working_directory: str, structure: Optional[Atoms] = None, sorted_indices: Optional[Union[Sequence[int], np.ndarray]] = None, read_atoms_funct: Callable = read_atoms, es_class=ElectronicStructure, bader_class=Bader, output_parser_class=Output, ) -> dict: """ Parse the VASP output in the working_directory and return it as a hierarchical dictionary. This is the primary entry point for parsing a finished VASP calculation. The function reads vasprun.xml (preferred) and/or OUTCAR, OSZICAR, LOCPOT, CHGCAR, PROCAR, and CONTCAR/POSCAR files as available. If both vasprun.xml and OUTCAR are present, vasprun.xml is used for energies, forces, positions, and electronic structure, while unique OUTCAR quantities (stresses, elastic constants, Broyden mixing mesh, etc.) supplement the result. Bader charge analysis is performed automatically when AECCAR0 and AECCAR2 are present. Args: working_directory (str): Path to the directory containing the VASP output files structure (Atoms): Input atomistic structure. If None, it is read from CONTCAR/POSCAR. sorted_indices (list/numpy.ndarray): Permutation indices mapping VASP atom order (grouped by species) to the order in ``structure``. Computed automatically if None. read_atoms_funct (callable): Function used to read structure files; defaults to ``read_atoms`` which parses POSCAR/CONTCAR format. es_class: Class used to store electronic structure data; defaults to ``ElectronicStructure``. bader_class: Class used for Bader charge analysis; defaults to ``Bader``. output_parser_class: Class used to aggregate all output; defaults to ``Output``. Returns: dict: Hierarchical output dictionary as returned by ``Output.to_dict()``. Raises: VaspCollectError: If required output files are found but cannot be parsed. OSError: If no OUTCAR or vasprun.xml is present in working_directory. """ output_parser = output_parser_class() if structure is None or len(structure) == 0: try: structure = get_final_structure_from_file( working_directory=working_directory, filename="CONTCAR", read_atoms_funct=read_atoms_funct, ) except OSError: structure = get_final_structure_from_file( working_directory=working_directory, filename="POSCAR", read_atoms_funct=read_atoms_funct, ) if sorted_indices is None: sorted_indices_array = np.arange(len(structure)) else: sorted_indices_array = np.asarray(sorted_indices) output_parser.structure = structure.copy() try: output_parser.collect( directory=working_directory, sorted_indices=sorted_indices_array, es_class=es_class, ) except VaspCollectError: raise # Try getting high precision positions from CONTCAR with contextlib.suppress(IOError, ValueError, FileNotFoundError): output_parser.structure = get_final_structure_from_file( working_directory=working_directory, filename="CONTCAR", structure=structure, sorted_indices=sorted_indices_array, read_atoms_funct=read_atoms_funct, ) # Bader analysis if os.path.isfile(os.path.join(working_directory, "AECCAR0")) and os.path.isfile( os.path.join(working_directory, "AECCAR2") ): bader = bader_class(working_directory=working_directory, structure=structure) try: charges_orig, volumes_orig = bader.compute_bader_charges() except ValueError: warnings.warn("Invoking Bader charge analysis failed", stacklevel=2) else: charges, volumes = charges_orig.copy(), volumes_orig.copy() charges[sorted_indices_array] = charges_orig volumes[sorted_indices_array] = volumes_orig if "valence_charges" in output_parser.generic_output.dft_log_dict: valence_charges = output_parser.generic_output.dft_log_dict[ "valence_charges" ] # Positive values indicate electron depletion output_parser.generic_output.dft_log_dict["bader_charges"] = ( valence_charges - charges ) output_parser.generic_output.dft_log_dict["bader_volumes"] = volumes return output_parser.to_dict()
[docs] def get_final_structure_from_file( working_directory: str, filename: str = "CONTCAR", structure: Optional[Atoms] = None, sorted_indices: Optional[Union[Sequence[int], np.ndarray]] = None, read_atoms_funct: Callable = read_atoms, ) -> Atoms: """ Read the final structure from a POSCAR-format file (typically CONTCAR). The CONTCAR file written by VASP at the end of a relaxation or MD run contains the final ionic positions, cell vectors, and optionally predictor-corrector velocities. If ``structure`` is provided, its atom ordering is preserved and only the cell and positions are updated; otherwise the structure is read entirely from the file. Args: working_directory (str): Path to the directory containing the structure file filename (str): Name of the structure file to read (default: ``"CONTCAR"``) structure (Atoms): Reference input structure used to set the species list and atom ordering. If None, species are read directly from the file. sorted_indices (list/numpy.ndarray): Permutation indices mapping the VASP atom order to the order in ``structure``. Computed from ``structure`` if None. read_atoms_funct (callable): Function for parsing POSCAR-format files Returns: ase.atoms.Atoms: The final structure with updated positions and cell Raises: OSError: If the file cannot be read or the positions are inconsistent """ filename = posixpath.join(working_directory, filename) if structure is not None and sorted_indices is None: sorted_indices_array = vasp_sorter(structure) elif sorted_indices is not None: sorted_indices_array = np.asarray(sorted_indices) else: sorted_indices_array = None if structure is None: try: output_structure = read_atoms_funct(filename=filename) input_structure = output_structure.copy() except (OSError, IndexError, ValueError): raise OSError("Unable to read output structure") else: input_structure = structure.copy() if type(input_structure) == Atoms: species_list = input_structure.get_chemical_symbols() else: species_list = input_structure.get_parent_symbols() try: output_structure = read_atoms_funct( filename=filename, species_list=species_list, ) input_structure.cell = output_structure.cell.copy() input_structure.positions[sorted_indices_array] = output_structure.positions except (OSError, IndexError, ValueError): raise OSError("Unable to read output structure") return input_structure