# Wwise Sound Extractor (by Nicknine)
# Requires Python 3
# This script loads soundbanksinfo.xml and uses it to extract the sound files with proper names.

import os
import struct
import shutil
import xml.etree.ElementTree

# This is the script config. Update these variables appropriately for your game.

# Path to soundbanksinfo.xml
gameRoot=r"D:\Games\Steam\steamapps\common\DOOM\base\sound\soundbanks\pc"

# Output path
targetDirectory=r"E:\GameRips\doom"

# List PCK files used by the game here. You can skip PCKs for the languages you don't need.
pckFiles = [
    "initial.pck",
    "streamed_sfx.pck",
    "streamed_sfx_dlc1.pck",
    "streamed_sfx_dlc2.pck",
    "streamed_sfx_dlc3.pck",
    "English(US)/streamed_vo.pck",
    "English(US)/streamed_vo_dlc1.pck",
    "English(US)/streamed_vo_dlc2.pck",
    "English(US)/streamed_vo_dlc3.pck",
]

# List banks that you want to extract in "name:language" format.
# Language is always "SFX" for normal banks, check soundbanksinfo.xml for what languages are available for voice banks.
targetBanks = [
    "doom_music:SFX",
    "doom_vo:English(US)",
]

# NOTE: Each bank will be extracted into its own sub-folder.

# Actual script starts here.


def unpackLE(typ,data):
    """Unpack *data* as little-endian with struct format *typ*; returns a tuple."""
    return struct.unpack("<"+typ,data)


def unpackBE(typ,data):
    """Unpack *data* as big-endian with struct format *typ*; returns a tuple."""
    return struct.unpack(">"+typ,data)


def detectEndian(f,pos):
    """Guess the byte order of a file from the 32-bit field at offset *pos*.

    The four bytes are decoded both as little- and big-endian. The file is
    treated as big-endian when the little-endian reading is the larger of the
    two. The file position of *f* is restored before returning.

    Returns True for big-endian, False otherwise.
    Raises struct.error when fewer than four bytes are available at *pos*.
    """
    prev=f.tell()
    f.seek(pos)
    data=f.read(4)
    # Restore the position before decoding so it is reset even on short reads.
    f.seek(prev)
    LETest=unpackLE("I",data)[0]
    BETest=unpackBE("I",data)[0]
    return LETest > BETest


class Bank:
    """A sound bank (BNK) file opened for reading.

    Attributes set by the constructor:
      path      - path the bank was opened from
      f         - the open binary file object (the caller closes it)
      bigEndian - result of detectEndian() on the field at offset 0x04
      unpack    - unpackBE or unpackLE, matching bigEndian
      size      - total file size in bytes
      numFiles  - number of entries in the DIDX table
      files     - dict mapping sound id to its BankFile entry
    """

    def __init__(self,path):
        """Open *path* and index the sounds listed in its DIDX section.

        Raises Exception when the file does not start with a BKHD section or
        when the DIDX/DATA sections are missing. The file is closed again if
        anything goes wrong during parsing.
        """
        self.path=path
        self.f=open(path,"rb")
        try:
            self._parse()
        except BaseException:
            # Do not leak the handle when the bank turns out to be unusable.
            self.f.close()
            raise

    def _parse(self):
        """Validate the header and build the id -> BankFile index."""
        self.bigEndian=detectEndian(self.f,0x04)
        self.unpack=unpackBE if self.bigEndian else unpackLE

        self.f.seek(0,2)
        self.size=self.f.tell()
        self.f.seek(0)

        magic=self.getSectionHeader()[0]
        if magic!=b"BKHD":
            raise Exception("%s: Wrong BNK header magic." % self.path)

        tableOffset, tableSize = self.findSection(b"DIDX")
        dataOffset, dataSize = self.findSection(b"DATA")

        # Parse sound entries from DIDX table.
        # Each entry is 0x0c bytes: three 32-bit values (see BankFile).
        self.f.seek(tableOffset)
        self.numFiles=tableSize//0x0c
        self.files=dict()
        for i in range(self.numFiles):
            file=BankFile(self,dataOffset)
            self.files[file.id]=file

    def getVar(self):
        """Read one unsigned 32-bit integer at the current file position."""
        return self.unpack("I",self.f.read(4))[0]

    def findSection(self,targetSection):
        """Scan the file from the start for the section tagged *targetSection*.

        Returns (offset, size), where offset is the position of the section
        body (just past its 8-byte header). Raises Exception if no such
        section exists.
        """
        self.f.seek(0)
        # A section header takes 8 bytes; stop as soon as fewer remain so a
        # truncated file reports "not found" instead of a struct.error.
        while self.f.tell()+8<=self.size:
            magic,size=self.getSectionHeader()
            offset=self.f.tell()
            if magic==targetSection:
                return (offset,size)
            self.f.seek(size,1)
        raise Exception("%s: Did not find section %s." % (self.path,targetSection.decode()))

    def getSectionHeader(self):
        """Read a section header; returns (4-byte magic, 32-bit body size)."""
        magic=self.f.read(4)
        size=self.unpack("I",self.f.read(4))[0]
        return (magic, size)


class BankFile:
    """One DIDX table entry: the id, absolute offset and size of a sound."""

    def __init__(self,bank,dataOffset):
        """Read the next entry from *bank* at its current file position.

        The stored offset is rebased by *dataOffset* so that it becomes an
        absolute position within the bank file.
        """
        self.id=bank.getVar()
        self.offset=bank.getVar()
        self.size=bank.getVar()
        self.offset+=dataOffset


class Package:
    """A file package (PCK) opened for reading.

    The constructor parses the header, the language table, the embedded bank
    table and the sound table. Every sound entry is also registered in the
    module-level ``streamedFiles`` dict as ``(package, entry)`` keyed by id,
    so that dict must exist before a Package is created.

    The open file object is kept in ``f``; the caller closes it.
    """

    def __init__(self,path):
        """Open and index the package at *path*.

        Raises Exception when the file does not start with the AKPK magic.
        The file is closed again if anything goes wrong during parsing.
        """
        self.path=path
        self.f=open(path,"rb")
        try:
            self._parse()
        except BaseException:
            # Do not leak the handle when the package turns out to be unusable.
            self.f.close()
            raise

    def _parse(self):
        """Read the header and all three tables from the open file."""
        self.magic=self.f.read(4)
        if self.magic!=b"AKPK":
            raise Exception("%s: Wrong PCK header magic." % self.path)

        self.bigEndian=detectEndian(self.f,0x08)
        self.unpack=unpackBE if self.bigEndian else unpackLE

        self.headerSize=self.getVar()
        self.flag=self.getVar()
        self.langsSize=self.getVar()
        self.banksSize=self.getVar()
        self.soundsSize=self.getVar()

        if self.langsSize+self.banksSize+self.soundsSize+0x10==self.headerSize:
            # Initial version.
            self.version=1
        else:
            # 2012 revision which added the fourth section and changed file size from 64-bit to 32-bit.
            self.version=2
            self.unk=self.getVar()

        # Parse languages.
        self.langsOffset=self.f.tell()
        self.numLangs=self.getVar()
        self.langs=dict()
        for i in range(self.numLangs):
            lang=PackageLanguage(self)
            self.langs[lang.id]=lang

        # Skip language names.
        self.f.seek(self.langsOffset+self.langsSize)

        # Parse embedded banks.
        self.numBanks=self.getVar()
        self.banks=dict()
        for i in range(self.numBanks):
            file=PackageFile(self)
            self.banks[file.id]=file

        # Parse sounds.
        self.numFiles=self.getVar()
        self.files=dict()
        for i in range(self.numFiles):
            file=PackageFile(self)
            self.files[file.id]=file
            # Global lookup used by the extraction loop to locate a sound by id.
            streamedFiles[file.id] = self, file

    def readLangName(self):
        """Read a NUL-terminated language name at the current file position.

        Names are UTF-16 (in the package's byte order) for version 1 packages
        or when the module-level ``gamePlatform`` is "Windows"; otherwise they
        are UTF-8. Reading also stops at end of file, so a name that is missing
        its terminator cannot cause an endless loop.
        """
        if self.version==1 or gamePlatform=="Windows":
            width=2
            encoding="utf-16be" if self.bigEndian else "utf-16le"
        else:
            width=1
            encoding="utf-8"

        terminator=b"\x00"*width
        units=[]
        while True:
            unit=self.f.read(width)
            # A short read means EOF was hit before the terminator.
            if len(unit)<width or unit==terminator:
                break
            units.append(unit)
        return b"".join(units).decode(encoding)

    def getVar(self):
        """Read one unsigned 32-bit integer at the current file position."""
        return self.unpack("I",self.f.read(4))[0]

    def getVar64(self):
        """Read one unsigned 64-bit integer at the current file position."""
        return self.unpack("Q",self.f.read(8))[0]


class PackageLanguage:
    """One language table entry of a Package: numeric id plus decoded name."""

    def __init__(self,pck):
        """Read the next language entry from *pck*.

        The entry holds a name offset (relative to the start of the language
        table) and an id. The name is fetched from that offset and the file
        position is then put back so the next entry can be read.
        """
        self.nameOffset=pck.getVar()
        self.id=pck.getVar()

        pos=pck.f.tell()
        pck.f.seek(pck.langsOffset+self.nameOffset)
        self.name=pck.readLangName()
        pck.f.seek(pos)


class PackageFile:
    """One bank/sound table entry of a Package."""

    def __init__(self,pck):
        """Read the next file entry from *pck* at its current file position.

        Fields, in order: id, offset multiplier, size (64-bit for version 1
        packages, 32-bit otherwise), offset, language id. A non-zero multiplier
        is applied to the offset.
        """
        self.id=pck.getVar()
        self.mult=pck.getVar()
        self.size=pck.getVar64() if pck.version==1 else pck.getVar()
        self.offset=pck.getVar()
        self.lang=pck.getVar()

        if self.mult!=0:
            self.offset*=self.mult


def extractFile(f,offset,size,name,outPath):
    """Copy *size* bytes starting at *offset* of open file *f* to outPath/name.

    Prints *name*, creates any missing parent folders and overwrites an
    existing output file.
    """
    print(name)
    soundPath=os.path.join(outPath,name)
    f.seek(offset)
    folderPath=os.path.dirname(soundPath)
    os.makedirs(folderPath,exist_ok=True)
    # Use the size that was passed in, not whatever a global happens to hold.
    with open(soundPath,"wb") as f2:
        f2.write(f.read(size))


def extractLooseFile(path,name,outPath):
    """Copy the loose file at *path* to outPath/name.

    Prints *name* and creates any missing parent folders.
    """
    print(name)
    soundPath=os.path.join(outPath,name)
    folderPath=os.path.dirname(soundPath)
    os.makedirs(folderPath,exist_ok=True)
    shutil.copyfile(path,soundPath)


# Normalize paths.
# Driver: reads soundbanksinfo.xml, opens the configured packages, then for
# every requested bank extracts its embedded sounds and its streamed sounds
# into <targetDirectory>/<bank name>/.
gameRoot=os.path.normpath(gameRoot)
targetDirectory=os.path.normpath(targetDirectory)

tree=xml.etree.ElementTree.parse(os.path.join(gameRoot,"soundbanksinfo.xml"))
root=tree.getroot()
gamePlatform=root.get("Platform")
schemaVersion=int(root.get("SchemaVersion"))

print("Loading package files...")
packages=list()
# Filled in by Package(): sound id -> (package, file entry).
streamedFiles=dict()

try:
    for pck in pckFiles:
        pckPath=os.path.join(gameRoot,os.path.normpath(pck))
        packages.append(Package(pckPath))

    print("Loading streamed file names...")
    # Sound id -> (normalized output path, language).
    streamedFileNames=dict()
    streamedFilesNode=root.find("StreamedFiles")
    # A missing <StreamedFiles> element is treated as "no streamed files".
    if streamedFilesNode is not None:
        for node in streamedFilesNode:
            if node.get("UsingReferenceLanguageAsStandIn")=="true":
                continue # Skip these, they're just duplicates.

            soundId=int(node.get("Id"))
            lang=node.get("Language")
            streamedFileNames[soundId] = os.path.normpath(node.find("Path").text), lang

    # Looked up once; a missing <SoundBanks> element means no bank can match.
    soundBanksNode=root.find("SoundBanks")
    soundBankNodes=list(soundBanksNode) if soundBanksNode is not None else []

    for targetBank in targetBanks:
        targetName, targetLang = targetBank.split(":")

        # Find bank with the specified name and language.
        bankNode=None
        for node in soundBankNodes:
            if node.get("Language")==targetLang and node.findtext("ShortName")==targetName:
                bankNode=node
                break

        # Compare against None explicitly: the truth value of an Element
        # depends on whether it has children, not on whether it was found.
        if bankNode is None:
            print("Bank %s was not found" % targetBank)
            continue

        print(targetBank)
        bankOutPath=os.path.join(targetDirectory,targetName)

        # Extract all included files.
        if schemaVersion<10:
            IncludedMemoryFiles=bankNode.find("IncludedFullFiles")
            hasIncludedFiles=IncludedMemoryFiles is not None and len(IncludedMemoryFiles)!=0
        else:
            IncludedMemoryFiles=bankNode.find("IncludedMemoryFiles")
            hasIncludedFiles=IncludedMemoryFiles is not None

        if hasIncludedFiles:
            # Only load BNK file if there are any files to extract, empty banks have no DIDX and DATA sections at all.
            bankPath=os.path.join(gameRoot,bankNode.find("Path").text)
            bank=Bank(bankPath)
            try:
                for node in IncludedMemoryFiles:
                    soundId=int(node.get("Id"))
                    name=os.path.normpath(node.find("Path").text)

                    if soundId in streamedFileNames:
                        continue # Prefetched streamed sound, ignore.

                    try:
                        sound=bank.files[soundId]
                    except KeyError:
                        raise Exception("Internal file %u not found in bank %s. Check your files!" % (soundId,targetBank)) from None

                    extractFile(bank.f,sound.offset,sound.size,name,bankOutPath)
            finally:
                # Close the bank even when extraction fails part-way.
                bank.f.close()

        # Extract all streamed files.
        ReferencedStreamedFiles=bankNode.find("ReferencedStreamedFiles")
        if ReferencedStreamedFiles is not None:
            for node in ReferencedStreamedFiles:
                soundId=int(node.get("Id"))
                name, lang = streamedFileNames[soundId]
                ext=os.path.splitext(name)[1]

                try:
                    # Try looking for it in PCKs first.
                    pck, sound = streamedFiles[soundId]
                except KeyError:
                    # Failing that, see if it's a loose file.
                    if lang!="SFX":
                        path=os.path.join(gameRoot,lang,str(soundId)+ext)
                    else:
                        path=os.path.join(gameRoot,str(soundId)+ext)

                    if os.path.isfile(path):
                        extractLooseFile(path,name,bankOutPath)
                    else:
                        print("Streamed file %u not found! Have you included all the required package files?" % soundId)
                else:
                    # Only the lookup is guarded; real I/O errors during
                    # extraction are allowed to surface instead of being
                    # reported as a missing file.
                    extractFile(pck.f,sound.offset,sound.size,name,bankOutPath)
finally:
    for pck in packages:
        pck.f.close()