commit 18ca54544051175eb44ffd077ba761fb56d1380e
Author: Stan <stanton.e@gmail.com>
Date:   Sat Jun 29 01:01:51 2024 -0500

    main push to repo

diff --git a/.dockerignore b/.dockerignore
new file mode 100755
index 0000000..0b1e1e7
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,27 @@
+**/__pycache__
+**/.venv
+**/.classpath
+**/.dockerignore
+**/.env
+**/.git
+**/.gitignore
+**/.project
+**/.settings
+**/.toolstarget
+**/.vs
+**/.vscode
+**/*.*proj.user
+**/*.dbmdl
+**/*.jfm
+**/bin
+**/charts
+**/docker-compose*
+**/compose*
+**/Dockerfile*
+**/node_modules
+**/npm-debug.log
+**/obj
+**/secrets.dev.yaml
+**/values.dev.yaml
+LICENSE
+README.md
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100755
index 0000000..f3d8430
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,19 @@
+{
+    "configurations": [
+        {
+            "name": "Docker: Python - General",
+            "type": "docker",
+            "request": "launch",
+            "preLaunchTask": "docker-run: debug",
+            "python": {
+                "pathMappings": [
+                    {
+                        "localRoot": "${workspaceFolder}",
+                        "remoteRoot": "/app"
+                    }
+                ],
+                "projectType": "general"
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..39c0c4f
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "github.gitAuthentication": false
+}
\ No newline at end of file
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
new file mode 100755
index 0000000..37fc95f
--- /dev/null
+++ b/.vscode/tasks.json
@@ -0,0 +1,26 @@
+{
+	"version": "2.0.0",
+	"tasks": [
+		{
+			"type": "docker-build",
+			"label": "docker-build",
+			"platform": "python",
+			"dockerBuild": {
+				"tag": "fbrowser:latest",
+				"dockerfile": "${workspaceFolder}/Dockerfile",
+				"context": "${workspaceFolder}",
+				"pull": true
+			}
+		},
+		{
+			"type": "docker-run",
+			"label": "docker-run: debug",
+			"dependsOn": [
+				"docker-build"
+			],
+			"python": {
+				"file": "Fbrowser.py"
+			}
+		}
+	]
+}
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100755
index 0000000..109f18f
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,23 @@
+# For more information, please refer to https://aka.ms/vscode-docker-python
+FROM python:3-slim
+
+# Keeps Python from generating .pyc files in the container
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Turns off buffering for easier container logging
+ENV PYTHONUNBUFFERED=1
+
+# Install pip requirements
+COPY requirements.txt .
+RUN python -m pip install -r requirements.txt
+
+WORKDIR /app
+COPY . /app
+
+# Creates a non-root user with an explicit UID and adds permission to access the /app folder
+# For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
+RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /app
+USER appuser
+
+# During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug
+CMD ["python", "Fbrowser.py"]
diff --git a/Fbrowser.py b/Fbrowser.py
new file mode 100755
index 0000000..0f87ef5
--- /dev/null
+++ b/Fbrowser.py
@@ -0,0 +1,273 @@
+# Path: Fbrowser.py
+# Sample Music Browser & Organizer: Main.py
+
+# Importing Libraries
+import sys
+import os
+
+from ScanOrg import organizer, file_scanner, DirectoryFilterProxyModel, FileFilterProxyModel
+from stanzip import Extractor as extractor
+from stanzip import Compressor as compressor
+from stanzip import zipfile, py7zr, rarfile
+from PyQt5.QtGui import QStandardItem , QStandardItemModel, QContextMenuEvent
+from PyQt5.QtWidgets import QApplication, QLabel, QPushButton, QVBoxLayout, QMenu, QTreeView, QMessageBox, QSlider, QWidget, QFileSystemModel, QSplitter, QHBoxLayout, QFileDialog
+from PyQt5.QtMultimedia import QMediaPlaylist, QMediaPlayer, QMediaContent, QAudioFormat, QAudioDeviceInfo, QAudio
+from PyQt5.QtCore import QDir, QSortFilterProxyModel, Qt, QUrl
+from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as MPLCanvas
+from matplotlib.figure import Figure
+
+
+
+# Sample Music Browser Main Class
+class SampleMusicBrowser(QWidget):
+    def __init__(self):
+        """Create the player, playlist and models, build the UI and add the matplotlib canvas to it."""
+        super().__init__()
+        self.organizer = organizer()
+        self.extractor = extractor()
+        self.file_model = QStandardItemModel()
+        self.player = QMediaPlayer()
+        self.playlist = QMediaPlaylist()
+        self.player.setPlaylist(self.playlist)
+        self.tree_model = QFileSystemModel()
+        self.init_ui()
+        self.folder_contents_view.setEditTriggers(QTreeView.NoEditTriggers)
+        self.player.error.connect(self.player_error)
+        self.player.mediaStatusChanged.connect(self.player_media_status_changed)
+        self.player.setAudioRole(QAudio.MusicRole)
+        # init_ui() has already installed this widget's layout. Qt refuses a second setLayout(),
+        # and assigning to self.layout would shadow QWidget.layout(), so the canvas is appended
+        # to the existing layout instead of a new, never-installed QHBoxLayout.
+        self.canvas = MPLCanvas(Figure())
+        self.layout().addWidget(self.canvas)
+
+    # Player Error Debugging
+    def player_error(self, error):
+        try:
+            if error == QMediaPlayer.NoError:
+                return
+            print(f"An error occurred: Code:{error}  {self.player.errorString()}")
+        except Exception as e:
+            print(f"Error: {e}")
+
+    # Media Status Changed Debugging
+    def player_media_status_changed(self, status):
+        if status == QMediaPlayer.NoMedia:
+            return
+        print('Media Status: ' + str(status))
+    
+    
+    def on_extract_button_clicked(self):
+        extraction_directory = QFileDialog.getExistingDirectory(self, "Select Extraction Directory")
+        if extraction_directory:
+            index = self.folder_contents_view.currentIndex()
+            if index.isValid():
+                self.extractor.zipviewer(index, self.file_filter_model, self.list_model, extraction_directory)
+
+    def show_context_menu(self, position):
+        menu = QMenu(self)
+        extract_action = menu.addAction('Extract')
+        extract_action.triggered.connect(self.on_extract_button_clicked)  # Connect to the extraction function
+        menu.exec(self.folder_contents_view.mapToGlobal(position))
+
+    def init_ui(self):
+        layout = QVBoxLayout()
+        label = QLabel('Sample Music Browser')
+        buttons_layout = QHBoxLayout()
+        layout.addWidget(label)
+        
+
+        #self.midi_player = MidPlay()
+        self.file_tree = QTreeView()
+
+        self.file_tree.setHeaderHidden(True)
+        self.file_tree.clicked.connect(self.change_directory)
+
+
+        play_button = QPushButton('Play')
+        play_button.clicked.connect(self.player.play)
+        #play_button.clicked.connect(self.midi_player.play_midi)
+        buttons_layout.addWidget(play_button)
+
+        stop_button = QPushButton('Stop')
+        stop_button.clicked.connect(self.player.stop)
+       # stop_button.clicked.connect(self.midi_player.stop)
+        buttons_layout.addWidget(stop_button)
+        self.player.stateChanged.connect(self.player_state_changed)
+        self.player.positionChanged.connect(self.player_position_changed)
+        self.player.durationChanged.connect(self.player_duration_changed)
+        
+
+        layout.addLayout(buttons_layout)
+
+        self.folder_contents_view = QTreeView()
+        self.folder_contents_view.setHeaderHidden(False)
+        self.folder_contents_view.setRootIsDecorated(False)
+        self.folder_contents_view.setSortingEnabled(True)
+
+        splitter = QSplitter()
+        splitter.addWidget(self.file_tree)
+        splitter.addWidget(self.folder_contents_view)
+        layout.addWidget(splitter)
+        self.current_dir_label = QLabel()
+        layout.addWidget(self.current_dir_label)
+
+        up_dir_button = QPushButton('Up Directory')
+        up_dir_button.clicked.connect(self.go_up_directory)
+        layout.addWidget(up_dir_button)
+
+        forward_button = QPushButton('Forward')
+        forward_button.clicked.connect(self.go_forward_directory)
+        layout.addWidget(forward_button)
+        self.setLayout(layout)
+
+        self.setWindowTitle('Samples are life!')
+        path = QFileDialog.getExistingDirectory(self, 'Select Directory')
+        if path:
+            self.populate_file_tree(path)
+
+
+        self.player.setVolume(50)
+        volume_slider = QSlider(Qt.Horizontal)
+        volume_slider.setRange(0, 100)
+        volume_slider.setValue(50)
+        volume_slider.valueChanged.connect(self.player.setVolume)
+        layout.addWidget(volume_slider)
+        self.playlist.currentIndexChanged.connect(self.playlist_current_index_changed)
+        self.playlist.currentMediaChanged.connect(self.playlist_current_media_changed)
+        self.playlist.mediaInserted.connect(self.playlist_media_inserted)
+        self.playlist.mediaRemoved.connect(self.playlist_media_removed)
+        self.playlist.setPlaybackMode(QMediaPlaylist.Loop)
+        self.folder_contents_view.doubleClicked.connect(self.play_file)
+        self.folder_contents_view.setContextMenuPolicy(Qt.CustomContextMenu)
+        self.folder_contents_view.customContextMenuRequested.connect(self.show_context_menu)
+
+
+    def directory_loaded(self, path):  # re-roots the directory tree and the contents view at *path*
+        self.file_tree.setRootIndex(self.directory_model.mapFromSource(self.tree_model.index(path)))  # was self.model, which is never defined
+        self.folder_contents_view.setRootIndex(self.file_filter_model.mapFromSource(self.list_model.index(path)))
+
+    def populate_file_tree(self, path):
+        try:
+            self.tree_model.setRootPath(path)
+            self.file_tree.setModel(self.tree_model)
+            self.directory_model = DirectoryFilterProxyModel()
+            self.directory_model.setSourceModel(self.tree_model)
+            self.file_tree.setModel(self.directory_model)
+            self.file_tree.setRootIndex(self.directory_model.mapFromSource(self.tree_model.index(path)))
+            self.list_model = QFileSystemModel()
+            self.list_model.setRootPath(path)
+            self.file_filter_model = FileFilterProxyModel()
+            self.file_filter_model.setSourceModel(self.list_model)
+            self.folder_contents_view.setModel(self.file_filter_model)
+            self.folder_contents_view.setRootIndex(self.file_filter_model.mapFromSource(self.list_model.index(path)))
+            self.current_dir_label.setText(path)
+        except Exception as e:
+            print(f"Error Populating File Tree: {e}")
+
+    def closeEvent(self, event):
+        reply = QMessageBox.question(self, 'Exit', 'Are you sure you want to exit?',
+                                     QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
+        if reply == QMessageBox.Yes:
+            event.accept()
+        else:
+            event.ignore()
+
+    def play_file(self, index):
+        """Play the double-clicked entry; for a .zip archive, play the first audio file inside it."""
+        import tempfile  # function-scope import: only archive playback needs it
+        audio_exts = ('.mp3', '.wav', '.ogg', '.flac', '.m4a', '.wma', '.aac', '.aiff', '.alac', '.mid', '.midi', '.mp4')
+        try:
+            index = self.file_filter_model.mapToSource(index)
+            file_path = self.list_model.filePath(index)
+            lowered = file_path.lower()
+            if lowered.endswith(('.rar', '.7z')):  # zipfile cannot open these
+                print(f"Error Playing File: only .zip archives can be previewed: {file_path}")
+                return
+            if lowered.endswith(('.mid', '.midi')):
+                self.canvas.draw()
+                return
+            if lowered.endswith('.zip'):
+                with zipfile.ZipFile(file_path, 'r') as zip_ref:
+                    member = next((n for n in zip_ref.namelist() if n.lower().endswith(audio_exts)), None)
+                    if member is None:  # previously ended in an unbound-variable error
+                        print(f"Error Playing File: no audio file in {file_path}")
+                        return
+                    # Keep the extracted file: the player may still be opening it after play() returns.
+                    # One temporary directory is reused and removed when its object is finalized.
+                    if not hasattr(self, '_preview_dir'):
+                        self._preview_dir = tempfile.TemporaryDirectory(prefix='fbrowser_')
+                    file_path = zip_ref.extract(member, self._preview_dir.name)
+            media = QMediaContent(QUrl.fromLocalFile(file_path))
+            self.playlist.clear()
+            self.playlist.addMedia(media)
+            self.player.play()
+        except Exception as e:
+            print(f"Error Playing File: {e}")
+
+    def player_state_changed(self, state):
+        if state == QMediaPlayer.StoppedState:
+            self.playlist.setCurrentIndex(0)
+
+    def player_position_changed(self, position):
+        pass
+    def player_duration_changed(self, duration):
+        pass
+    def playlist_current_index_changed(self, index):
+        pass
+    def playlist_current_media_changed(self, media):
+        pass
+    def playlist_media_inserted(self, start, end):
+        pass
+    def playlist_media_removed(self, start, end):
+        pass
+
+    def change_directory(self, index):
+        index = self.directory_model.mapToSource(index)
+        try:
+            file_path = self.tree_model.filePath(index)
+            self.list_model.setRootPath(file_path)
+            self.current_dir_label.setText(file_path)
+            self.folder_contents_view.setRootIndex(self.file_filter_model.mapFromSource(self.list_model.index(file_path)))
+        except Exception as e:
+            print(f"Error Changing Dirs.: {e}")
+        
+    def go_up_directory(self):
+        index = self.folder_contents_view.rootIndex()
+        index = self.file_filter_model.mapToSource(index)
+        parent_index = index.parent()
+        if parent_index.isValid():  # Check if the parent index is valid
+            self.folder_contents_view.setRootIndex(self.file_filter_model.mapFromSource(parent_index))
+            self.current_dir_label.setText(self.list_model.filePath(parent_index))
+
+    def go_forward_directory(self):  # NOTE(review): same body as go_up_directory, so "Forward" also moves UP; no history is kept — confirm intent
+        index = self.folder_contents_view.rootIndex()
+        index = self.file_filter_model.mapToSource(index)
+        parent_index = index.parent()
+        if parent_index.isValid():
+            self.folder_contents_view.setRootIndex(self.file_filter_model.mapFromSource(parent_index))
+            self.current_dir_label.setText(self.list_model.filePath(parent_index))
+
+
+if __name__ == '__main__':
+   # player = MidPlay()
+   # file_path = list(player.select_file())  # Get the selected file path
+
+   # viewer = MidViewer()
+
+   # viewer.read_midi(file_path)
+   # viewer.view_midi()
+    #viewer.show()
+   # viewer.save('test.png')
+   # viewer.clear()
+   # viewer.close()
+  #  print(viewer.get_midi_info(file_path))  # Use the file path
+  #  print(viewer.get_piano_roll(file_path))  # Use the file path
+   # print(viewer.get_tempo(file_path))  # Use the file path
+  #  print(viewer.get_notes(file_path))  # Use the file path
+    app = QApplication(sys.argv)
+    sampleMusicBrowser = SampleMusicBrowser()
+    sampleMusicBrowser.show()
+
+
+    sys.exit(app.exec_())
\ No newline at end of file
diff --git a/MidPlay.py b/MidPlay.py
new file mode 100755
index 0000000..19f8bd5
--- /dev/null
+++ b/MidPlay.py
@@ -0,0 +1,255 @@
+#Path: MidPlay.py
+# Description: A class to play MIDI files and a class to view MIDI files
+# probably switching to a different library for midi handling
+# pretty_midi is not very good for this purpose or real-time playback of midi files
+
+"""Pretty Midi module type stubs are included but incomplete.
+Pretty Midi comes with a statement to cite the following paper
+when used in a research project:
+
+Colin Raffel and Daniel P. W. Ellis. Intuitive Analysis, 
+Creation and Manipulation of MIDI Data with pretty_midi. 
+In Proceedings of the 15th International Conference on Music
+Information Retrieval Late Breaking and Demo Papers, 2014.
+
+colinraffel.com/publications/ismir2014intuitive.pdf
+
+"""
+import pygame
+# Imports
+import pretty_midi
+import fluidsynth
+import sys
+import os
+from PyQt5.QtWidgets import (QApplication, QLabel, QListWidget, QFileDialog, QMessageBox, QWidget, QPushButton, QHBoxLayout,
+    QVBoxLayout,
+    QProgressBar,
+    QSlider) # structured for readability and to avoid long lines and it annoys my friend XD
+from PyQt5.QtCore import QTimer, Qt
+import threading
+import cProfile # profiler remove for production
+
+pygame.mixer.init()
+pygame.init()
+
+class MidPlayGUI(QWidget):
+    def __init__(self):
+        super().__init__()
+        self.player = MidPlay()
+        self.current_midi_label = QLabel()
+        self.playlist_widget = QListWidget()
+        self.setWindowTitle("MidPlay - Midi Player")
+        self.init_ui()
+        self.timer = QTimer()
+        self.timer.timeout.connect(self.handle_song_end)
+        self.timer.start(1000)
+
+    def set_volume(self, value):
+        volume = value / 100
+        pygame.mixer.music.set_volume(volume)
+
+    def update_progress(self):
+        """Show elapsed/total time of the current MIDI as a 0-100 percentage on the progress bar."""
+        if self.player.current_midi:
+            elapsed = max(pygame.mixer.music.get_pos(), 0) / 1000  # get_pos() is in milliseconds; clamp negative values
+            total_time = self.player.current_midi.get_end_time()
+            self.progress_bar.setValue(int(min(elapsed / total_time * 100, 100)) if total_time > 0 else 0)
+
+    def handle_song_end(self):
+        if self.player.playing and not pygame.mixer.music.get_busy():
+            self.player.next_song()
+            if self.player.playlist:
+                self.player.current_index %= len(self.player.playlist)
+                filepath = self.player.playlist[self.player.current_index]
+                filename = os.path.basename(filepath)
+                self.current_midi_label.setText(f"Current MIDI: {filename}")
+        self.update_progress()
+        
+
+    def init_ui(self):
+        #label = QLabel("MidPlay - Midi player")
+        #label.setStyleSheet("font-size: 20px; font-weight: bold;")
+
+        self.progress_bar = QProgressBar()
+        self.volume_slider = QSlider(Qt.Horizontal)
+        self.volume_slider.setMinimum(0)
+        self.volume_slider.setMaximum(100)
+        self.volume_slider.setValue(100)
+        self.volume_slider.valueChanged.connect(self.set_volume)
+        self.current_midi_label.setText("Current MIDI: None")
+
+
+        self.playlist_widget.itemDoubleClicked.connect(self.play_selected_song)
+        pygame.mixer.music.set_endevent(pygame.USEREVENT)
+
+        # Buttons
+        play_button = QPushButton("Play")
+        play_button.clicked.connect(self.player.play_midi)
+
+        pause_button = QPushButton("Pause")
+        pause_button.clicked.connect(self.player.pause)
+
+        stop_button = QPushButton("Stop")
+        stop_button.clicked.connect(self.player.stop)
+
+        next_button = QPushButton("Next")
+        next_button.clicked.connect(self.player.next_song)
+
+        back_button = QPushButton("Back")
+        back_button.clicked.connect(self.previous_song)        
+
+        add_button = QPushButton("Add to Playlist")
+        add_button.clicked.connect(self.load_midi_file)
+
+        add_folder_button = QPushButton("Add Folder to Playlist")
+        add_folder_button.clicked.connect(self.load_folder)
+
+        clear_button = QPushButton("Clear Playlist")
+        clear_button.clicked.connect(self.clear_playlist)
+        
+        # Window layout
+        layout = QVBoxLayout()
+        layout.addWidget(self.current_midi_label)
+        layout.addWidget(self.playlist_widget)
+        layout.addWidget(self.progress_bar)
+        layout.addWidget(self.volume_slider)
+        layout.addWidget(play_button)
+        layout.addWidget(pause_button)
+        layout.addWidget(stop_button)
+        layout.addWidget(next_button)
+        layout.addWidget(back_button)
+        layout.addWidget(add_button)
+        layout.addWidget(add_folder_button)
+        layout.addWidget(clear_button)
+
+        progress_volume_layout = QHBoxLayout() 
+        progress_volume_layout.addWidget(self.progress_bar)  
+        progress_volume_layout.addWidget(self.volume_slider)
+        layout.addLayout(progress_volume_layout) 
+        self.setLayout(layout)
+
+    # Event handlers
+    def play_selected_song(self, item):
+        """Load and play the double-clicked playlist row and show its file name."""
+        index = self.playlist_widget.row(item)
+        # Keep the GUI's and the player's index in step so Next/Back continue from this row.
+        self.current_index = self.player.current_index = index
+        filepath = self.player.playlist[index]
+        self.player.load_midi(filepath)
+        self.player.play_midi()  # play_midi() itself re-arms the pygame end event
+        self.current_midi_label.setText(f"Current MIDI: {os.path.basename(filepath)}")
+
+    def load_midi_file(self):
+        filepath, _ = QFileDialog.getOpenFileName(self, "Select MIDI File", filter="MIDI files (*.mid *.midi)")
+        if filepath:
+            filename = os.path.basename(filepath) 
+            self.player.load_midi(filepath)
+            self.current_midi_label.setText(f"Current MIDI: {filename}")
+            self.player.play_midi() 
+            self.playlist_widget.addItem(filename)
+            self.player.add_to_playlist(filepath)
+
+    def load_folder(self):
+        folder = QFileDialog.getExistingDirectory(self, "Select Folder")
+        if folder:
+            for file in os.listdir(folder):
+                if file.endswith((".midi", ".mid")):
+                    filepath = os.path.join(folder, file)
+                    self.playlist_widget.addItem(file)
+                    self.player.add_to_playlist(filepath)  # Only add to playlist, don't load immediately!!!!!!!!!!!!!!!
+
+    #probably should be in the MidPlay class
+
+    def previous_song(self):
+        """Step back one playlist entry (wrapping around), then load, label and play it."""
+        if self.player.playlist:
+            self.current_index = self.player.current_index = (self.player.current_index - 1) % len(self.player.playlist)
+            filepath = self.player.playlist[self.player.current_index]
+            self.player.load_midi(filepath)
+            self.current_midi_label.setText(f"Current MIDI: {os.path.basename(filepath)}")
+            self.player.play_midi()
+
+    def clear_playlist(self):
+        self.player.clear_playlist()
+        self.playlist_widget.clear()
+
+    def closeEvent(self, event):
+        confirmation = QMessageBox.question(self, "Exit Confirmation", "Are you sure you want to exit?", QMessageBox.Yes | QMessageBox.No)
+        if confirmation == QMessageBox.Yes:
+            pygame.mixer.quit()
+            pygame.quit()
+            event.accept()  
+        else:
+            event.ignore()  
+
+class MidPlay:
+    """The Heart of Midi Playback"""
+
+    def __init__(self):
+        self.playlist = []
+        self.current_midi = None
+        self.playing = False
+        self.current_index = 0
+
+    def load_midi(self, filepath: str) -> None:
+        def load():    
+            try:
+                self.current_midi = pretty_midi.PrettyMIDI(filepath)
+                pygame.mixer.music.load(filepath)
+            except Exception as e:
+                print(f"Error loading MIDI: {e}")
+        threading.Thread(target=load).start()
+
+    def add_to_playlist(self, filepath: str) -> None:
+        self.playlist.append(filepath)
+
+    def clear_playlist(self) -> None:
+        self.playlist = []
+
+    def play_midi(self) -> None:
+        """Start playing the loaded MIDI on a worker thread; print a notice when none is loaded."""
+        def play():
+            if self.current_midi:
+                pygame.mixer.music.play()  # pygame plays the loaded file itself; nothing to synthesize first
+                self.playing = True
+                pygame.mixer.music.set_endevent(pygame.USEREVENT)
+            else:
+                print("No MIDI file loaded")
+        threading.Thread(target=play).start()
+
+    def pause(self) -> None:
+        pygame.mixer.music.pause()
+        self.playing = False
+
+    def stop(self) -> None:
+        pygame.mixer.music.stop()
+        self.playing = False
+
+
+    def next_song(self) -> None:
+        #print("Debug: next_song() called", self.playlist) debug line
+        if self.playlist:
+            self.current_index = (self.current_index + 1) % len(self.playlist)
+            filepath = self.playlist[self.current_index]
+
+            # If a new MIDI was loaded before the last one ended, respect that as the new playlist start
+            if self.current_midi and self.playing:
+             #   print("Debug: New MIDI loaded before last one ended")  # debug line
+                self.load_midi(filepath)
+                self.play_midi()
+             #   print("Debug: Filepath:", filepath)  # debug line
+             #   print("Debug: Current MIDI:", self.current_midi)  # debug line
+
+if __name__ == '__main__':
+    # Run the Qt event loop exactly once and exit with its return code.
+    # It used to be called inside an endless `while True` that also polled
+    # pygame events: exec_() blocks until the window closes, so the poll ran
+    # only before the window appeared and again after closeEvent() had shut
+    # pygame down, and nothing ever ended the loop (`running` was never read).
+    # Moving on when a song ends is handled by the QTimer that MidPlayGUI
+    # connects to handle_song_end, so no pygame event polling is needed here.
+    app = QApplication([])
+    player_gui = MidPlayGUI()
+    player_gui.show()
+    # sys is imported at the top of MidPlay.py.
+    sys.exit(app.exec_())
\ No newline at end of file
diff --git a/ScanOrg.py b/ScanOrg.py
new file mode 100755
index 0000000..1a48c33
--- /dev/null
+++ b/ScanOrg.py
@@ -0,0 +1,213 @@
+#Path: ScanOrg.py
+# Description: A class to scan and organize music files
+
+import concurrent.futures
+import threading
+import queue
+import zipfile
+import py7zr
+import rarfile
+import os
+import mutagen
+from PyQt5.QtCore import Qt, QSortFilterProxyModel, QAbstractTableModel, QModelIndex, QVariant, QAbstractItemModel, QFileInfo, QDir, QMimeDatabase, QMimeData, QUrl, QItemSelectionModel, QItemSelection, QItemSelectionRange, QObject, QThread, QTimer, QEventLoop, QCoreApplication, QUrl, pyqtSignal
+
+# Directory Filter Proxy Model
+class DirectoryFilterProxyModel(QSortFilterProxyModel):
+    def __init__(self):
+        super().__init__()
+        self.setFilterCaseSensitivity(Qt.CaseInsensitive)
+        self.setFilterKeyColumn(0)
+    def filterAcceptsRow(self, source_row, source_parent):
+        index = self.sourceModel().index(source_row, 0, source_parent)
+        return self.sourceModel().isDir(index)
+
+# File Filter Proxy Model
+class FileFilterProxyModel(QSortFilterProxyModel):
+    def __init__(self):
+        super().__init__()
+        self.setFilterCaseSensitivity(Qt.CaseInsensitive)
+        self.setFilterKeyColumn(0)
+        self.allowed_extensions = ['.zip', '.mp3', '.wav', '.flac', '.mid', '.midi', '.aiff', '.aif', '.aifc', '.au', '.snd', '.wv', '.wma', '.m4a']
+
+    def filterAcceptsRow(self, source_row, source_parent):
+        """Accept every directory, and files whose extension (in any letter case) is in allowed_extensions."""
+        index = self.sourceModel().index(source_row, 0, source_parent)
+        if self.sourceModel().isDir(index):
+            return True
+        return self.sourceModel().fileName(index).lower().endswith(tuple(self.allowed_extensions))
+
+# File Scan and Organize
+class file_scanner:
+    def __init__(self):
+        self.file_list = []
+        self.cache = {}
+        
+    def scan(self, path):
+        """Collect the audio files below *path* on a background thread.
+
+        Returns the list the worker fills in place (also kept as ``self.file_list``);
+        join ``self.scan_thread`` before relying on it. A finished scan is cached
+        under *path* and returned directly by later calls.
+        """
+        if path in self.cache:
+            return self.cache[path]
+        audio_exts = ('.mp3', '.wav', '.flac', '.mid', '.midi', '.aiff', '.aif', '.aifc', '.au', '.snd', '.wv', '.wma', '.m4a')
+        file_list = []
+
+        def report(error):
+            print(f"Error Scanning Files: {error}")
+
+        def background_scan():
+            # os.walk already descends into sub-directories; the old manual directory
+            # queue walked every sub-tree again and listed files several times.
+            for root, _dirs, files in os.walk(path, onerror=report):
+                for file in files:
+                    if file.lower().endswith(audio_exts):
+                        file_list.append(os.path.join(root, file))
+            self.cache[path] = file_list
+
+        self.file_list = file_list
+        self.scan_thread = threading.Thread(target=background_scan)
+        self.scan_thread.start()
+        return file_list
+    
+    def get_file_list(self):
+        return self.file_list
+    
+    def clear_file_list(self):
+        self.file_list = []
+
+class Extractor:
+    def zipviewer(self, index, file_filter_model, list_model, extraction_directory):
+        """Extract the archive selected at proxy *index* into *extraction_directory*; failures are printed."""
+        if index.isValid() and extraction_directory is not None:
+            file_path = list_model.filePath(file_filter_model.mapToSource(index))
+            try:
+                if file_path.lower().endswith(".zip"):
+                    with zipfile.ZipFile(file_path, 'r') as zip_ref:
+                        self._extract_files(zip_ref, extraction_directory)
+                elif file_path.lower().endswith(".rar"):
+                    with rarfile.RarFile(file_path, 'r') as rar_ref:
+                        self._extract_files(rar_ref, extraction_directory)
+                elif file_path.lower().endswith(".7z"):
+                    with py7zr.SevenZipFile(file_path, 'r') as sevenzip_ref:
+                        sevenzip_ref.extractall(path=extraction_directory)  # py7zr's read() yields a dict, not bytes
+                else:
+                    print(f"Unsupported file format: {file_path}")
+            except (zipfile.BadZipFile, zipfile.LargeZipFile) as e:
+                print(f"ZIP Extraction Error: {e}")
+            except rarfile.Error as e:  # base class of rarfile's exceptions
+                print(f"RAR Extraction Error: {e}")
+            except py7zr.exceptions.ArchiveError as e:  # base class of py7zr's archive errors
+                print(f"7z Extraction Error: {e}")
+            except OSError as e:
+                print(f"Extraction Error: {e}")
+
+    def _extract_files(self, archive_ref, extraction_directory):
+        """Write each file member below extraction_directory; skip existing files, directory entries and unsafe names."""
+        root = os.path.join(os.path.realpath(extraction_directory), '')  # trailing separator for the prefix test
+        for filename in archive_ref.namelist():
+            destination = os.path.join(extraction_directory, filename)
+            if os.path.isfile(destination):
+                print(f"File already exists: {destination}")
+            elif not filename.endswith('/') and os.path.realpath(destination).startswith(root):
+                os.makedirs(os.path.dirname(destination), exist_ok=True)
+                with open(destination, 'wb') as file:
+                    file.write(archive_ref.read(filename))
+                print(f"Extracted: {filename}")
+
+class organizer:
+    global metadata_queue
+    metadata_queue = queue.Queue()
+    def __init__(self):
+        self.file_list = []
+        self.artist_list = []
+        self.album_list = []
+        self.genre_list = []
+        self.year_list = []
+        self.file_scanner = file_scanner()
+        self.file_info_cache = {}
+
+    def scan(self, path):
+        if path in self.file_scanner.cache:
+            self.file_list = self.file_scanner.cache[path]
+        else:
+            self.file_list = self.file_scanner.scan(path)
+
+    def get_file_list(self):
+        return self.file_list
+    def clear_file_list(self):
+        self.file_list = []
+    def get_artist_list(self):
+        return self.artist_list
+    def get_album_list(self):
+        return self.album_list
+    def get_genre_list(self):
+        return self.genre_list
+    def get_year_list(self):
+        return self.year_list
+    def clear_artist_list(self):
+        self.artist_list = []
+    def clear_album_list(self):
+        self.album_list = []
+    def clear_genre_list(self):
+        self.genre_list = []
+    def clear_year_list(self):
+        self.year_list = []
+
+    def organize(self):
+        """Read the tags of every file in ``file_list`` and collect their distinct values.
+
+        Each readable file's metadata dict is put on the module-level
+        ``metadata_queue`` and its artist, album, genre and year are added to the
+        matching lists (once each). Files whose tags cannot be read are skipped.
+        """
+        results_queue = queue.Queue()
+        collected = (
+            ('artist', self.artist_list),
+            ('album', self.album_list),
+            ('genre', self.genre_list),
+            ('year', self.year_list),
+        )
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            # Work on a snapshot in case a background scan is still appending to the list.
+            futures = [
+                executor.submit(self.get_file_info, file, results_queue)
+                for file in list(self.file_list)
+            ]
+            for future in concurrent.futures.as_completed(futures):
+                metadata = future.result()
+                if metadata is None:  # already reported by get_file_info
+                    continue
+                # The lists are only touched here, on the calling thread, so the
+                # check-then-append below cannot race between workers.
+                for key, values in collected:
+                    if metadata[key] not in values:
+                        values.append(metadata[key])
+                # Queue.put() returns None, so the old `while not put(...)` never ended.
+                metadata_queue.put(metadata)
+
+    def get_file_info(self, file, results_queue):
+        """Return the tags of *file* as a dict, or None when they cannot be read.
+
+        The dict has the keys 'artist', 'album', 'genre' and 'year'. On failure
+        ``None`` is put on *results_queue* and the file name is printed.
+        """
+        try:
+            # easy=True asks mutagen for its "easy" tag wrappers where available, so
+            # plain names such as 'artist' can be looked up across formats.
+            audio = mutagen.File(file, easy=True)
+            return {
+                'artist': audio['artist'][0],
+                'album': audio['album'][0],
+                'genre': audio['genre'][0],
+                'year': audio['date'][0],
+            }
+        except Exception:
+            # Best effort per file: an unrecognised type (mutagen.File returns None),
+            # a missing tag or a corrupt file only skips this one file.
+            results_queue.put(None)
+            print('Error: ' + file)
+            return None
+
+            
\ No newline at end of file
diff --git a/__pycache__/MidPlay.cpython-310.pyc b/__pycache__/MidPlay.cpython-310.pyc
new file mode 100755
index 0000000..00219d1
Binary files /dev/null and b/__pycache__/MidPlay.cpython-310.pyc differ
diff --git a/__pycache__/ScanOrg.cpython-310.pyc b/__pycache__/ScanOrg.cpython-310.pyc
new file mode 100755
index 0000000..be7279a
Binary files /dev/null and b/__pycache__/ScanOrg.cpython-310.pyc differ
diff --git a/compression.py b/compression.py
new file mode 100755
index 0000000..e228356
--- /dev/null
+++ b/compression.py
@@ -0,0 +1,131 @@
+# imports
+import os
+import zipfile
+import rarfile
+import py7zr
+import shutil
+import tarfile
+import argparse
+import tqdm
+from concurrent.futures import ThreadPoolExecutor
+from multiprocessing import pool
+
+# File Compressor
+class Compressor:
+    """Create zip, tar or 7z archives from a single file or a directory tree."""
+
+    def __init__(self):
+        pass
+
+    def _compress_folder(self, source_path, archive_file, archive_format, progress=None):
+        """Add every file below `source_path` to the open archive.
+
+        Members are named by their path relative to `source_path`, so the
+        directory layout is kept.  Empty directories are not stored.
+        `progress`, when given, is advanced by one after each file.
+        """
+        for root, _, files in os.walk(source_path):
+            for file in files:
+                file_path = os.path.join(root, file)
+                archive_path = os.path.relpath(file_path, source_path)
+                self._compress_file(file_path, archive_file, archive_path, archive_format)
+                if progress is not None:
+                    progress.update(1)
+
+    def _compress_file(self, file_path, archive_file, archive_path, archive_format):
+        """Add the file `file_path` to the open archive as member `archive_path`.
+
+        Raises:
+            ValueError: if `archive_format` is not "zip", "tar" or "7z".
+        """
+        if archive_format == "zip":
+            # ZipFile.write stores the whole file as one member.  Calling
+            # writestr once per chunk added a separate, same-named member for
+            # every chunk and dropped empty files altogether.
+            archive_file.write(file_path, arcname=archive_path)
+        elif archive_format == "tar":
+            archive_file.add(file_path, arcname=archive_path)
+        elif archive_format == "7z":
+            archive_file.write(file_path, archive_path)
+        else:
+            raise ValueError(f"Unsupported archive format: {archive_format}")
+
+    def compress(self, source_path, archive_name, archive_format="zip"):
+        """Compress `source_path` into the archive `archive_name`.`archive_format`.
+
+        The archive is written to the directory that contains the source.
+        A missing source, an already existing archive or a source that is
+        neither a file nor a directory is reported on stdout and nothing is
+        created.  Errors while writing are reported on stdout as well and the
+        incomplete archive is removed.
+
+        Raises:
+            ValueError: if `archive_format` is not "zip", "tar" or "7z".
+        """
+        supported_formats = ["zip", "tar", "7z"]
+        if archive_format not in supported_formats:
+            raise ValueError(f"Unsupported archive format: {archive_format}")
+
+        # abspath drops a trailing separator, so "folder/" resolves to the
+        # parent of folder and the archive does not end up inside the tree
+        # that is being walked.
+        source_abs = os.path.abspath(source_path)
+        archive_path = os.path.join(os.path.dirname(source_abs), f"{archive_name}.{archive_format}")
+
+        # Check if source path exists
+        if not os.path.exists(source_path):
+            print(f"Source path does not exist: {source_path}")
+            return
+
+        # Check if archive path already exists
+        if os.path.exists(archive_path):
+            print(f"Archive path already exists: {archive_path}")
+            return
+
+        # Validate the source before an (empty) archive file gets created
+        source_is_dir = os.path.isdir(source_path)
+        if not source_is_dir and not os.path.isfile(source_path):
+            print(f"Source path is not a file or directory: {source_path}")
+            return
+
+        # One progress step per file that will be added
+        if source_is_dir:
+            file_count = sum(len(files) for _, _, files in os.walk(source_path))
+        else:
+            file_count = 1
+
+        # Open archive file based on format
+        if archive_format == "zip":
+            archive_file = zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED)
+        elif archive_format == "tar":
+            archive_file = tarfile.open(archive_path, mode="w")
+        else:
+            archive_file = py7zr.SevenZipFile(archive_path, mode="w")
+
+        # Compress the source path
+        try:
+            # Both context managers are closed even when adding a file fails
+            with archive_file, tqdm.tqdm(total=file_count, unit="file", desc="Compressing") as pbar:
+                if source_is_dir:
+                    self._compress_folder(source_path, archive_file, archive_format, pbar)
+                else:
+                    # A single file is stored under its own name
+                    self._compress_file(source_path, archive_file, os.path.basename(source_abs), archive_format)
+                    pbar.update(1)
+        except Exception as e:
+            print(f"Compression failed for {archive_path}: {e}")
+            # Do not leave a truncated archive behind
+            if os.path.exists(archive_path):
+                os.remove(archive_path)
+        else:
+            print(f"Compressed to: {archive_path}")
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Compress files")
+    parser.add_argument("source", help="Path to the file or folder to compress")
+    parser.add_argument("archive_name", help="Name for the compressed archive")
+    parser.add_argument("-f", "--format", choices=["zip", "tar", "7z"], default="zip", help="Archive format")
+    args = parser.parse_args()
+
+    compressor = Compressor()
+    compressor.compress(args.source, args.archive_name, args.format)
diff --git a/docker-compose.debug.yml b/docker-compose.debug.yml
new file mode 100755
index 0000000..bf8c4d0
--- /dev/null
+++ b/docker-compose.debug.yml
@@ -0,0 +1,11 @@
+version: '3.4'
+
+services:
+  fbrowser:
+    image: fbrowser
+    build:
+      context: .
+      dockerfile: ./Dockerfile
+    command: ["sh", "-c", "pip install debugpy -t /tmp && python /tmp/debugpy --wait-for-client --listen 0.0.0.0:5678 Fbrowser.py "]
+    ports:
+      - 5678:5678
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100755
index 0000000..a84be70
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,8 @@
+version: '3.4'
+
+services:
+  fbrowser:
+    image: fbrowser
+    build:
+      context: .
+      dockerfile: ./Dockerfile
diff --git a/extraction.py b/extraction.py
new file mode 100755
index 0000000..70e509d
--- /dev/null
+++ b/extraction.py
@@ -0,0 +1,101 @@
+# extractor.py
+import os
+import tarfile
+import zipfile
+import rarfile
+import py7zr
+import argparse
+from tqdm import tqdm
+from concurrent.futures import ThreadPoolExecutor
+
+class Extractor:
+    """Extract zip, rar, 7z and tar archives into a directory."""
+
+    # Suffixes handed to tarfile, which detects the compression on its own.
+    # .tar.zst only opens on Python builds whose tarfile supports Zstandard.
+    TAR_SUFFIXES = (".tar", ".tar.gz", ".tgz", ".tar.bz2", ".tar.xz", ".tar.zst")
+
+    def _open_archive(self, source):
+        """Open `source` with the reader matching its extension, or return None."""
+        name = source.lower()
+        if name.endswith(".zip"):
+            return zipfile.ZipFile(source, 'r')
+        if name.endswith(".rar"):
+            return rarfile.RarFile(source, 'r')
+        if name.endswith(".7z"):
+            return py7zr.SevenZipFile(source, 'r')
+        if name.endswith(self.TAR_SUFFIXES):
+            return tarfile.open(source, 'r:*')
+        return None
+
+    def zipviewer(self, source, destination):
+        """Extract the archive `source` into the directory `destination`.
+
+        The archive type is chosen from the file extension (case-insensitive).
+        `destination` is created when it does not exist.  A missing source,
+        an unsupported extension and the archive/OS errors handled below are
+        printed to stdout rather than raised.
+        """
+        print(f"checking if {source} exists")
+        if not os.path.exists(source):
+            print(f"Error: Archive file not found: {source}")
+            return
+
+        try:
+            print(f"checking if {destination} exists")
+            if not os.path.exists(destination):
+                print(f"{destination} does not exist, creating {destination}")
+                os.makedirs(destination)
+                print(f"{destination} created")
+            else:
+                print(f"{destination} exists")
+
+            print(f"checking if {source} is a valid archive file")
+            archive = self._open_archive(source)
+            if archive is None:
+                print(f"Unsupported file format: {source}")
+                return
+
+            print(f"Extracting all files from {source} to {destination}")
+            with archive:
+                if isinstance(archive, tarfile.TarFile) and hasattr(tarfile, "data_filter"):
+                    # Refuse members that would land outside `destination`
+                    archive.extractall(destination, filter="data")
+                else:
+                    archive.extractall(destination)
+            print(f"Extracted all files from {source} to {destination}")
+
+        except (zipfile.BadZipFile, zipfile.LargeZipFile) as e:
+            print(f"ZIP Extraction Error: {e}")
+        except rarfile.Error as e:
+            # rarfile.Error is the base class of the rarfile exceptions
+            print(f"RAR Extraction Error: {e}")
+        except py7zr.exceptions.ArchiveError as e:
+            print(f"7z Extraction Error: {e}")
+        except tarfile.TarError as e:
+            print(f"TAR Extraction Error: {e}")
+        except OSError as e:
+            print(f"Extraction Error: {e}")
+
+def main():
+    """Parse the command line and run the `extract` sub-command."""
+    print("Welcome to the Archive Extractor!")
+    parser = argparse.ArgumentParser(description="Compress or extract files")
+    subparsers = parser.add_subparsers(title="Command", dest="command")
+
+    # Subparser for extraction
+    extract_parser = subparsers.add_parser("extract")
+    extract_parser.add_argument("source", help="Path to the archive file")
+    extract_parser.add_argument("destination", help="Extraction directory")
+    args = parser.parse_args()
+
+    if args.command == "extract":
+        print(f"Extracting {args.source} to {args.destination}")
+        extractor = Extractor()
+        extractor.zipviewer(args.source, args.destination)
+    else:
+        # No sub-command was given: show the usage instead of exiting silently
+        parser.print_help()
+
+if __name__ == "__main__":
+    main()
diff --git a/paq-8l_intel.exe b/paq-8l_intel.exe
new file mode 100755
index 0000000..f6f3684
Binary files /dev/null and b/paq-8l_intel.exe differ
diff --git a/paq7asm-x86_64.asm b/paq7asm-x86_64.asm
new file mode 100755
index 0000000..a0754a6
--- /dev/null
+++ b/paq7asm-x86_64.asm
@@ -0,0 +1,102 @@
+; YASM x86-64 assembly language code for PAQ7/8 ver. 2, Jan 18, 2007
+;
+; (C) 2005-2007, Matt Mahoney, Matthew Fite.
+; This is free software under GPL, http://www.gnu.org/licenses/gpl.txt
+;
+; This code was tested on an Athlon-64 under Ubuntu Linux 2.6.15.27.amd64-generic
+; with paq8f and paq8jd.  It should work with any PAQ version since paq7,
+; because all versions use the same paq7asm.asm code for 32 bit Windows/Linux
+; versions.  To compile e.g. paq8jd in Linux:
+;
+;   yasm paq7asm-x86_64.asm -f elf -m amd64
+;   g++ -O3 -s -fomit-frame-pointer -DUNIX paq8jd.cpp paq7asm-x86_64.o -o paq8jd
+;
+; This code has not been tested in Windows.  (You would need XP Professional
+; 64 bit edition and a 64 bit compiler).
+
+section .text
+
+BITS 64
+
+; Vector product a*b of n signed words, returning signed dword scaled
+; down by 8 bits. n is rounded up to a multiple of 8.
+; Arguments as used below: rdi = a, rsi = b, edx = n.  Result in eax.
+    global dot_product ; (short* a, short* b, int n)
+    align 16
+dot_product:
+    mov ecx, edx        ; n is a 32-bit int: zero-extend, the ABI leaves the upper half of rdx unspecified
+    mov rax, rdi        ; a
+    mov rdx, rsi        ; b
+    add rcx, 7          ; n rounding up
+    and rcx, -8
+    jz .done            ; NOTE(review): for n == 0 rax still holds a here, not 0
+    sub rax, 16         ; bias both pointers so [ptr+rcx*2] addresses the last 8 words
+    sub rdx, 16
+    pxor xmm0, xmm0     ; sum = 0
+.loop:                  ; each iteration handles 8 words, walking from the end to the start
+    movdqa xmm1, [rax+rcx*2] ; put partial sums of vector product in xmm1
+    pmaddwd xmm1, [rdx+rcx*2] ; 4 dword sums of adjacent word products
+    psrad xmm1, 8       ; scale each partial sum down by 8 bits
+    paddd xmm0, xmm1
+    sub rcx, 8
+    ja .loop            ; loop while rcx is still above 0
+    movdqa xmm1, xmm0      ; add 4 parts of xmm0 and return in eax
+    psrldq xmm1, 8
+    paddd xmm0, xmm1
+    movdqa xmm1, xmm0
+    psrldq xmm1, 4
+    paddd xmm0, xmm1
+    movd rax, xmm0
+.done:
+    ret
+
+; Train n neural network weights w[n] on inputs t[n] and err.
+; w[i] += (t[i]*err*2 >> 16)+1 >> 1 bounded to +- 32K.
+; n is rounded up to a multiple of 8.
+
+;1st arg rdi -> *t
+;2nd arg rsi -> *w
+;3rd arg rdx ->  n
+;4th arg rcx ->  err (signed 16 bits)
+
+    global train ; (short* t, short* w, int n, int err)
+    BITS 64
+    align 16
+train:
+    mov rax, rcx          ; err
+    and rax, 0xffff       ; put 8 copies of err in xmm0
+    movd xmm0, rax
+    movd xmm1, rax
+    pslldq xmm1, 2
+    por xmm0, xmm1        ; 2 copies
+    movdqa xmm1, xmm0
+    pslldq xmm1, 4
+    por xmm0, xmm1        ; 4 copies
+    movdqa xmm1, xmm0
+    pslldq xmm1, 8
+    por xmm0, xmm1        ; 8 copies
+    pcmpeqb xmm1, xmm1    ; 8 copies of 1 in xmm1
+    psrlw xmm1, 15
+    mov ecx, edx          ; n is a 32-bit int: zero-extend it into rcx
+    mov rax, rdi          ; t
+    mov rdx, rsi          ; w
+    add rcx, 7            ; n/8 rounding up
+    and rcx, -8
+    jz .done              ; test the AND result; after the SUBs ZF would describe w-16
+    sub rax, 16           ; bias both pointers so [ptr+rcx*2] addresses the last 8 words
+    sub rdx, 16
+    align 16
+.loop:                     ; each iteration adjusts 8 weights
+    movdqa xmm2, [rdx+rcx*2] ; w[i]
+    movdqa xmm3, [rax+rcx*2] ; t[i]
+    paddsw xmm3, xmm3      ; t[i]*2
+    pmulhw xmm3, xmm0      ; t[i]*err*2 >> 16
+    paddsw xmm3, xmm1      ; (t[i]*err*2 >> 16)+1
+    psraw xmm3, 1          ; (t[i]*err*2 >> 16)+1 >> 1
+    paddsw xmm2, xmm3      ; w[i] + xmm3
+    movdqa [rdx+rcx*2], xmm2
+    sub rcx, 8
+    ja .loop
+.done:
+    ret
+
diff --git a/paq7asm.asm b/paq7asm.asm
new file mode 100755
index 0000000..82d55a7
--- /dev/null
+++ b/paq7asm.asm
@@ -0,0 +1,140 @@
+; NASM assembly language code for PAQ7.
+; (C) 2005, Matt Mahoney.
+; This is free software under GPL, http://www.gnu.org/licenses/gpl.txt
+;
+;   MINGW g++:     nasm paq7asm.asm -f win32 --prefix _
+;   DJGPP g++:     nasm paq7asm.asm -f coff  --prefix _
+;   Borland, Mars: nasm paq7asm.asm -f obj   --prefix _
+;   Linux:         nasm paq7asm.asm -f elf
+;
+; For other Windows compilers try -f win32 or -f obj.  Some old versions
+; of Linux should use -f aout instead of -f elf.
+;
+; This code will only work on a Pentium-MMX or higher.  It doesn't
+; use extended (Katmai/SSE) instructions.  It won't work
+; in 64-bit mode.
+
+section .text use32 class=CODE
+
+; Reset after MMX: executes emms so x87 floating point code can run again
+global do_emms          ; reads no arguments and sets no return value
+do_emms:
+  emms
+  ret
+
+; Vector product a*b of n signed words, returning signed dword scaled
+; down by 8 bits. n is rounded up to a multiple of 8.
+; Arguments are read from the stack at [esp+4], [esp+8], [esp+12]; result in eax.
+global dot_product ; (short* a, short* b, int n)
+align 16
+dot_product:
+  mov eax, [esp+4]      ; a
+  mov edx, [esp+8]      ; b
+  mov ecx, [esp+12]     ; n
+  add ecx, 7            ; n rounding up
+  and ecx, -8
+  jz .done              ; NOTE(review): for n == 0 eax still holds a here, not 0
+  sub eax, 8            ; bias pointers: [ptr+ecx*2] is the last 4 words,
+  sub edx, 8            ; [ptr+ecx*2-8] the 4 words before them
+  pxor mm0, mm0         ; sum = 0
+.loop:                  ; each iteration handles 8 words, walking from the end to the start
+  movq mm1, [eax+ecx*2] ; upper 4 words of the current group of a
+  pmaddwd mm1, [edx+ecx*2] ; 2 dword sums of adjacent word products
+  movq mm2, [eax+ecx*2-8] ; lower 4 words of the current group of a
+  pmaddwd mm2, [edx+ecx*2-8]
+  psrad mm1, 8          ; scale the partial sums down by 8 bits
+  psrad mm2, 8
+  paddd mm0, mm1        ; accumulate both halves in mm0
+  paddd mm0, mm2
+  sub ecx, 8
+  ja .loop              ; loop while ecx is still above 0
+  movq mm1, mm0         ; add 2 halves of mm0 and return in eax
+  psrlq mm1, 32
+  paddd mm0, mm1
+  movd eax, mm0
+  emms                  ; leave MMX state before returning (skipped on the n == 0 exit)
+.done
+  ret
+
+; This should work on a Pentium 4 or higher in 32-bit mode,
+; but it isn't much faster than the MMX version so I don't use it.
+; Same contract as dot_product; movdqa requires a and b to be 16-byte aligned.
+global dot_product_sse2 ; (short* a, short* b, int n)
+align 16
+dot_product_sse2:
+  mov eax, [esp+4]      ; a
+  mov edx, [esp+8]      ; b
+  mov ecx, [esp+12]     ; n
+  add ecx, 7            ; n rounding up
+  and ecx, -8
+  jz .done              ; NOTE(review): for n == 0 eax still holds a here, not 0
+  sub eax, 16           ; bias both pointers so [ptr+ecx*2] addresses the last 8 words
+  sub edx, 16
+  pxor xmm0, xmm0       ; sum = 0
+.loop:                  ; each iteration handles 8 words, walking from the end to the start
+  movdqa xmm1, [eax+ecx*2] ; put partial sums of vector product in xmm1
+  pmaddwd xmm1, [edx+ecx*2] ; 4 dword sums of adjacent word products
+  psrad xmm1, 8         ; scale each partial sum down by 8 bits
+  paddd xmm0, xmm1
+  sub ecx, 8
+  ja .loop              ; loop while ecx is still above 0
+  movdqa xmm1, xmm0      ; add 4 parts of xmm0 and return in eax
+  psrldq xmm1, 8
+  paddd xmm0, xmm1
+  movdqa xmm1, xmm0
+  psrldq xmm1, 4
+  paddd xmm0, xmm1
+  movd eax, xmm0
+.done
+  ret
+
+
+; Train n neural network weights w[n] on inputs t[n] and err.
+; w[i] += (t[i]*err*2 >> 16)+1 >> 1 bounded to +- 32K.
+; n is rounded up to a multiple of 8.
+; Arguments are read from the stack: [esp+4] t, [esp+8] w, [esp+12] n, [esp+16] err.
+global train ; (short* t, short* w, int n, int err)
+align 16
+train:
+  mov eax, [esp+16]     ; err
+  and eax, 0xffff       ; put 4 copies of err in mm0
+  movd mm0, eax
+  movd mm1, eax
+  psllq mm1, 16
+  por mm0, mm1          ; 2 copies
+  movq mm1, mm0
+  psllq mm1, 32
+  por mm0, mm1          ; 4 copies
+  pcmpeqb mm1, mm1      ; 4 copies of 1 in mm1
+  psrlw mm1, 15
+  mov eax, [esp+4]      ; t
+  mov edx, [esp+8]      ; w
+  mov ecx, [esp+12]     ; n
+  add ecx, 7            ; n/8 rounding up
+  and ecx, -8
+  jz .done              ; test the AND result; after the SUBs ZF would describe w-8
+  sub eax, 8            ; bias pointers: [ptr+ecx*2] is the last 4 words,
+  sub edx, 8            ; [ptr+ecx*2-8] the 4 words before them
+.loop:                  ; each iteration adjusts 8 weights
+  movq mm2, [edx+ecx*2] ; w[i]
+  movq mm3, [eax+ecx*2] ; t[i]
+  movq mm4, [edx+ecx*2-8] ; w[i]
+  movq mm5, [eax+ecx*2-8] ; t[i]
+  paddsw mm3, mm3       ; t[i]*2
+  paddsw mm5, mm5
+  pmulhw mm3, mm0       ; t[i]*err*2 >> 16
+  pmulhw mm5, mm0
+  paddsw mm3, mm1       ; (t[i]*err*2 >> 16)+1
+  paddsw mm5, mm1
+  psraw mm3, 1          ; (t[i]*err*2 >> 16)+1 >> 1
+  psraw mm5, 1
+  paddsw mm2, mm3       ; w[i] + update, saturating
+  paddsw mm4, mm5
+  movq [edx+ecx*2], mm2
+  movq [edx+ecx*2-8], mm4
+  sub ecx, 8
+  ja .loop
+.done:
+  emms                  ; also needed on the n == 0 exit: mm0/mm1 were already written
+  ret
+
diff --git a/paq7asmsse.asm b/paq7asmsse.asm
new file mode 100755
index 0000000..98ff613
--- /dev/null
+++ b/paq7asmsse.asm
@@ -0,0 +1,93 @@
+; NASM assembly language code for PAQ7.
+; (C) 2005, Matt Mahoney.
+; train - written by wowtiger, Jan. 30, 2007
+;
+; This is free software under GPL, http://www.gnu.org/licenses/gpl.txt
+;
+; This code is a replacement for paq7asm.asm for newer processors
+; supporting SSE2 instructions.  It is about 1% faster than the
+; equivalent MMX code.  It can be linked with any version of paq7*
+; or paq8*.  Assemble as below, then link following the instructions
+; in the C++ source code, replacing paq7asm.obj with paq7asmsse.obj.
+; No C++ code changes are needed.
+;
+;   MINGW g++:     nasm paq7asmsse.asm -f win32 --prefix _
+;   DJGPP g++:     nasm paq7asmsse.asm -f coff  --prefix _
+;   Borland, Mars: nasm paq7asmsse.asm -f obj   --prefix _
+;   Linux:         nasm paq7asmsse.asm -f elf
+;
+
+section .text use32 class=CODE
+
+; Vector product a*b of n signed words, returning signed dword scaled
+; down by 8 bits. n is rounded up to a multiple of 8.
+; Arguments are read from the stack; movdqa requires a and b to be 16-byte aligned.
+global dot_product      ; (short* a, short* b, int n)
+align 16
+dot_product:
+  mov eax, [esp+4]      ; a
+  mov edx, [esp+8]      ; b
+  mov ecx, [esp+12]     ; n
+  add ecx, 7            ; n rounding up
+  and ecx, -8
+  jz .done              ; NOTE(review): for n == 0 eax still holds a here, not 0
+  sub eax, 16           ; bias both pointers so [ptr+ecx*2] addresses the last 8 words
+  sub edx, 16
+  pxor xmm0, xmm0       ; sum = 0
+.loop:                  ; each iteration handles 8 words, walking from the end to the start
+  movdqa xmm1, [eax+ecx*2] ; put partial sums of vector product in xmm1
+  pmaddwd xmm1, [edx+ecx*2] ; 4 dword sums of adjacent word products
+  psrad xmm1, 8         ; scale each partial sum down by 8 bits
+  paddd xmm0, xmm1
+  sub ecx, 8
+  ja .loop              ; loop while ecx is still above 0
+  movdqa xmm1, xmm0      ; add 4 parts of xmm0 and return in eax
+  psrldq xmm1, 8
+  paddd xmm0, xmm1
+  movdqa xmm1, xmm0
+  psrldq xmm1, 4
+  paddd xmm0, xmm1
+  movd eax, xmm0
+.done
+  ret
+
+
+; Train n neural network weights w[n] on inputs t[n] and err.
+; w[i] += (t[i]*err*2 >> 16)+1 >> 1 bounded to +- 32K.
+; n is rounded up to a multiple of 8.
+
+; Train for SSE2
+; Use this code to get some performance...
+; Arguments are read from the stack: [esp+4] t, [esp+8] w, [esp+12] n, [esp+16] err.
+global train ; (short* t, short* w, int n, int err)
+align 16
+train:
+  mov eax, [esp+4]      ; t
+  mov edx, [esp+8]      ; w
+  mov ecx, [esp+12]     ; n
+  add ecx, 7            ; n/8 rounding up
+  and ecx, -8
+  jz .done              ; nothing to do when n rounds to 0
+  sub eax, 16           ; bias both pointers so [ptr+ecx*2] addresses the last 8 words
+  sub edx, 16
+  movd xmm0, [esp+16]   ; err in the low dword
+  pshuflw xmm0,xmm0,0   ; low word of err copied to the 4 low words
+  punpcklqdq xmm0,xmm0  ; low qword duplicated: 8 copies of err in xmm0
+.loop:                  ; each iteration adjusts 8 weights
+  movdqa xmm3, [eax+ecx*2] ; t[i]
+  movdqa xmm2, [edx+ecx*2] ; w[i]
+  paddsw xmm3, xmm3     ; t[i]*2
+  pmulhw xmm3, xmm0     ; t[i]*err*2 >> 16
+  paddsw xmm3, [_mask]  ; (t[i]*err*2 >> 16)+1, _mask holds 8 words of 1
+  psraw xmm3, 1         ; (t[i]*err*2 >> 16)+1 >> 1
+  paddsw xmm2, xmm3     ; w[i] + xmm3
+  movdqa [edx+ecx*2], xmm2
+  sub ecx, 8
+  ja .loop              ; loop while ecx is still above 0
+.done:
+  ret
+
+align 16
+_mask	dd	10001h,10001h,10001h,10001h ; 8 copies of 1 in xmm1
+
+
diff --git a/paq8l.cpp b/paq8l.cpp
new file mode 100755
index 0000000..3f69df8
--- /dev/null
+++ b/paq8l.cpp
@@ -0,0 +1,3575 @@
+/* paq8l file compressor/archiver.  Release by Matt Mahoney, Mar. 8, 2007.
+   Updated Apr. 15, 2007 (no change to paq8l.exe).
+
+    Copyright (C) 2006 Matt Mahoney, Serge Osnach, Alexander Ratushnyak,
+    Bill Pettis, Przemyslaw Skibinski, Matthew Fite, wowtiger, Andrew Paterson,
+
+
+    LICENSE
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of
+    the License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details at
+    Visit <http://www.gnu.org/copyleft/gpl.html>.
+
+To install and use in Windows:
+
+- To install, put paq8l.exe or a shortcut to it on your desktop.
+- To compress a file or folder, drop it on the paq8l icon.
+- To decompress, drop a .paq8l file on the icon.
+
+A .paq8l extension is added for compression, removed for decompression.
+The output will go in the same folder as the input.
+
+While paq8l is working, a command window will appear and report
+progress.  When it is done you can close the window by pressing
+ENTER or clicking [X]. 
+
+
+COMMAND LINE INTERFACE
+
+- To install, put paq8l.exe somewhere in your PATH.
+- To compress:      paq8l [-N] file1 [file2...]
+- To decompress:    paq8l [-d] file1.paq8l [dir2]
+- To view contents: more < file1.paq8l
+
+The compressed output file is named by adding ".paq8l" extension to
+the first named file (file1.paq8l).  Each file that exists will be
+added to the archive and its name will be stored without a path.
+The option -N specifies a compression level ranging from -0
+(fastest) to -9 (smallest).  The default is -5.  If there is
+no option and only one file, then the program will pause when
+finished until you press the ENTER key (to support drag and drop).
+If file1.paq8l exists then it is overwritten.
+
+If the first named file ends in ".paq8l" then it is assumed to be
+an archive and the files within are extracted to the same directory
+as the archive unless a different directory (dir2) is specified.
+The -d option forces extraction even if there is not a ".paq8l"
+extension.  If any output file already exists, then it is compared
+with the archive content and the first byte that differs is reported.
+No files are overwritten or deleted.  If there is only one argument
+(no -d or dir2) then the program will pause when finished until
+you press ENTER.
+
+For compression, if any named file is actually a directory, then all
+files and subdirectories are compressed, preserving the directory
+structure, except that empty directories are not stored, and file
+attributes (timestamps, permissions, etc.) are not preserved.
+During extraction, directories are created as needed.  For example:
+
+  paq8l -4 c:\tmp\foo bar
+
+compresses foo and bar (if they exist) to c:\tmp\foo.paq8l at level 4.
+
+  paq8l -d c:\tmp\foo.paq8l .
+
+extracts foo and compares bar in the current directory.  If foo and bar
+are directories then their contents are extracted/compared.
+
+There are no commands to update an existing archive or to extract
+part of an archive.  Files and archives larger than 2GB are not
+supported (but might work on 64-bit machines, not tested).
+File names with nonprintable characters are not supported (spaces
+are OK).
+
+
+TO COMPILE
+
+There are 2 files: paq8l.cpp (C++) and paq7asm.asm (NASM/YASM).
+paq7asm.asm is the same as in paq7 and paq8x.  paq8l.cpp recognizes the
+following compiler options:
+
+  -DWINDOWS           (to compile in Windows)
+  -DUNIX              (to compile in Unix, Linux, Solaris, MacOS/Darwin, etc)
+  -DNOASM             (to replace paq7asm.asm with equivalent C++)
+  -DDEFAULT_OPTION=N  (to change the default compression level from 5 to N).
+
+If you compile without -DWINDOWS or -DUNIX, you can still compress files,
+but you cannot compress directories or create them during extraction.
+You can extract directories if you manually create the empty directories
+first.
+
+Use -DDEFAULT_OPTION=N to change the default compression level to support
+drag and drop on machines with less than 256 MB of memory.  Use
+-DDEFAULT_OPTION=4 for 128 MB, 3 for 64 MB, 2 for 32 MB, etc.
+
+Use -DNOASM for non x86-32 machines, or older than a Pentium-MMX (about
+1997), or if you don't have NASM or YASM to assemble paq7asm.asm.  The
+program will still work but it will be slower.  For NASM in Windows,
+use the options "--prefix _" and either "-f win32" or "-f obj" depending
+on your C++ compiler.  In Linux, use "-f elf".
+
+Recommended compiler commands and optimizations:
+
+  MINGW g++:
+    nasm paq7asm.asm -f win32 --prefix _
+    g++ paq8l.cpp -DWINDOWS -O2 -Os -s -march=pentiumpro -fomit-frame-pointer -o paq8l.exe paq7asm.obj
+
+  Borland:
+    nasm paq7asm.asm -f obj --prefix _
+    bcc32 -DWINDOWS -O -w-8027 paq8l.cpp paq7asm.obj
+
+  Mars:
+    nasm paq7asm.asm -f obj --prefix _
+    dmc -DWINDOWS -Ae -O paq8l.cpp paq7asm.obj
+
+  UNIX/Linux (PC):
+    nasm -f elf paq7asm.asm
+    g++ paq8l.cpp -DUNIX -O2 -Os -s -march=pentiumpro -fomit-frame-pointer -o paq8l paq7asm.o
+
+  Non PC (e.g. PowerPC under MacOS X)
+    g++ paq8l.cpp -O2 -DUNIX -DNOASM -s -o paq8l
+
+MinGW produces faster executables than Borland or Mars, but Intel 9
+is about 4% faster than MinGW.
+
+
+ARCHIVE FILE FORMAT
+
+An archive has the following format.  It is intended to be both
+human and machine readable.  The header ends with CTRL-Z (Windows EOF)
+so that the binary compressed data is not displayed on the screen.
+
+  paq8l -N CR LF
+  size TAB filename CR LF
+  size TAB filename CR LF
+  ...
+  CTRL-Z
+  compressed binary data
+
+-N is the option (-0 to -9), even if a default was used.
+Plain file names are stored without a path.  Files in compressed
+directories are stored with path relative to the compressed directory
+(using UNIX style forward slashes "/").  For example, given these files:
+
+  123 C:\dir1\file1.txt
+  456 C:\dir2\file2.txt
+
+Then
+
+  paq8l archive \dir1\file1.txt \dir2
+
+will create archive.paq8l with the header:
+
+  paq8l -5
+  123     file1.txt
+  456     dir2/file2.txt
+
+The command:
+
+  paq8l archive.paq8l C:\dir3
+
+will create the files:
+
+  C:\dir3\file1.txt
+  C:\dir3\dir2\file2.txt
+
+Decompression will fail if the first 7 bytes are not "paq8l -".  Sizes
+are stored as decimal numbers.  CR, LF, TAB, CTRL-Z are ASCII codes
+13, 10, 9, 26 respectively.
+
+
+ARITHMETIC CODING
+
+The binary data is arithmetic coded as the shortest base 256 fixed point
+number x = SUM_i x_i 256^-1-i such that p(<y) <= x < p(<=y), where y is the
+input string, x_i is the i'th coded byte, p(<y) (and p(<=y)) means the
+probability that a string is lexicographically less than (less than
+or equal to) y according to the model, _ denotes subscript, and ^ denotes
+exponentiation.
+
+The model p(y) for y is a conditional bit stream,
+p(y) = PROD_j p(y_j | y_0..j-1) where y_0..j-1 denotes the first j
+bits of y, and y_j is the next bit.  Compression depends almost entirely
+on the ability to predict the next bit accurately.
+
+
+MODEL MIXING
+
+paq8l uses a neural network to combine a large number of models.  The
+i'th model independently predicts
+p1_i = p(y_j = 1 | y_0..j-1), p0_i = 1 - p1_i.
+The network computes the next bit probability
+
+  p1 = squash(SUM_i w_i t_i), p0 = 1 - p1                        (1)
+
+where t_i = stretch(p1_i) is the i'th input, p1_i is the prediction of
+the i'th model, p1 is the output prediction, stretch(p) = ln(p/(1-p)),
+and squash(s) = 1/(1+exp(-s)).  Note that squash() and stretch() are
+inverses of each other.
+
+After bit y_j (0 or 1) is received, the network is trained:
+
+  w_i := w_i + eta t_i (y_j - p1)                                (2)
+
+where eta is an ad-hoc learning rate, t_i is the i'th input, (y_j - p1)
+is the prediction error for the j'th input bit, and w_i is the i'th
+weight.  Note that this differs from back propagation:
+
+  w_i := w_i + eta t_i (y_j - p1) p0 p1                          (3)
+
+which is a gradient descent in weight space to minimize root mean square
+error.  Rather, the goal in compression is to minimize coding cost,
+which is -log(p1) if y = 1 or -log(p0) if y = 0.  Taking
+the partial derivative of cost with respect to w_i yields (2).
+
+
+MODELS
+
+Most models are context models.  A function of the context (last few
+bytes) is mapped by a lookup table or hash table to a state which depends
+on the bit history (prior sequence of 0 and 1 bits seen in this context).
+The bit history is then mapped to p1_i by a fixed or adaptive function.
+There are several types of bit history states:
+
+- Run Map. The state is (b,n) where b is the last bit seen (0 or 1) and
+  n is the number of consecutive times this value was seen.  The initial
+  state is (0,0).  The output is computed directly:
+
+    t_i = (2b - 1)K log(n + 1).
+
+  where K is ad-hoc, around 4 to 10.  When bit y_j is seen, the state
+  is updated:
+
+    (b,n) := (b,n+1) if y_j = b, else (y_j,1).
+
+- Stationary Map.  The state is p, initially 1/2.  The output is
+  t_i = stretch(p).  The state is updated at ad-hoc rate K (around 0.01):
+
+    p := p + K(y_j - p)
+
+- Nonstationary Map.  This is a compromise between a stationary map, which
+  assumes uniform statistics, and a run map, which adapts quickly by
+  discarding old statistics.  An 8 bit state represents (n0,n1,h), initially
+  (0,0,0) where:
+
+    n0 is the number of 0 bits seen "recently".
+    n1 is the number of 1 bits seen "recently".
+    n = n0 + n1.
+    h is the full bit history for 0 <= n <= 4,
+      the last bit seen (0 or 1) if 5 <= n <= 15,
+      0 for n >= 16.
+
+  The primary output is t_i := stretch(sm(n0,n1,h)), where sm(.) is
+  a stationary map with K = 1/256, initialized to
+  sm(n0,n1,h) = (n1+(1/64))/(n+2/64).  Four additional inputs are also
+  computed to improve compression slightly:
+
+    p1_i = sm(n0,n1,h)
+    p0_i = 1 - p1_i
+    t_i   := stretch(p_1)
+    t_i+1 := K1 (p1_i - p0_i)
+    t_i+2 := K2 stretch(p1) if n0 = 0, -K2 stretch(p1) if n1 = 0, else 0
+    t_i+3 := K3 (-p0_i if n1 = 0, p1_i if n0 = 0, else 0)
+    t_i+4 := K3 (-p0_i if n0 = 0, p1_i if n1 = 0, else 0)
+
+  where K1..K4 are ad-hoc constants.
+
+  h is updated as follows:
+    If n < 4, append y_j to h.
+    Else if n <= 16, set h := y_j.
+    Else h = 0.
+
+  The update rule is biased toward newer data in a way that allows
+  n0 or n1, but not both, to grow large by discarding counts of the
+  opposite bit.  Large counts are incremented probabilistically.
+  Specifically, when y_j = 0 then the update rule is:
+
+    n0 := n0 + 1, n < 29
+          n0 + 1 with probability 2^(27-n0)/2 else n0, 29 <= n0 < 41
+          n0, n = 41.
+    n1 := n1, n1 <= 5
+          round(8/3 lg n1), if n1 > 5
+
+  swapping (n0,n1) when y_j = 1.
+
+  Furthermore, to allow an 8 bit representation for (n0,n1,h), states
+  exceeding the following values of n0 or n1 are replaced with the
+  state with the closest ratio n0:n1 obtained by decrementing the
+  smaller count: (41,0,h), (40,1,h), (12,2,h), (5,3,h), (4,4,h),
+  (3,5,h), (2,12,h), (1,40,h), (0,41,h).  For example:
+  (12,2,1) 0-> (7,1,0) because there is no state (13,2,0).
+
+- Match Model.  The state is (c,b), initially (0,0), where c is 1 if
+  the context was previously seen, else 0, and b is the next bit in
+  this context.  The prediction is:
+
+    t_i := (2b - 1)Kc log(m + 1)
+
+  where m is the length of the context.  The update rule is c := 1,
+  b := y_j.  A match model can be implemented efficiently by storing
+  input in a buffer and storing pointers into the buffer into a hash
+  table indexed by context.  Then c is indicated by a hash table entry
+  and b can be retrieved from the buffer.
+
+
+CONTEXTS
+
+High compression is achieved by combining a large number of contexts.
+Most (not all) contexts start on a byte boundary and end on the bit
+immediately preceding the predicted bit.  The contexts below are
+modeled with both a run map and a nonstationary map unless indicated.
+
+- Order n.  The last n bytes, up to about 16.  For general purpose data.
+  Most of the compression occurs here for orders up to about 6.
+  An order 0 context includes only the 0-7 bits of the partially coded
+  byte and the number of these bits (255 possible values).
+
+- Sparse.  Usually 1 or 2 of the last 8 bytes preceding the byte containing
+  the predicted bit, e.g (2), (3),..., (8), (1,3), (1,4), (1,5), (1,6),
+  (2,3), (2,4), (3,6), (4,8).  The ordinary order 1 and 2 context, (1)
+  or (1,2) are included above.  Useful for binary data.
+
+- Text.  Contexts consists of whole words (a-z, converted to lower case
+  and skipping other values).  Contexts may be sparse, e.g (0,2) meaning
+  the current (partially coded) word and the second word preceding the
+  current one.  Useful contexts are (0), (0,1), (0,1,2), (0,2), (0,3),
+  (0,4).  The preceding byte may or may not be included as context in the
+  current word.
+
+- Formatted text.  The column number (determined by the position of
+  the last linefeed) is combined with other contexts: the character to
+  the left and the character above it.
+
+- Fixed record length.  The record length is determined by searching for
+  byte sequences with a uniform stride length.  Once this is found, then
+  the record length is combined with the context of the bytes immediately
+  preceding it and the corresponding byte locations in the previous
+  one or two records (as with formatted text).
+
+- Context gap.  The distance to the previous occurrence of the order 1
+  or order 2 context is combined with other low order (1-2) contexts.
+
+- FAX.  For 2-level bitmapped images.  Contexts are the surrounding
+  pixels already seen.  Image width is assumed to be 1728 bits (as
+  in calgary/pic).
+
+- Image.  For uncompressed 24-bit color BMP and TIFF images.  Contexts
+  are the high order bits of the surrounding pixels and linear
+  combinations of those pixels, including other color planes.  The
+  image width is detected from the file header.  When an image is
+  detected, other models are turned off to improve speed.
+
+- JPEG.  Files are further compressed by partially uncompressing back
+  to the DCT coefficients to provide context for the next Huffman code.
+  Only baseline DCT-Huffman coded files are modeled.  (This is about
+  90% of images; the others are usually progressive coded).  JPEG images
+  embedded in other files (quite common) are detected by headers.  The
+  baseline JPEG coding process is:
+  - Convert to grayscale and 2 chroma colorspace.
+  - Sometimes downsample the chroma images 2:1 or 4:1 in X and/or Y.
+  - Divide each of the 3 images into 8x8 blocks.
+  - Convert using 2-D discrete cosine transform (DCT) to 64 12-bit signed
+    coefficients.
+  - Quantize the coefficients by integer division (lossy).
+  - Split the image into horizontal slices coded independently, separated
+    by restart codes.
+  - Scan each block starting with the DC (0,0) coefficient in zigzag order
+    to the (7,7) coefficient, interleaving the 3 color components in
+    order to scan the whole image left to right starting at the top.
+  - Subtract the previous DC component from the current in each color.
+  - Code the coefficients using RS codes, where R is a run of R zeros (0-15)
+    and S indicates 0-11 bits of a signed value to follow.  (There is a
+    special RS code (EOB) to indicate the rest of the 64 coefficients are 0).
+  - Huffman code the RS symbol, followed by S literal bits.
+  The most useful contexts are the current partially coded Huffman code
+  (including S following bits) combined with the coefficient position
+  (0-63), color (0-2), and last few RS codes.
+
+- Match.  When a context match of 400 bytes or longer is detected,
+  the next bit of the match is predicted and other models are turned
+  off to improve speed.
+
+- Exe.  When a x86 file (.exe, .obj, .dll) is detected, sparse contexts
+  with gaps of 1-12 selecting only the prefix, opcode, and the bits
+  of the modR/M byte that are relevant to parsing are selected.
+  This model is turned off otherwise.
+
+- Indirect.  The history of the last 1-3 bytes in the context of the
+  last 1-2 bytes is combined with this 1-2 byte context.
+
+- DMC. A bitwise n-th order context is built from a state machine using
+  DMC, described in http://plg.uwaterloo.ca/~ftp/dmc/dmc.c
+  The effect is to extend a single context, one bit at a time and predict
+  the next bit based on the history in this context.  The model here differs
+  in that two predictors are used.  One is a pair of counts as in the original
+  DMC.  The second predictor is a bit history state mapped adaptively to
+  a probability as in a Nonstationary Map.
+
+ARCHITECTURE
+
+The context models are mixed by several of several hundred neural networks
+selected by a low-order context.  The outputs of these networks are
+combined using a second neural network, then fed through several stages of 
+adaptive probability maps (APM) before arithmetic coding.
+
+For images, only one neural network is used and its context is fixed.
+
+An APM is a stationary map combining a context and an input probability.
+The input probability is stretched and divided into 32 segments to
+combine with other contexts.  The output is interpolated between two
+adjacent quantized values of stretch(p1).  There are 2 APM stages in series:
+
+  p1 := (p1 + 3 APM(order 0, p1)) / 4.
+  p1 := (APM(order 1, p1) + 2 APM(order 2, p1) + APM(order 3, p1)) / 4.
+
+PREPROCESSING
+
+paq8l uses preprocessing transforms on certain data types to improve
+compression.  To improve reliability, the decoding transform is
+tested during compression to ensure that the input file can be
+restored.  If the decoder output is not identical to the input file
+due to a bug, then the transform is abandoned and the data is compressed
+without a transform so that it will still decompress correctly.
+
+The input is split into blocks with the format <type> <decoded size> <data>
+where <type> is 1 byte (0 = no transform), <decoded size> is the size
+of the data after decoding, which may be different than the size of <data>.
+Blocks do not span file boundaries, and have a maximum size of 4MB to
+2GB depending on compression level.  Large files are split into blocks
+of this size.  The preprocessor has 3 parts:
+
+- Detector.  Splits the input into smaller blocks depending on data type.
+
+- Coder.  Input is a block to be compressed.  Output is a temporary
+  file.  The coder determines whether a transform is to be applied
+  based on file type, and if so, which one.  A coder may use lots
+  of resources (memory, time) and make multiple passes through the
+  input file.  The file type is stored (as one byte) during compression.
+
+- Decoder.  Performs the inverse transform of the coder.  It uses few
+  resources (fast, low memory) and runs in a single pass (stream oriented).
+  It takes input either from a file or the arithmetic decoder.  Each call
+  to the decoder returns a single decoded byte.
+
+The following transforms are used:
+
+- EXE:  CALL (0xE8) and JMP (0xE9) address operands are converted from
+  relative to absolute address.  The transform is to replace the sequence
+  E8/E9 xx xx xx 00/FF by adding file offset modulo 2^25 (signed range,
+  little-endian format).  Data to transform is identified by trying the
+  transform and applying a crude compression test: testing whether the
+  byte following the E8/E9 (LSB of the address) occurred more recently
+  in the transformed data than the original and within 4KB 4 times in
+  a row.  The block ends when this does not happen for 4KB.
+
+- JPEG: detected by SOI and SOF and ending with EOI or any nondecodable
+  data.  No transform is applied.  The purpose is to separate images
+  embedded in executables to block the EXE transform, and for a future
+  place to insert a transform.
+
+
+IMPLEMENTATION
+
+Hash tables are designed to minimize cache misses, which consume most
+of the CPU time.
+
+Most of the memory is used by the nonstationary context models.
+Contexts are represented by 32 bits, possibly a hash.  These are
+mapped to a bit history, represented by 1 byte.  The hash table is
+organized into 64-byte buckets on cache line boundaries.  Each bucket
+contains 7 x 7 bit histories, 7 16-bit checksums, and a 2 element LRU
+queue packed into one byte.  Each 7 byte element represents 7 histories
+for a context ending on a 3-bit boundary plus 0-2 more bits.  One
+element (for bits 0-1, which have 4 unused bytes) also contains a run model 
+consisting of the last byte seen and a count (as 1 byte each).
+
+Run models use 4 byte hash elements consisting of a 2 byte checksum, a
+repeat count (0-255) and the byte value.  The count also serves as
+a priority.
+
+Stationary models are most appropriate for small contexts, so the
+context is used as a direct table lookup without hashing.
+
+The match model maintains a pointer to the last match until a mismatching
+bit is found.  At the start of the next byte, the hash table is referenced
+to find another match.  The hash table of pointers is updated after each
+whole byte.  There is no checksum.  Collisions are detected by comparing
+the current and matched context in a rotating buffer.
+
+The inner loops of the neural network prediction (1) and training (2)
+algorithms are implemented in MMX assembler, which computes 4 elements
+at a time.  Using assembler is 8 times faster than C++ for this code
+and 1/3 faster overall.  (However I found that SSE2 code on an AMD-64,
+which computes 8 elements at a time, is not any faster).
+
+
+DIFFERENCES FROM PAQ7
+
+An .exe model and filter are added.  Context maps are improved using 16-bit
+checksums to reduce collisions.  The state table uses probabilistic updates
+for large counts, more states that remember the last bit, and decreased
+discounting of the opposite count.  It is implemented as a fixed table.
+There are also many minor changes.
+
+DIFFERENCES FROM PAQ8A
+
+The user interface supports directory compression and drag and drop.
+The preprocessor segments the input into blocks and uses more robust
+EXE detection.  An indirect context model was added.  There is no
+dictionary preprocessor like PAQ8B/C/D/E.
+
+DIFFERENCES FROM PAQ8F
+
+Different models, usually from paq8hp*. Also changed rate from 8 to 7. A bug
+in Array was fixed that caused the program to silently crash upon exit.
+
+DIFFERENCES FROM PAQ8J
+
+1) Slightly improved sparse model. 
+2) Added a new family of sparse contexts. Each byte is mapped to a 3-bit value,
+where different values correspond to different byte classes. For example, input
+byte 0x00 is transformed into 0, all bytes less than 16 into 5, all
+punctuation marks (ispunct(c)!=0) into 2, etc. These flags from the 11
+previous bytes are then combined into a 32-bit pseudo-context.
+
+All these improvements gain only 62 bytes on BOOK1, but on binaries the archive
+size is reduced by 1-2%.
+
+DIFFERENCES FROM PAQ8JA
+
+Introduced a distance model. The distance model uses the distance to the last
+occurrence of some anchor char ( 0x00, space, newline, 0xff ), combined with
+previous characters as context. This slightly improves compression of files
+with variable-width record data.
+
+DIFFERENCES FROM PAQ8JB
+
+Restored recordModel(), broken in paq8hp*. Slightly tuned indirectModel(). 
+
+DIFFERENCES FROM PAQ8JC
+
+Changed the APMs in the Predictor. Up to a 0.2% improvement for some files.
+
+DIFFERENCES FROM PAQ8JD
+
+Added DMCModel.  Removed some redundant models from SparseModel and other
+minor tuneups.  Changes introduced in PAQ8K were not carried over.
+
+PAQ8L v.2
+
+Changed Mixer::p() to p() to fix a compiler error in Linux
+(patched by Indrek Kruusa, Apr. 15, 2007).
+
+*/
+
+#define PROGNAME "paq8l"  // Please change this if you change the program.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include <ctype.h>
+#define NDEBUG  // remove for debugging (turns on Array bound checks)
+#include <assert.h>
+
+#ifdef UNIX
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <errno.h>
+#endif
+
+#ifdef WINDOWS
+#include <windows.h>
+#endif
+
+#ifndef DEFAULT_OPTION
+#define DEFAULT_OPTION 5
+#endif
+
+// 8, 16, 32 bit unsigned types (adjust as appropriate)
+typedef unsigned char  U8;
+typedef unsigned short U16;
+typedef unsigned int   U32;
+
+// min, max functions
+#ifndef WINDOWS
+// Smaller of two ints.
+inline int min(int a, int b) {
+  if (a<b) return a;
+  return b;
+}
+// Larger of two ints.
+inline int max(int a, int b) {
+  if (a<b) return b;
+  return a;
+}
+#endif
+
+// Error handler: abandon the current operation by throwing the message.
+// The message (a C string, or 0 when none is given) is thrown as a
+// const char*; this function itself neither prints anything nor calls exit().
+// NOTE(review): presumably a catch block elsewhere prints it and exits -- confirm.
+void quit(const char* message=0) {
+  throw message;
+}
+
+// Case-insensitive string equality (only A-Z are folded to lower case).
+// Returns 1 if a and b match ignoring case, 0 otherwise.
+int equals(const char* a, const char* b) {
+  assert(a && b);
+  for (; *a && *b; ++a, ++b) {
+    int lhs=*a, rhs=*b;
+    if (lhs>='A' && lhs<='Z') lhs+='a'-'A';
+    if (rhs>='A' && rhs<='Z') rhs+='a'-'A';
+    if (lhs!=rhs) return 0;
+  }
+  // At least one string is exhausted; equal only if both are.
+  return *a==*b;
+}
+
+//////////////////////// Program Checker /////////////////////
+
+// Tracks the time elapsed (as measured by clock()) since construction and
+// the peak number of bytes reported through alloc().
+class ProgramChecker {
+  int memused;         // bytes currently reported as allocated
+  int maxmem;          // largest value memused has reached
+  clock_t start_time;  // clock() reading taken in the constructor, in ticks
+public:
+  ProgramChecker(): memused(0), maxmem(0) {
+    start_time=clock();
+    // Sanity checks on the type size assumptions (active only with asserts on)
+    assert(sizeof(U8)==1);
+    assert(sizeof(U16)==2);
+    assert(sizeof(U32)==4);
+    assert(sizeof(short)==2);
+    assert(sizeof(int)==4);
+  }
+  // Report a change of n bytes in allocated memory; n may be negative.
+  void alloc(int n) {
+    memused+=n;
+    if (maxmem<memused) maxmem=memused;
+  }
+  // Print elapsed time in seconds and the peak memory figure.
+  void print() const {
+    const double seconds=double(clock()-start_time)/CLOCKS_PER_SEC;
+    printf("Time %1.2f sec, used %d bytes of memory\n", seconds, maxmem);
+  }
+} programChecker;
+
+//////////////////////////// Array ////////////////////////////
+
+// Array<T, ALIGN> a(n); creates n elements of T initialized to 0 bits.
+// Constructors for T are not called.
+// Indexing is bounds checked if assertions are on.
+// a.size() returns n.
+// a.resize(n) changes size to n, padding with 0 bits or truncating.
+// a.push_back(x) appends x and increases size by 1, reserving up to size*2.
+// a.pop_back() decreases size by 1, does not free memory.
+// Copy and assignment are not supported.
+// Memory is aligned on a ALIGN byte boundary (power of 2), default is none.
+
+template <class T, int ALIGN=0> class Array {
+private:
+  int n;         // number of elements visible to the user
+  int reserved;  // number of elements actually allocated
+  char *ptr;     // raw zeroed block from calloc, 0 if nothing is allocated
+  T* data;       // first of the n elements, aligned inside the block at ptr
+  void create(int i);  // allocate storage for i elements
+  Array(const Array&);  // no copy or assignment
+  Array& operator=(const Array&);
+public:
+  explicit Array(int i=0) {create(i);}
+  ~Array();
+
+  // Element access; the index is checked only when NDEBUG is not defined.
+  T& operator[](int i) {
+#ifndef NDEBUG
+    if (i<0 || i>=n) {
+      fprintf(stderr, "%d out of bounds %d\n", i, n);
+      quit();
+    }
+#endif
+    return data[i];
+  }
+  const T& operator[](int i) const {
+#ifndef NDEBUG
+    if (i<0 || i>=n) {
+      fprintf(stderr, "%d out of bounds %d\n", i, n);
+      quit();
+    }
+#endif
+    return data[i];
+  }
+
+  int size() const {return n;}
+  void resize(int i);  // change size to i
+  void push_back(const T& x);  // increment size, append x
+  // Drop the last element (no-op when empty); memory is kept.
+  void pop_back() {
+    if (n>0) --n;
+  }
+};
+
+// Change the user-visible size to i.
+// If i fits in the current reservation only the size field changes: nothing
+// is reallocated, and elements re-exposed by growing are NOT re-zeroed.
+// Otherwise a new zeroed block of i elements is allocated, the old elements
+// are copied into it and the old block is released.
+template<class T, int ALIGN> void Array<T, ALIGN>::resize(int i) {
+  if (i<=reserved) {
+    n=i;
+    return;
+  }
+  char *saveptr=ptr;
+  T *savedata=data;
+  int saven=n;
+  int savereserved=reserved;  // element count the old block was charged for
+  create(i);  // sets n=reserved=i and charges the new block to programChecker
+  if (saveptr) {
+    if (savedata) {
+      memcpy(data, savedata, sizeof(T)*min(i, saven));
+      // Credit back the OLD block.  n now equals i, so using n here would
+      // subtract the size of the block that was just allocated instead.
+      programChecker.alloc(-ALIGN-savereserved*(int)sizeof(T));
+    }
+    free(saveptr);
+  }
+}
+
+// Allocate zeroed storage for i elements and set n=reserved=i.
+// For i<=0 nothing is allocated and both pointers are set to 0.
+// The block is over-allocated by ALIGN bytes so that data can be moved up
+// to the next ALIGN-byte boundary (ALIGN must be a power of 2, or 0 for no
+// alignment).  Calls quit("Out of memory") if calloc fails.
+template<class T, int ALIGN> void Array<T, ALIGN>::create(int i) {
+  n=reserved=i;
+  if (i<=0) {
+    data=0;
+    ptr=0;
+    return;
+  }
+  const int sz=ALIGN+n*sizeof(T);
+  programChecker.alloc(sz);
+  ptr = (char*)calloc(sz, 1);
+  if (!ptr) quit("Out of memory");
+  // Use size_t rather than long for the pointer-to-integer cast: long is
+  // narrower than a pointer on LLP64 targets (64-bit Windows), where the
+  // cast is a truncation that some compilers reject outright.
+  data = (ALIGN ? (T*)(ptr+ALIGN-(((size_t)ptr)&(ALIGN-1))) : (T*)ptr);
+  assert((char*)data>=ptr && (char*)data<=ptr+ALIGN);
+}
+
+// Release the storage and credit it back to programChecker.
+// create() charges ALIGN+reserved*sizeof(T) bytes, and only when it really
+// allocates, so the credit uses reserved (not the user size n, which
+// pop_back()/push_back() can make smaller) and is skipped for empty arrays.
+template<class T, int ALIGN> Array<T, ALIGN>::~Array() {
+  if (!ptr) return;
+  programChecker.alloc(-ALIGN-reserved*(int)sizeof(T));
+  free(ptr);
+}
+
+// Append x, growing the reservation to max(1, 2*n) elements when full.
+template<class T, int ALIGN> void Array<T, ALIGN>::push_back(const T& x) {
+  // Copy first: x may refer to an element of this array, and resize()
+  // frees the old block, which would leave the reference dangling.
+  const T value=x;
+  if (n==reserved) {
+    int saven=n;
+    resize(max(1, n*2));
+    n=saven;  // resize() set n to the new capacity; restore the real size
+  }
+  data[n++]=value;
+}
+
+/////////////////////////// String /////////////////////////////
+
+// A tiny subset of std::string
+// size() includes NUL terminator.
+
+class String: public Array<char> {
+public:
+  // Build from a C string: start as a single NUL element, then append s.
+  String(const char* s=""): Array<char>(1) {
+    (*this)+=s;
+  }
+  // Pointer to the NUL terminated contents.
+  const char* c_str() const {return &(*this)[0];}
+  // Replace the contents with a copy of s (including its terminator).
+  void operator=(const char* s) {
+    resize(strlen(s)+1);
+    char* dst=&(*this)[0];
+    strcpy(dst, s);
+  }
+  // Append s: drop the old terminator, push each char, re-terminate.
+  void operator+=(const char* s) {
+    assert(s);
+    pop_back();
+    for (; *s; ++s) push_back(*s);
+    push_back(0);
+  }
+};
+
+
+//////////////////////////// rnd ///////////////////////////////
+
+// 32-bit pseudo random number generator.
+// Keeps a 64-entry table; each call advances the index and stores and
+// returns the XOR of the entries 24 and 55 positions behind it (mod 64).
+class Random{
+  Array<U32> table;
+  int i;
+public:
+  Random(): table(64) {
+    table[0]=123456789;
+    table[1]=987654321;
+    for (int k=2; k<64; ++k)
+      table[k]=table[k-1]*11+table[k-2]*23/16;
+    i=0;
+  }
+  U32 operator()() {
+    ++i;
+    const U32 r=table[(i-24)&63]^table[(i-55)&63];
+    table[i&63]=r;
+    return r;
+  }
+} rnd;
+
+////////////////////////////// Buf /////////////////////////////
+
+// Buf buf(n); creates an array of n bytes (must be a power of 2).
+// buf[i] returns a reference to the i'th byte with wrap (no out of bounds).
+// buf(i) returns i'th byte back from pos (i > 0) 
+// buf.size() returns n.
+
+int pos;  // Number of input bytes in buf (not wrapped)
+
+class Buf {
+  Array<U8> b;
+public:
+  Buf(int i=0): b(i) {}
+  // Resize to i bytes; a zero i is ignored.
+  void setsize(int i) {
+    if (i==0) return;
+    assert(i>0 && (i&(i-1))==0);
+    b.resize(i);
+  }
+  // Byte at absolute index i, wrapped with the mask size-1.
+  U8& operator[](int i) {
+    const int mask=b.size()-1;
+    return b[i&mask];
+  }
+  // Byte i positions back from pos, wrapped with the mask size-1.
+  int operator()(int i) const {
+    assert(i>0);
+    const int mask=b.size()-1;
+    return b[(pos-i)&mask];
+  }
+  int size() const {return b.size();}
+};
+
+/////////////////////// Global context /////////////////////////
+
+int level=DEFAULT_OPTION;  // Compression level 0 to 9
+// MEM expands to 0x10000 shifted left by level.  It is a macro over the
+// run-time variable level, so it is re-evaluated at every use.
+#define MEM (0x10000<<level)
+int y=0;  // Last bit, 0 or 1, set by encoder
+
+// Global context set by Predictor and available to all models.
+int c0=1; // Last 0-7 bits of the partial byte with a leading 1 bit (1-255)
+U32 c4=0; // Last 4 whole bytes, packed.  Last byte is bits 0-7.
+int bpos=0; // bits in c0 (0 to 7)
+Buf buf;  // Rotating input queue set by Predictor
+
+///////////////////////////// ilog //////////////////////////////
+
+// ilog(x) = round(log2(x) * 16), 0 <= x < 64K
+class Ilog {
+  Array<U8> t;  // lookup table with one entry per 16-bit input
+public:
+  Ilog();
+  int operator()(U16 x) const {return t[x];}
+} ilog;
+
+// Compute lookup table by numerical integration of 1/x.
+// Entries 0 and 1 are not written here and keep the array's initial value.
+Ilog::Ilog(): t(65536) {
+  U32 acc=14155776;
+  int i=2;
+  while (i<65536) {
+    acc+=774541002/(i*2-1);  // numerator is 2^29/ln 2
+    t[i]=acc>>24;
+    ++i;
+  }
+}
+
+// llog(x) accepts 32 bits.  Values too large for ilog's 16-bit argument
+// are shifted right by 8 or 16 bits and 128 or 256 is added to the result.
+inline int llog(U32 x) {
+  if (x>=0x1000000) return 256+ilog(x>>16);
+  if (x>=0x10000) return 128+ilog(x>>8);
+  return ilog(x);
+}
+
+///////////////////////// state table ////////////////////////
+
+// State table:
+//   nex(state, 0) = next state if bit y is 0, 0 <= state < 256
+//   nex(state, 1) = next state if bit y is 1
+//   nex(state, 2) = number of zeros in bit history represented by state
+//   nex(state, 3) = number of ones represented
+//
+// States represent a bit history within some context.
+// State 0 is the starting state (no bits seen).
+// States 1-30 represent all possible sequences of 1-4 bits.
+// States 31-252 represent a pair of counts, (n0,n1), the number
+//   of 0 and 1 bits respectively.  If n0+n1 < 16 then there are
+//   two states for each pair, depending on if a 0 or 1 was the last
+//   bit seen.
+// If n0 and n1 are too large, then there is no state to represent this
+// pair, so another state with about the same ratio of n0/n1 is substituted.
+// Also, when a bit is observed and the count of the opposite bit is large,
+// then part of this count is discarded to favor newer data over old.
+
+#if 1 // change to #if 0 to generate this table at run time (4% slower)
+static const U8 State_table[256][4]={
+  {  1,  2, 0, 0},{  3,  5, 1, 0},{  4,  6, 0, 1},{  7, 10, 2, 0}, // 0-3
+  {  8, 12, 1, 1},{  9, 13, 1, 1},{ 11, 14, 0, 2},{ 15, 19, 3, 0}, // 4-7
+  { 16, 23, 2, 1},{ 17, 24, 2, 1},{ 18, 25, 2, 1},{ 20, 27, 1, 2}, // 8-11
+  { 21, 28, 1, 2},{ 22, 29, 1, 2},{ 26, 30, 0, 3},{ 31, 33, 4, 0}, // 12-15
+  { 32, 35, 3, 1},{ 32, 35, 3, 1},{ 32, 35, 3, 1},{ 32, 35, 3, 1}, // 16-19
+  { 34, 37, 2, 2},{ 34, 37, 2, 2},{ 34, 37, 2, 2},{ 34, 37, 2, 2}, // 20-23
+  { 34, 37, 2, 2},{ 34, 37, 2, 2},{ 36, 39, 1, 3},{ 36, 39, 1, 3}, // 24-27
+  { 36, 39, 1, 3},{ 36, 39, 1, 3},{ 38, 40, 0, 4},{ 41, 43, 5, 0}, // 28-31
+  { 42, 45, 4, 1},{ 42, 45, 4, 1},{ 44, 47, 3, 2},{ 44, 47, 3, 2}, // 32-35
+  { 46, 49, 2, 3},{ 46, 49, 2, 3},{ 48, 51, 1, 4},{ 48, 51, 1, 4}, // 36-39
+  { 50, 52, 0, 5},{ 53, 43, 6, 0},{ 54, 57, 5, 1},{ 54, 57, 5, 1}, // 40-43
+  { 56, 59, 4, 2},{ 56, 59, 4, 2},{ 58, 61, 3, 3},{ 58, 61, 3, 3}, // 44-47
+  { 60, 63, 2, 4},{ 60, 63, 2, 4},{ 62, 65, 1, 5},{ 62, 65, 1, 5}, // 48-51
+  { 50, 66, 0, 6},{ 67, 55, 7, 0},{ 68, 57, 6, 1},{ 68, 57, 6, 1}, // 52-55
+  { 70, 73, 5, 2},{ 70, 73, 5, 2},{ 72, 75, 4, 3},{ 72, 75, 4, 3}, // 56-59
+  { 74, 77, 3, 4},{ 74, 77, 3, 4},{ 76, 79, 2, 5},{ 76, 79, 2, 5}, // 60-63
+  { 62, 81, 1, 6},{ 62, 81, 1, 6},{ 64, 82, 0, 7},{ 83, 69, 8, 0}, // 64-67
+  { 84, 71, 7, 1},{ 84, 71, 7, 1},{ 86, 73, 6, 2},{ 86, 73, 6, 2}, // 68-71
+  { 44, 59, 5, 3},{ 44, 59, 5, 3},{ 58, 61, 4, 4},{ 58, 61, 4, 4}, // 72-75
+  { 60, 49, 3, 5},{ 60, 49, 3, 5},{ 76, 89, 2, 6},{ 76, 89, 2, 6}, // 76-79
+  { 78, 91, 1, 7},{ 78, 91, 1, 7},{ 80, 92, 0, 8},{ 93, 69, 9, 0}, // 80-83
+  { 94, 87, 8, 1},{ 94, 87, 8, 1},{ 96, 45, 7, 2},{ 96, 45, 7, 2}, // 84-87
+  { 48, 99, 2, 7},{ 48, 99, 2, 7},{ 88,101, 1, 8},{ 88,101, 1, 8}, // 88-91
+  { 80,102, 0, 9},{103, 69,10, 0},{104, 87, 9, 1},{104, 87, 9, 1}, // 92-95
+  {106, 57, 8, 2},{106, 57, 8, 2},{ 62,109, 2, 8},{ 62,109, 2, 8}, // 96-99
+  { 88,111, 1, 9},{ 88,111, 1, 9},{ 80,112, 0,10},{113, 85,11, 0}, // 100-103
+  {114, 87,10, 1},{114, 87,10, 1},{116, 57, 9, 2},{116, 57, 9, 2}, // 104-107
+  { 62,119, 2, 9},{ 62,119, 2, 9},{ 88,121, 1,10},{ 88,121, 1,10}, // 108-111
+  { 90,122, 0,11},{123, 85,12, 0},{124, 97,11, 1},{124, 97,11, 1}, // 112-115
+  {126, 57,10, 2},{126, 57,10, 2},{ 62,129, 2,10},{ 62,129, 2,10}, // 116-119
+  { 98,131, 1,11},{ 98,131, 1,11},{ 90,132, 0,12},{133, 85,13, 0}, // 120-123
+  {134, 97,12, 1},{134, 97,12, 1},{136, 57,11, 2},{136, 57,11, 2}, // 124-127
+  { 62,139, 2,11},{ 62,139, 2,11},{ 98,141, 1,12},{ 98,141, 1,12}, // 128-131
+  { 90,142, 0,13},{143, 95,14, 0},{144, 97,13, 1},{144, 97,13, 1}, // 132-135
+  { 68, 57,12, 2},{ 68, 57,12, 2},{ 62, 81, 2,12},{ 62, 81, 2,12}, // 136-139
+  { 98,147, 1,13},{ 98,147, 1,13},{100,148, 0,14},{149, 95,15, 0}, // 140-143
+  {150,107,14, 1},{150,107,14, 1},{108,151, 1,14},{108,151, 1,14}, // 144-147
+  {100,152, 0,15},{153, 95,16, 0},{154,107,15, 1},{108,155, 1,15}, // 148-151
+  {100,156, 0,16},{157, 95,17, 0},{158,107,16, 1},{108,159, 1,16}, // 152-155
+  {100,160, 0,17},{161,105,18, 0},{162,107,17, 1},{108,163, 1,17}, // 156-159
+  {110,164, 0,18},{165,105,19, 0},{166,117,18, 1},{118,167, 1,18}, // 160-163
+  {110,168, 0,19},{169,105,20, 0},{170,117,19, 1},{118,171, 1,19}, // 164-167
+  {110,172, 0,20},{173,105,21, 0},{174,117,20, 1},{118,175, 1,20}, // 168-171
+  {110,176, 0,21},{177,105,22, 0},{178,117,21, 1},{118,179, 1,21}, // 172-175
+  {110,180, 0,22},{181,115,23, 0},{182,117,22, 1},{118,183, 1,22}, // 176-179
+  {120,184, 0,23},{185,115,24, 0},{186,127,23, 1},{128,187, 1,23}, // 180-183
+  {120,188, 0,24},{189,115,25, 0},{190,127,24, 1},{128,191, 1,24}, // 184-187
+  {120,192, 0,25},{193,115,26, 0},{194,127,25, 1},{128,195, 1,25}, // 188-191
+  {120,196, 0,26},{197,115,27, 0},{198,127,26, 1},{128,199, 1,26}, // 192-195
+  {120,200, 0,27},{201,115,28, 0},{202,127,27, 1},{128,203, 1,27}, // 196-199
+  {120,204, 0,28},{205,115,29, 0},{206,127,28, 1},{128,207, 1,28}, // 200-203
+  {120,208, 0,29},{209,125,30, 0},{210,127,29, 1},{128,211, 1,29}, // 204-207
+  {130,212, 0,30},{213,125,31, 0},{214,137,30, 1},{138,215, 1,30}, // 208-211
+  {130,216, 0,31},{217,125,32, 0},{218,137,31, 1},{138,219, 1,31}, // 212-215
+  {130,220, 0,32},{221,125,33, 0},{222,137,32, 1},{138,223, 1,32}, // 216-219
+  {130,224, 0,33},{225,125,34, 0},{226,137,33, 1},{138,227, 1,33}, // 220-223
+  {130,228, 0,34},{229,125,35, 0},{230,137,34, 1},{138,231, 1,34}, // 224-227
+  {130,232, 0,35},{233,125,36, 0},{234,137,35, 1},{138,235, 1,35}, // 228-231
+  {130,236, 0,36},{237,125,37, 0},{238,137,36, 1},{138,239, 1,36}, // 232-235
+  {130,240, 0,37},{241,125,38, 0},{242,137,37, 1},{138,243, 1,37}, // 236-239
+  {130,244, 0,38},{245,135,39, 0},{246,137,38, 1},{138,247, 1,38}, // 240-243
+  {140,248, 0,39},{249,135,40, 0},{250, 69,39, 1},{ 80,251, 1,39}, // 244-247
+  {140,252, 0,40},{249,135,41, 0},{250, 69,40, 1},{ 80,251, 1,40}, // 248-251
+  {140,252, 0,41}};  // 252, 253-255 are reserved
+
+#define nex(state,sel) State_table[state][sel]
+
+// The code used to generate the above table at run time (4% slower).
+// To print the table, uncomment the 4 lines of print statements below.
+// In this code x,y = n0,n1 is the number of 0,1 bits represented by a state.
+#else
+
+// Run-time generated alternative to the State_table literal.  This branch
+// is compiled only when the "#if 1" above is changed to "#if 0".
+// nex(state, sel) reads ns[state*4+sel], so nex keeps the same call syntax
+// as the macro used in the other branch.
+class StateTable {
+  Array<U8> ns;  // state*4 -> next state if 0, if 1, n0, n1
+  enum {B=5, N=64}; // sizes of b, t
+  static const int b[B];  // x -> max y, y -> max x
+  static U8 t[N][N][2];  // x,y -> state number, number of states
+  int num_states(int x, int y);  // compute t[x][y][1]
+  void discount(int& x);  // set new value of x after 1 or y after 0
+  void next_state(int& x, int& y, int b);  // new (x,y) after bit b
+public:
+  int operator()(int state, int sel) {return ns[state*4+sel];}
+  StateTable();
+} nex;
+
+// Indexed by the smaller count of a pair: the larger count must be below
+// this bound for the pair to have a state (see num_states).
+const int StateTable::b[B]={42,41,13,6,5};  // x -> max y, y -> max x
+U8 StateTable::t[N][N][2];
+
+// Number of states assigned to the count pair (x,y), or 0 if the pair has
+// no state of its own.  The result is symmetric in x and y.
+int StateTable::num_states(int x, int y) {
+  if (x<y) {  // normalise so that x >= y
+    const int tmp=x;
+    x=y;
+    y=tmp;
+  }
+  if (x<0 || y<0 || x>=N || y>=N || y>=B || x>=b[y]) return 0;
+
+  // States 31-255 represent a 0,1 count and possibly the last bit
+  // if the state is reachable by either a 0 or 1.
+  if (x+y>4) return 1+(y>0 && x+y<16);
+
+  // States 0-30 are a history of the last 0-4 bits
+  int r=1;  // x+y choose x = (x+y)!/x!y!
+  for (int i=x+1; i<=x+y; ++i) r*=i;
+  for (int i=2; i<=y; ++i) r/=i;
+  return r;
+}
+
+// New value of count x if the opposite bit is observed.
+// Counts of 2 or less are left alone; larger ones become ilog(x)/6-1.
+void StateTable::discount(int& x) {
+  if (x<=2) return;
+  x=ilog(x)/6-1;
+}
+
+// compute next x,y (0 to N) given input b (0 or 1)
+// x and y are updated in place.  Note the parameter b hides the static
+// member array b inside this function.
+void StateTable::next_state(int& x, int& y, int b) {
+  if (x<y)
+    // Handle the mirrored case by swapping the roles of the two counts
+    // (they are references, so the caller's x and y are still updated)
+    // and inverting the bit.
+    next_state(y, x, 1-b);
+  else {
+    // Count the observed bit and discount the opposite count.
+    if (b) {
+      ++y;
+      discount(x);
+    }
+    else {
+      ++x;
+      discount(y);
+    }
+    // If the new pair has no state (t[x][y][1]==0), shrink it until one
+    // exists: lower x when y<2, otherwise rescale x by (y-1)/y with
+    // rounding and lower y.
+    while (!t[x][y][1]) {
+      if (y<2) --x;
+      else {
+        x=(x*(y-1)+(y/2))/y;
+        --y;
+      }
+    }
+  }
+}
+
+// Initialize next state table ns[state*4] -> next if 0, next if 1, x, y
+// Pass 1 numbers the states: pairs (x,y) are visited in order of increasing
+// x+y and each pair with num_states(x,y)>0 gets a consecutive run of state
+// numbers.  Pass 2 fills in, for every state, its two successors and its
+// (x,y) counts.
+StateTable::StateTable(): ns(1024) {
+
+  // Assign states
+  // t[x][y][0] = first state number of the pair, t[x][y][1] = how many.
+  // Pairs with x or y outside the table get n==0 from num_states and are
+  // never written.
+  int state=0;
+  for (int i=0; i<256; ++i) {
+    for (int y=0; y<=i; ++y) {
+      int x=i-y;
+      int n=num_states(x, y);
+      if (n) {
+        t[x][y][0]=state;
+        t[x][y][1]=n;
+        state+=n;
+      }
+    }
+  }
+
+  // Print/generate next state table
+  state=0;
+  for (int i=0; i<N; ++i) {
+    for (int y=0; y<=i; ++y) {
+      int x=i-y;
+      for (int k=0; k<t[x][y][1]; ++k) {
+        int x0=x, y0=y, x1=x, y1=y;  // next x,y for input 0,1
+        int ns0=0, ns1=0;
+        if (state<15) {
+          // Early states: the successor is found by adding one to the
+          // matching count and offsetting within the successor pair's run
+          // by this state's position in its own run.
+          ++x0;
+          ++y1;
+          ns0=t[x0][y0][0]+state-t[x][y][0];
+          ns1=t[x1][y1][0]+state-t[x][y][0];
+          if (x>0) ns1+=t[x-1][y+1][1];
+          ns[state*4]=ns0;
+          ns[state*4+1]=ns1;
+          ns[state*4+2]=x;
+          ns[state*4+3]=y;
+        }
+        else if (t[x][y][1]) {
+          // Later states: successors come from next_state(); after a 1 bit
+          // the second state of the pair is used when the pair has two.
+          next_state(x0, y0, 0);
+          next_state(x1, y1, 1);
+          ns[state*4]=ns0=t[x0][y0][0];
+          ns[state*4+1]=ns1=t[x1][y1][0]+(t[x1][y1][1]>1);
+          ns[state*4+2]=x;
+          ns[state*4+3]=y;
+        }
+          // uncomment to print table above
+//        printf("{%3d,%3d,%2d,%2d},", ns[state*4], ns[state*4+1], 
+//          ns[state*4+2], ns[state*4+3]);
+//        if (state%4==3) printf(" // %d-%d\n  ", state-3, state);
+        // Consistency checks (active only when assertions are enabled).
+        assert(state>=0 && state<256);
+        assert(t[x][y][1]>0);
+        assert(t[x][y][0]<=state);
+        assert(t[x][y][0]+t[x][y][1]>state);
+        assert(t[x][y][1]<=6);
+        assert(t[x0][y0][1]>0);
+        assert(t[x1][y1][1]>0);
+        assert(ns0-t[x0][y0][0]<t[x0][y0][1]);
+        assert(ns0-t[x0][y0][0]>=0);
+        assert(ns1-t[x1][y1][0]<t[x1][y1][1]);
+        assert(ns1-t[x1][y1][0]>=0);
+        ++state;
+      }
+    }
+  }
+//  printf("%d states\n", state); exit(0);  // uncomment to print table above
+}
+
+#endif
+
+///////////////////////////// Squash //////////////////////////////
+
+// return p = 1/(1 + exp(-d)), d scaled by 8 bits, p scaled by 12 bits.
+// Inputs beyond +-2047 saturate to 0 or 4095; otherwise the result is
+// linearly interpolated between 33 table entries spaced 128 apart in d.
+int squash(int d) {
+  static const int table[33]={
+    1,2,3,6,10,16,27,45,73,120,194,310,488,747,1101,
+    1546,2047,2549,2994,3348,3607,3785,3901,3975,4022,
+    4050,4068,4079,4085,4089,4092,4093,4094};
+  if (d<-2047) return 0;
+  if (d>2047) return 4095;
+  const int frac=d&127;       // position between two table entries, 0-127
+  const int idx=(d>>7)+16;    // lower table entry, 0-31
+  return (table[idx]*(128-frac)+table[idx+1]*frac+64) >> 7;
+}
+
+//////////////////////////// Stretch ///////////////////////////////
+
+// Inverse of squash. d = ln(p/(1-p)), d scaled by 8 bits, p by 12 bits.
+// d has range -2047 to 2047 representing -8 to 8.  p has range 0 to 4095.
+
+class Stretch {
+  Array<short> t;  // t[p] = stretch(p) for p in 0..4095
+public:
+  Stretch();
+  int operator()(int p) const {
+    assert(p>=0 && p<4096);
+    return t[p];
+  }
+} stretch;
+
+// Build the table by inverting squash(): for each d in increasing order,
+// every slot from the first unfilled one up to squash(d) is set to d.
+Stretch::Stretch(): t(4096) {
+  int next=0;  // first table slot not yet assigned
+  for (int d=-2047; d<=2047; ++d) {
+    const int p=squash(d);
+    int j=next;
+    while (j<=p) t[j++]=d;
+    next=p+1;
+  }
+  t[4095]=2047;
+}
+
+//////////////////////////// Mixer /////////////////////////////
+
+// Mixer m(N, M, S=1, w=0) combines models using M neural networks with
+//   N inputs each, of which up to S may be selected.  If S > 1 then
+//   the outputs of these neural networks are combined using another
+//   neural network (with parameters S, 1, 1).  If S = 1 then the
+//   output is direct.  The weights are initially w (+-32K).
+//   It is used as follows:
+// m.update() trains the network where the expected output is the
+//   last bit (in the global variable y).
+// m.add(stretch(p)) inputs prediction from one of N models.  The
+//   prediction should be positive to predict a 1 bit, negative for 0,
+//   nominally +-256 to +-2K.  The maximum allowed value is +-32K but
+//   using such large values may cause overflow if N is large.
+// m.set(cxt, range) selects cxt as one of 'range' neural networks to
+//   use.  0 <= cxt < range.  Should be called up to S times such
+//   that the total of the ranges is <= M.
+// m.p() returns the output prediction that the next bit is 1 as a
+//   12 bit number (0 to 4095).
+
+// dot_product returns dot product t*w of n elements.  n is rounded
+// up to a multiple of 8.  Result is scaled down by 8 bits.
+#ifdef NOASM  // no assembly language
+int dot_product(short *t, short *w, int n) {
+  const int padded=(n+7)&-8;  // n rounded up to a multiple of 8
+  int total=0;
+  for (int i=0; i<padded; i+=2) {
+    // Products are summed in pairs and each pair is scaled down by 8 bits.
+    const int pair=t[i]*w[i]+t[i+1]*w[i+1];
+    total+=pair>>8;
+  }
+  return total;
+}
+#else  // The NASM version uses MMX and is about 8 times faster.
+extern "C" int dot_product(short *t, short *w, int n);  // in NASM
+#endif
+
+// Train neural network weights w[n] given inputs t[n] and err.
+// w[i] += t[i]*err, i=0..n-1.  t, w, err are signed 16 bits (+- 32K).
+// err is scaled 16 bits (representing +- 1/2).  w[i] is clamped to +- 32K
+// and rounded.  n is rounded up to a multiple of 8.
+#ifdef NOASM
+void train(short *t, short *w, int n, int err) {
+  const int padded=(n+7)&-8;  // n rounded up to a multiple of 8
+  for (int i=0; i<padded; ++i) {
+    // t[i]*err scaled down by 16 bits, rounded
+    const int delta=((t[i]*err*2>>16)+1)>>1;
+    int wt=w[i]+delta;
+    if (wt>32767) wt=32767;
+    else if (wt<-32768) wt=-32768;
+    w[i]=wt;
+  }
+}
+#else
+extern "C" void train(short *t, short *w, int n, int err);  // in NASM
+#endif
+
+class Mixer {
+  const int N, M, S;   // max inputs, max contexts, max context sets
+  Array<short, 16> tx; // N inputs from add()
+  Array<short, 16> wx; // N*M weights
+  Array<int> cxt;  // S contexts
+  int ncxt;        // number of contexts (0 to S)
+  int base;        // offset of next context
+  int nx;          // Number of inputs in tx, 0 to N
+  Array<int> pr;   // last result (scaled 12 bits)
+  Mixer* mp;       // points to a Mixer to combine results
+public:
+  Mixer(int n, int m, int s=1, int w=0);
+  ~Mixer();
+
+  // Train every selected weight set toward the last bit y, then clear the
+  // inputs and context selections for the next prediction.
+  void update() {
+    for (int k=0; k<ncxt; ++k) {
+      const int err=((y<<12)-pr[k])*7;
+      assert(err>=-32768 && err<32768);
+      short* weights=&wx[cxt[k]*N];
+      train(&tx[0], weights, nx, err);
+    }
+    ncxt=0;
+    base=0;
+    nx=0;
+  }
+
+  // Append one input x (call up to N times per prediction)
+  void add(int x) {
+    assert(nx<N);
+    tx[nx]=x;
+    ++nx;
+  }
+
+  // Select weight set base+cx, then advance base by range
+  // (call S times, sum of ranges <= M)
+  void set(int cx, int range) {
+    assert(range>=0);
+    assert(ncxt<S);
+    assert(cx>=0);
+    assert(base+cx<M);
+    cxt[ncxt]=base+cx;
+    ++ncxt;
+    base+=range;
+  }
+
+  // Predict the next bit
+  int p() {
+    while (nx&7) tx[nx++]=0;  // zero-pad the inputs to a multiple of 8
+    if (!mp) {  // S=1 context: the single network's output is the result
+      return pr[0]=squash(dot_product(&tx[0], &wx[0], nx)>>8);
+    }
+    // Several contexts: feed each network's output into the second mixer
+    mp->update();
+    for (int k=0; k<ncxt; ++k) {
+      short* weights=&wx[cxt[k]*N];
+      pr[k]=squash(dot_product(&tx[0], weights, nx)>>5);
+      mp->add(stretch(pr[k]));
+    }
+    mp->set(0, 1);
+    return mp->p();
+  }
+};
+
+Mixer::~Mixer() {  // frees the combining mixer created by the constructor
+  delete mp;  // mp is 0 when no combining mixer exists; delete of 0 is a no-op
+}
+
+
+Mixer::Mixer(int n, int m, int s, int w):
+    N((n+7)&-8), M(m), S(s), tx(N), wx(N*M),
+    cxt(S), ncxt(0), base(0), nx(0), pr(S), mp(0) {
+  assert(n>0 && N>0 && (N&7)==0 && M>0);
+  const int nweights=N*M;
+  for (int k=0; k<nweights; ++k) wx[k]=w;  // every weight starts at w
+  for (int k=0; k<S; ++k) pr[k]=2048;  // 2048 = middle of the 12 bit scale
+  // With more than one context set, a second mixer with S inputs combines them.
+  if (S>1) mp=new Mixer(S, 1, 1, 0x7fff);
+}
+
+//////////////////////////// APM //////////////////////////////
+
+// APM maps a probability and a context into a new probability
+// that bit y will next be 1.  After each guess it updates
+// its state to improve future guesses.  Methods:
+//
+// APM a(N) creates with N contexts, uses 66*N bytes memory.
+// a.p(pr, cx, rate=7) returns the adjusted probability in context cx (0 to
+//   N-1).  rate determines the learning rate (smaller = faster, default 7).
+//   Probabilities are scaled 12 bits (0-4095).
+
+class APM {
+  int index;     // last p, context (index of the lower table entry used last)
+  const int N;   // number of contexts
+  Array<U16> t;  // [N][33]:  p, context -> p
+public:
+  APM(int n);
+  int p(int pr=2048, int cxt=0, int rate=7) {
+    assert(pr>=0 && pr<4096 && cxt>=0 && cxt<N && rate>0 && rate<32);
+    pr=stretch(pr);
+    int g=(y<<16)+(y<<rate)-y-y;  // update target computed from the last bit y
+    t[index] += g-t[index] >> rate;  // move both entries used last time toward g
+    t[index+1] += g-t[index+1] >> rate;
+    const int w=pr&127;  // interpolation weight (33 points)
+    index=(pr+2048>>7)+cxt*33;  // lower of the two entries bracketing pr in row cxt
+    return t[index]*(128-w)+t[index+1]*w >> 11;  // interpolate, 16 bit -> 12 bit
+  }
+};
+
+// Initially every context row holds the same table mapping p to p.
+APM::APM(int n): index(0), N(n), t(n*33) {
+  for (int row=0; row<N; ++row)
+    for (int col=0; col<33; ++col)  // rows after the first copy row 0
+      t[row*33+col] = row ? t[col] : squash((col-16)*128)*16;
+}
+
+//////////////////////////// StateMap //////////////////////////
+
+// A StateMap maps a nonstationary counter state to a probability.
+// After each mapping, the mapping is adjusted to improve future
+// predictions.  Methods:
+//
+// sm.p(cx) converts state cx (0-255) to a probability (0-4095).
+
+// Counter state -> probability * 4096
+class StateMap {
+protected:
+  int cxt;  // context (the state passed to the previous p() call)
+  Array<U16> t; // 256 states -> probability * 64K
+public:
+  StateMap();
+  int p(int cx) {
+    assert(cx>=0 && cx<t.size());
+    t[cxt]+=(y<<16)-t[cxt]+128 >> 8;  // move previous entry 1/256 of the way toward bit y
+    return t[cxt=cx] >> 4;  // remember cx; return its 16 bit entry scaled to 12 bits
+  }
+};
+
+StateMap::StateMap(): cxt(0), t(256) {
+  for (int state=0; state<256; ++state) {
+    int zeros=nex(state,2), ones=nex(state,3);  // the n0 and n1 values of state
+    // When one side is 0, the other side is weighted 64 times as much.
+    if (!zeros) ones*=64;
+    if (!ones) zeros*=64;
+    t[state] = 65536*(ones+1)/(zeros+ones+2);
+  }
+}
+
+//////////////////////////// hash //////////////////////////////
+
+// Hash 2 to 5 32-bit values into one.  Omitted arguments default to all ones.
+inline U32 hash(U32 a, U32 b, U32 c=0xffffffff, U32 d=0xffffffff,
+    U32 e=0xffffffff) {
+  const U32 mixed=a*200002979u+b*30005491u+c*50004239u+d*70004807u+e*110002499u;
+  return mixed^(mixed>>9)^(a>>2)^(b>>3)^(c>>4)^(d>>5)^(e>>6);
+}
+
+///////////////////////////// BH ////////////////////////////////
+
+// A BH maps a 32 bit hash to an array of B bytes (checksum and B-2 values)
+//
+// BH bh(N); creates N element table with B bytes each.
+//   N must be a power of 2.  The first byte of each element is
+//   reserved for a checksum to detect collisions.  The remaining
+//   B-1 bytes are values, prioritized by the first value.  This
+//   byte is 0 to mark an unused element.
+//   
+// bh[i] returns a pointer to the i'th element, such that
+//   bh[i][0] is a checksum of i, bh[i][1] is the priority, and
+//   bh[i][2..B-1] are other values (0-255).
+//   The low lg(n) bits of i are used as an index into the table.
+//   If a collision is detected, up to M nearby locations in the same
+//   cache line are tested and the first matching checksum or
+//   empty element is returned.
+//   If no match or empty element is found, then the lowest priority
+//   element is replaced.
+
+// 2 byte checksum with LRU replacement (except last 2 by priority)
+template <int B> class BH {
+  enum {M=8};  // search limit (consecutive elements examined per lookup)
+  Array<U8, 64> t; // elements, B bytes each
+  U32 n; // size-1 (used as the index mask)
+public:
+  BH(int i): t(i*B), n(i-1) {  // i = number of elements
+    assert(B>=2 && i>0 && (i&(i-1))==0); // size a power of 2?
+  }
+  U8* operator[](U32 i);  // find or create the element for hash i
+};
+
+template <int B>
+inline  U8* BH<B>::operator[](U32 i) {  // returns a pointer 1 byte into the element
+  int chk=(i>>16^i)&0xffff;  // 16 bit checksum of the hash
+  i=i*M&n;  // first element of the group of M searched
+  U8 *p;
+  U16 *cp;
+  int j;
+  for (j=0; j<M; ++j) {
+    p=&t[(i+j)*B];
+    cp=(U16*)p;  // the checksum occupies the first 2 bytes of the element
+    if (p[2]==0) *cp=chk;  // byte 2 is 0 for an unused element: claim it
+    if (*cp==chk) break;  // found
+  }
+  if (j==0) return p+1;  // front
+  static U8 tmp[B];  // element to move to front
+  if (j==M) {  // no match and no unused element: replace one
+    --j;
+    memset(tmp, 0, B);  // the replacement starts zeroed, with only the checksum set
+    *(U16*)tmp=chk;
+    if (M>2 && t[(i+j)*B+2]>t[(i+j-1)*B+2]) --j;  // of the last 2, drop the lower byte 2
+  }
+  else memcpy(tmp, cp, B);  // save the matching element
+  memmove(&t[(i+1)*B], &t[i*B], j*B);  // shift the first j elements back by one
+  memcpy(&t[i*B], tmp, B);  // and put the found/new element at the front
+  return &t[i*B+1];
+}
+
+/////////////////////////// ContextMap /////////////////////////
+//
+// A ContextMap maps contexts to bit histories and makes predictions
+// to a Mixer.  Methods common to all classes:
+//
+// ContextMap cm(M, C); creates using about M bytes of memory (a power
+//   of 2) for C contexts.
+// cm.set(cx);  sets the next context to cx, called up to C times
+//   cx is an arbitrary 32 bit value that identifies the context.
+//   It should be called before predicting the first bit of each byte.
+// cm.mix(m) updates Mixer m with the next prediction.  Returns 1
+//   if context cx is found, else 0.  Then it extends all the contexts with
+//   global bit y.  It should be called for every bit:
+//
+//     if (bpos==0) 
+//       for (int i=0; i<C; ++i) cm.set(cxt[i]);
+//     cm.mix(m);
+//
+// The different types are as follows:
+//
+// - RunContextMap.  The bit history is a count of 0-255 consecutive
+//     zeros or ones.  Uses 4 bytes per whole byte context.  C=1.
+//     The context should be a hash.
+// - SmallStationaryContextMap.  0 <= cx < M/512.
+//     The state is a 16-bit probability that is adjusted after each
+//     prediction.  C=1.
+// - ContextMap.  For large contexts, C >= 1.  Context need not be hashed.
+
+// Predict to mixer m from bit history state s, using sm to map s to
+// a probability.  Adds 5 inputs to m.  Returns 1 if s is nonzero, else 0.
+inline int mix2(Mixer& m, int s, StateMap& sm) {
+  int p1=sm.p(s);  // 12 bit probability; this call also updates sm
+  int n0=-!nex(s,2);  // all ones (-1) when nex(s,2) is 0, otherwise 0
+  int n1=-!nex(s,3);  // all ones (-1) when nex(s,3) is 0, otherwise 0
+  int st=stretch(p1)>>2;
+  m.add(st);
+  p1>>=4;  // reduce to 8 bits
+  int p0=255-p1;
+  m.add(p1-p0);
+  m.add(st*(n1-n0));  // nonzero only when exactly one of the two masks is set
+  m.add((p1&n0)-(p0&n1));  // the masks pass or zero each term
+  m.add((p1&n1)-(p0&n0));
+  return s>0;
+}
+
+// A RunContextMap maps a context into the next byte and a repeat
+// count (capped at 255).  Size should be a power of 2.  Memory usage is 3M/4.
+class RunContextMap {
+  BH<4> t;  // hash table with 4 byte elements
+  U8* cp;  // current element: cp[0] = repeat count, cp[1] = byte value
+public:
+  RunContextMap(int m): t(m/4) {cp=t[0]+1;}
+  void set(U32 cx) {  // update count
+    if (cp[0]==0 || cp[1]!=buf(1)) cp[0]=1, cp[1]=buf(1);  // new or different byte
+    else if (cp[0]<255) ++cp[0];  // same byte again
+    cp=t[cx]+1;  // then switch to the element for context cx
+  }
+  int p() {  // predict next bit
+    if (cp[1]+256>>8-bpos==c0)  // stored byte agrees with the bits seen so far
+      return ((cp[1]>>7-bpos&1)*2-1)*ilog(cp[0]+1)*8;  // sign = its next bit
+    else
+      return 0;
+  }
+  int mix(Mixer& m) {  // add p() to m; return 1 if the count is nonzero, else 0
+    m.add(p());
+    return cp[0]!=0;
+  }
+};
+
+// Context is looked up directly.  m=size is power of 2 in bytes.
+// Context should be < m/512.  High bits are discarded.
+class SmallStationaryContextMap {
+  Array<U16> t;  // m/2 16-bit entries, all initialized to 32768
+  int cxt;  // offset of the current context's group of 256 entries
+  U16 *cp;  // entry used for the last prediction
+public:
+  SmallStationaryContextMap(int m): t(m/2), cxt(0) {
+    assert((m/2&m/2-1)==0); // power of 2?
+    for (int i=0; i<t.size(); ++i)
+      t[i]=32768;
+    cp=&t[0];
+  }
+  void set(U32 cx) {
+    cxt=cx*256&t.size()-256;  // (cx*256) & (size-256): start of a 256 entry group
+  }
+  void mix(Mixer& m, int rate=7) {
+    *cp += (y<<16)-*cp+(1<<rate-1) >> rate;  // move last entry toward bit y, rounded
+    cp=&t[cxt+c0];  // select the entry for the current context and c0
+    m.add(stretch(*cp>>4));  // 16 bit entry -> 12 bits -> stretch
+  }
+};
+
+// Context map for large contexts.  Most modeling uses this type of context
+// map.  It includes a built in RunContextMap to predict the last byte seen
+// in the same context, and also bit-level contexts that map to a bit
+// history state.
+//
+// Bit histories are stored in a hash table.  The table is organized into
+// 64-byte buckets aligned on cache page boundaries.  Each bucket contains
+// a hash chain of 7 elements, plus a 2 element queue (packed into 1 byte) 
+// of the last 2 elements accessed for LRU replacement.  Each element has
+// a 2 byte checksum for detecting collisions, and an array of 7 bit history
+// states indexed by the last 0 to 2 bits of context.  The buckets are indexed
+// by a context ending after 0, 2, or 5 bits of the current byte.  Thus, each
+// byte modeled results in 3 main memory accesses per context, with all other
+// accesses to cache.
+//
+// On bits 0, 2 and 5, the context is updated and a new bucket is selected.
+// The most recently accessed element is tried first, by comparing the
+// 16 bit checksum, then the 7 elements are searched linearly.  If no match
+// is found, then the element with the lowest priority among the 5 elements 
+// not in the LRU queue is replaced.  After a replacement, the queue is
+// emptied (so that consecutive misses favor a LFU replacement policy).
+// In all cases, the found/replaced element is put in the front of the queue.
+//
+// The priority is the state number of the first element (the one with 0
+// additional bits of context).  The states are sorted by increasing n0+n1
+// (number of bits seen), implementing a LFU replacement policy.
+//
+// When the context ends on a byte boundary (bit 0), only 3 of the 7 bit
+// history states are used.  The remaining 4 bytes implement a run model
+// as follows: <count:7,d:1> <b1> <unused> <unused> where <b1> is the last byte
+// seen, possibly repeated.  <count:7,d:1> is a 7 bit count and a 1 bit
+// flag (represented by count * 2 + d).  If d=0 then <count> = 1..127 is the 
+// number of repeats of <b1> and no other bytes have been seen.  If d is 1 then 
+// other byte values have been seen in this context prior to the last <count> 
+// copies of <b1>.
+//
+// As an optimization, the last two hash elements of each byte (representing
+// contexts with 2-7 bits) are not updated until a context is seen for
+// a second time.  This is indicated by <count,d> = <1,0> (2).  After update,
+// <count,d> is updated to <2,0> or <1,1> (4 or 3).
+
+class ContextMap {
+  const int C;  // max number of contexts
+  class E {  // hash element, 64 bytes
+    U16 chk[7];  // byte context checksums
+    U8 last;     // last 2 accesses (0-6) in low, high nibble
+  public:
+    U8 bh[7][7]; // byte context, 3-bit context -> bit history state
+      // bh[][0] = 1st bit, bh[][1,2] = 2nd bit, bh[][3..6] = 3rd bit
+      // bh[][0] is also a replacement priority, 0 = empty
+    U8* get(U16 chk);  // Find element (0-6) matching checksum.
+      // If not found, insert or replace lowest priority (not last).
+  };
+  Array<E, 64> t;  // bit histories for bits 0-1, 2-4, 5-7
+    // For 0-1, also contains a run count in bh[][3] and value in bh[][4]
+    // (runp points at bh[][3]); mix1 clears bh[][6] after a pending update
+  Array<U8*> cp;   // C pointers to current bit history
+  Array<U8*> cp0;  // First element of 7 element array containing cp[i]
+  Array<U32> cxt;  // C whole byte contexts (hashes)
+  Array<U8*> runp; // C [0..3] = count, value, unused, unused
+  StateMap *sm;    // C maps of state -> p
+  int cn;          // Next context to set by set()
+  void update(U32 cx, int c);  // train model that context cx predicts c
+  int mix1(Mixer& m, int cc, int bp, int c1, int y1);
+    // mix() with global context passed as arguments to improve speed.
+public:
+  ContextMap(int m, int c=1);  // m = memory in bytes, a power of 2, C = c
+  ~ContextMap();
+  void set(U32 cx, int next=-1);   // set next whole byte context to cx
+    // the slot used is (cn & next): -1 = slots in call order, 0 = always slot 0
+  int mix(Mixer& m) {return mix1(m, c0, bpos, buf(1), y);}
+};
+
+// Find or create hash element matching checksum ch
+inline U8* ContextMap::E::get(U16 ch) {
+  if (chk[last&15]==ch) return &bh[last&15][0];  // most recently used element
+  int b=0xffff, bi=0;  // lowest priority seen so far and its index
+  for (int i=0; i<7; ++i) {
+    if (chk[i]==ch) return last=last<<4|i, &bh[i][0];  // found: i becomes most recent
+    int pri=bh[i][0];
+    if ((last&15)!=i && last>>4!=i && pri<b) b=pri, bi=i;  // skip the 2 in the queue
+  }
+  return last=0xf0|bi, chk[bi]=ch, (U8*)memset(&bh[bi][0], 0, 7);  // replace bi, zeroed
+}
+
+// Construct using m bytes of memory for c contexts
+ContextMap::ContextMap(int m, int c): C(c), t(m>>6), cp(c), cp0(c),
+    cxt(c), runp(c), cn(0) {  // m>>6 = number of 64 byte elements
+  assert(m>=64 && (m&m-1)==0);  // power of 2?
+  assert(sizeof(E)==64);
+  sm=new StateMap[C];  // one StateMap per context, freed in the destructor
+  for (int i=0; i<C; ++i) {
+    cp0[i]=cp[i]=&t[0].bh[0][0];  // all pointers start at the first bit history
+    runp[i]=cp[i]+3;  // run info is at offset 3 within the 7 byte array
+  }
+}
+
+ContextMap::~ContextMap() {  // frees the StateMap array made by the constructor
+  delete[] sm;
+}
+
+// Store cx, scrambled, as whole byte context number (cn & next); cn advances.
+inline void ContextMap::set(U32 cx, int next) {
+  const int slot=cn&next;  // next=-1 keeps cn, next=0 always selects slot 0
+  ++cn;
+  assert(slot>=0 && slot<C);
+  U32 v=cx*987654323+slot;  // permute (don't hash) cx to spread the distribution
+  v=(v<<16)|(v>>16);  // swap the 16 bit halves
+  cxt[slot]=v*123456791+slot;
+}
+
+// Update the model with bit y1, and predict next bit to mixer m.
+// Context: cc=c0, bp=bpos, c1=buf(1), y1=y.  Returns the number of contexts
+int ContextMap::mix1(Mixer& m, int cc, int bp, int c1, int y1) {  // with a nonzero state.
+
+  // Update model with y
+  int result=0;
+  for (int i=0; i<cn; ++i) {  // only the contexts set since the last byte boundary
+    if (cp[i]) {
+      assert(cp[i]>=&t[0].bh[0][0] && cp[i]<=&t[t.size()-1].bh[6][6]);
+      assert((long(cp[i])&63)>=15);
+      int ns=nex(*cp[i], y1);  // next bit history state after seeing y1
+      if (ns>=204 && rnd() << (452-ns>>3)) ns-=4;  // probabilistic increment
+      *cp[i]=ns;
+    }
+
+    // Update context pointers (reads the global bpos; mix() passes the same value as bp)
+    if (bpos>1 && runp[i][0]==0)
+      cp[i]=0;  // run count is 0: no bit history is used for this context
+    else if (bpos==1||bpos==3||bpos==6)
+      cp[i]=cp0[i]+1+(cc&1);  // 2nd bit of the element: slots 1-2
+    else if (bpos==4||bpos==7)
+      cp[i]=cp0[i]+3+(cc&3);  // 3rd bit of the element: slots 3-6
+    else {  // bpos is 0, 2 or 5: look up a new element
+      cp0[i]=cp[i]=t[cxt[i]+cc&t.size()-1].get(cxt[i]>>16);
+
+      // Update pending bit histories for bits 2-7
+      if (bpos==0) {
+        if (cp0[i][3]==2) {  // count byte is 2: see the class comment on <count,d>
+          const int c=cp0[i][4]+256;  // stored byte with a leading 1 bit
+          U8 *p=t[cxt[i]+(c>>6)&t.size()-1].get(cxt[i]>>16);  // element for bits 2-4
+          p[0]=1+((c>>5)&1);
+          p[1+((c>>5)&1)]=1+((c>>4)&1);
+          p[3+((c>>4)&3)]=1+((c>>3)&1);
+          p=t[cxt[i]+(c>>3)&t.size()-1].get(cxt[i]>>16);  // element for bits 5-7
+          p[0]=1+((c>>2)&1);
+          p[1+((c>>2)&1)]=1+((c>>1)&1);
+          p[3+((c>>1)&3)]=1+(c&1);
+          cp0[i][6]=0;
+        }
+        // Update run count of previous context
+        if (runp[i][0]==0)  // new context
+          runp[i][0]=2, runp[i][1]=c1;
+        else if (runp[i][1]!=c1)  // different byte in context
+          runp[i][0]=1, runp[i][1]=c1;
+        else if (runp[i][0]<254)  // same byte in context
+          runp[i][0]+=2;
+        else if (runp[i][0]==255)
+          runp[i][0]=128;
+        runp[i]=cp0[i]+3;  // switch to the run info of the new context
+      }
+    }
+
+    // predict from last byte in context
+    int rc=runp[i][0];  // count*2, +1 if 2 different bytes seen
+    if (runp[i][1]+256>>8-bp==cc) {  // stored byte agrees with the bits seen so far
+      int b=(runp[i][1]>>7-bp&1)*2-1;  // predicted bit + for 1, - for 0
+      int c=ilog(rc+1)<<2+(~rc&1);  // parses as ilog(rc+1) << (2+(~rc&1))
+      m.add(b*c);
+    }
+    else
+      m.add(0);
+
+    // predict from bit context
+    result+=mix2(m, cp[i] ? *cp[i] : 0, sm[i]);  // state 0 when there is no history
+  }
+  if (bp==7) cn=0;  // after the last bit of a byte, set() starts again at slot 0
+  return result;
+}
+
+//////////////////////////// Models //////////////////////////////
+
+// All of the models below take a Mixer as a parameter and write
+// predictions to it.
+
+//////////////////////////// matchModel ///////////////////////////
+
+// matchModel() finds the longest matching context and returns its length
+
+int matchModel(Mixer& m) {  // adds 3 inputs to m; returns the match length
+  const int MAXLEN=65534;  // longest allowed match + 1
+  static Array<int> t(MEM);  // hash table of pointers to contexts
+  static int h=0;  // hash of last 7 bytes
+  static int ptr=0;  // points to next byte of match if any
+  static int len=0;  // length of match, or 0 if no match
+  static int result=0;  // len as of the last byte boundary
+  
+  static SmallStationaryContextMap scm1(0x20000);
+
+  if (!bpos) {
+    h=h*997*8+buf(1)+1&t.size()-1;  // update context hash
+    if (len) ++len, ++ptr;  // the match continues: extend it by one byte
+    else {  // find match
+      ptr=t[h];  // position last stored for this hash (0 = none)
+      if (ptr && pos-ptr<buf.size())
+        while (buf(len+1)==buf[ptr-len-1] && len<MAXLEN) ++len;  // count bytes backwards
+    }
+    t[h]=pos;  // update hash table
+    result=len;
+//    if (result>0 && !(result&0xfff)) printf("pos=%d len=%d ptr=%d\n", pos, len, ptr);
+    scm1.set(pos);
+  }
+
+  // predict
+  if (len>MAXLEN) len=MAXLEN;
+  int sgn;  // +1 to predict a 1 bit, -1 for a 0 bit, 0 for no prediction
+  if (len && buf(1)==buf[ptr-1] && c0==buf[ptr]+256>>8-bpos) {  // still matching
+    if (buf[ptr]>>7-bpos&1) sgn=1;  // next bit of the predicted byte
+    else sgn=-1;
+  }
+  else sgn=len=0;  // mismatch: drop the match
+  m.add(sgn*4*ilog(len));
+  m.add(sgn*64*min(len, 32));
+  scm1.mix(m);
+  return result;
+}
+
+//////////////////////////// picModel //////////////////////////
+
+// Model a 1728 by 2376 2-color CCITT bitmap image, left to right scan,
+// MSB first (216 bytes per row, 513216 bytes total).  Insert predictions
+// into m.
+
+void picModel(Mixer& m) {  // called once per bit; adds N=3 inputs to m
+  static U32 r0, r1, r2, r3;  // last 4 rows, bit 8 is over current pixel
+  static Array<U8> t(0x10200);  // model: cxt -> state
+  const int N=3;  // number of contexts
+  static int cxt[N];  // contexts
+  static StateMap sm[N];
+
+  // update the model
+  for (int i=0; i<N; ++i)
+    t[cxt[i]]=nex(t[cxt[i]],y);  // advance each context's state with the last bit
+
+  // update the contexts (pixels surrounding the predicted one)
+  r0+=r0+y;  // shift in the last coded bit
+  r1+=r1+((buf(215)>>(7-bpos))&1);  // shift in a bit from 215 bytes back
+  r2+=r2+((buf(431)>>(7-bpos))&1);  // 431 bytes back
+  r3+=r3+((buf(647)>>(7-bpos))&1);  // 647 bytes back
+  cxt[0]=r0&0x7|r1>>4&0x38|r2>>3&0xc0;
+  cxt[1]=0x100+(r0&1|r1>>4&0x3e|r2>>2&0x40|r3>>1&0x80);
+  cxt[2]=0x200+(r0&0x3f^r1&0x3ffe^r2<<2&0x7f00^r3<<5&0xf800);
+
+  // predict
+  for (int i=0; i<N; ++i)
+    m.add(stretch(sm[i].p(t[cxt[i]])));
+}
+
+//////////////////////////// wordModel /////////////////////////
+
+// Model English text (words and columns/end of line)
+
+void wordModel(Mixer& m) {  // sets 19 contexts per byte, then mixes every bit
+  static U32 word0=0, word1=0, word2=0, word3=0, word4=0, word5=0;  // hashes
+  static U32 text0=0;  // hash stream of letters
+  static ContextMap cm(MEM*16, 20);
+  static int nl1=-3, nl=-2;  // previous, current newline position
+
+  // Update word hashes
+  if (bpos==0) {
+    int c=c4&255;  // last byte
+    if (c>='A' && c<='Z')
+      c+='a'-'A';  // fold to lower case
+    if (c>='a' && c<='z' || c>=128) {  // letter or byte >= 128: extend current word
+      word0=word0*263*32+c;
+      text0=text0*997*16+c;
+    }
+    else if (word0) {  // first non-letter after a word: shift the word history
+      word5=word4*23;
+      word4=word3*19;
+      word3=word2*17;
+      word2=word1*13;
+      word1=word0*11;
+      word0=0;
+    }
+    if (c==10) nl1=nl, nl=pos-1;  // newline
+    int col=min(255, pos-nl), above=buf[nl1+col]; // text column context
+    U32 h=word0*271+buf(1);  // current word hash combined with the last byte
+    
+    cm.set(h);
+    cm.set(word0);
+    cm.set(h+word1);
+    cm.set(word0+word1*31);
+    cm.set(h+word1+word2*29);
+    cm.set(text0&0xffffff);
+    cm.set(text0&0xfffff);
+
+    cm.set(h+word2);
+    cm.set(h+word3);
+    cm.set(h+word4);
+    cm.set(h+word5);
+    cm.set(buf(1)|buf(3)<<8|buf(5)<<16);  // alternate bytes
+    cm.set(buf(2)|buf(4)<<8|buf(6)<<16);
+
+    cm.set(h+word1+word3);
+    cm.set(h+word2+word3);
+
+    // Text column models
+    cm.set(col<<16|buf(1)<<8|above);
+    cm.set(buf(1)<<8|above);
+    cm.set(col<<8|buf(1));
+    cm.set(col);
+  }
+  cm.mix(m);
+}
+
+//////////////////////////// recordModel ///////////////////////
+
+// Model 2-D data with fixed record length.  Also order 1-2 models
+// that include the distance to the last match.
+
+void recordModel(Mixer& m) {  // four ContextMaps with 3 contexts each
+  static int cpos1[256] , cpos2[256], cpos3[256], cpos4[256];  // byte -> last 4 positions
+  static int wpos1[0x10000]; // buf(1..2) -> last position
+  static int rlen=2, rlen1=3, rlen2=4;  // run length and 2 candidates
+  static int rcount1=0, rcount2=0;  // candidate counts
+  static ContextMap cm(32768, 3), cn(32768/2, 3), co(32768*2, 3), cp(MEM, 3);
+
+  // Find record length
+  if (!bpos) {
+    int w=c4&0xffff, c=w&255, d=w>>8;  // last 2 bytes, last byte, byte before it
+#if 1
+    int r=pos-cpos1[c];  // distance to the previous occurrence of c
+    if (r>1 && r==cpos1[c]-cpos2[c]
+        && r==cpos2[c]-cpos3[c] && r==cpos3[c]-cpos4[c]
+        && (r>15 || (c==buf(r*5+1)) && c==buf(r*6+1))) {  // short r needs 2 more repeats
+      if (r==rlen1) ++rcount1;
+      else if (r==rlen2) ++rcount2;
+      else if (rcount1>rcount2) rlen2=r, rcount2=1;  // replace the weaker candidate
+      else rlen1=r, rcount1=1;
+    }
+    if (rcount1>15 && rlen!=rlen1) rlen=rlen1, rcount1=rcount2=0;  // adopt candidate 1
+    if (rcount2>15 && rlen!=rlen2) rlen=rlen2, rcount1=rcount2=0;  // adopt candidate 2
+
+    // Set 2 dimensional contexts
+    assert(rlen>0);
+#endif
+    cm.set(c<<8| (min(255, pos-cpos1[c])/4) );
+    cm.set(w<<9| llog(pos-wpos1[w])>>2);
+    
+    cm.set(rlen|buf(rlen)<<10|buf(rlen*2)<<18);
+    cn.set(w|rlen<<8);
+    cn.set(d|rlen<<16);
+    cn.set(c|rlen<<8);
+
+    co.set(buf(1)<<8|min(255, pos-cpos1[buf(1)]));
+    co.set(buf(1)<<17|buf(2)<<9|llog(pos-wpos1[w])>>2);
+    int col=pos%rlen;  // column within the current record
+    co.set(buf(1)<<8|buf(rlen));
+
+    //cp.set(w*16);
+    //cp.set(d*32);
+    //cp.set(c*64);
+    cp.set(rlen|buf(rlen)<<10|col<<18);
+    cp.set(rlen|buf(1)<<10|col<<18);
+    cp.set(col|rlen<<12);
+
+    // update last context positions
+    cpos4[c]=cpos3[c];
+    cpos3[c]=cpos2[c];
+    cpos2[c]=cpos1[c];
+    cpos1[c]=pos;
+    wpos1[w]=pos;
+  }
+  cm.mix(m);
+  cn.mix(m);
+  co.mix(m);
+  cp.mix(m);
+}
+
+
+//////////////////////////// sparseModel ///////////////////////
+
+// Model order 1-2 contexts with gaps.
+
+void sparseModel(Mixer& m, int seenbefore, int howmany) {  // sets 40 contexts per byte
+  static ContextMap cm(MEM*2, 48);
+  static int mask = 0;  // history of byte classes (fl), 3 bits per byte
+
+  if (bpos==0) {
+
+    cm.set( c4&0x00f0f0f0);  // high bits of recent bytes; +k keeps the contexts distinct
+    cm.set((c4&0xf0f0f0f0)+1);
+    cm.set((c4&0x00f8f8f8)+2);
+    cm.set((c4&0xf8f8f8f8)+3);
+    cm.set((c4&0x00e0e0e0)+4);
+    cm.set((c4&0xe0e0e0e0)+5);
+    cm.set((c4&0x00f0f0ff)+6);
+
+    cm.set(seenbefore);
+    cm.set(howmany);
+    cm.set(c4&0x00ff00ff);
+    cm.set(c4&0xff0000ff);
+    cm.set(buf(1)|buf(5)<<8);
+    cm.set(buf(1)|buf(6)<<8);
+    cm.set(buf(3)|buf(6)<<8);
+    cm.set(buf(4)|buf(8)<<8);
+    
+    for (int i=1; i<8; ++i) {
+      cm.set((buf(i+1)<<8)|buf(i+2));
+      cm.set((buf(i+1)<<8)|buf(i+3));
+      cm.set(seenbefore|buf(i)<<8);
+    }
+
+    int fl = 0;  // class of the last byte.  NOTE(review): as written only 0-4 can occur
+    if( c4&0xff != 0 ){  // NOTE(review): parses as c4&(0xff!=0), i.e. c4&1
+           if( isalpha( c4&0xff ) ) fl = 1;
+      else if( ispunct( c4&0xff ) ) fl = 2;
+      else if( isspace( c4&0xff ) ) fl = 3;
+      else if( c4&0xff == 0xff ) fl = 4;  // NOTE(review): also c4&1, always true here
+      else if( c4&0xff < 16 ) fl = 5;  // NOTE(review): c4&(0xff<16) is c4&0; unreachable
+      else if( c4&0xff < 64 ) fl = 6;  // NOTE(review): unreachable for the same reason
+      else fl = 7;  // presumably kept as is because a fix changes the output - confirm
+    }
+    mask = (mask<<3)|fl;
+    cm.set(mask);
+    cm.set(mask<<8|buf(1));
+    cm.set(mask<<17|buf(2)<<8|buf(3));
+    cm.set(mask&0x1ff|((c4&0xf0f0f0f0)<<9));
+  }
+  cm.mix(m);
+}
+
+//////////////////////////// distanceModel ///////////////////////
+
+// Model for modelling distances between symbols
+
+void distanceModel(Mixer& m) {  // contexts: last byte + distance to a marker byte
+  static ContextMap cr(MEM, 3);
+  if( bpos == 0 ){
+    static int pos00=0,pos20=0,posnl=0;  // last positions of 0x00, 0x20, and 0xff/CR/LF
+    int c=c4&0xff;  // last byte
+    if(c==0x00)pos00=pos;
+    if(c==0x20)pos20=pos;
+    if(c==0xff||c=='\r'||c=='\n')posnl=pos;
+    cr.set(min(pos-pos00,255)|(c<<8));  // distance capped at 255
+    cr.set(min(pos-pos20,255)|(c<<8));
+    cr.set(min(pos-posnl,255)|(c<<8)+234567);  // NOTE(review): + binds before |
+  }
+  cr.mix(m);
+}
+
+//////////////////////////// bmpModel /////////////////////////////////
+
+// Model a 24-bit color uncompressed .bmp or .tif file.  Return
+// width in pixels if an image file is detected, else 0.
+
+// 32-bit little endian number at buf(i)..buf(i-3); computed in U32 so the top
+inline U32 i4(int i) {  // byte cannot overflow signed int (16777216*255 > INT_MAX)
+  assert(i>3);
+  return U32(buf(i))+(U32(buf(i-1))<<8)+(U32(buf(i-2))<<16)+(U32(buf(i-3))<<24);
+}
+
+// 16-bit little endian number at buf(i)..buf(i-1)
+inline int i2(int i) {
+  assert(i>1);
+  return buf(i-1)*256+buf(i);
+}
+
+// Return the square of the value buf(i).
+inline int sqrbuf(int i) {
+  assert(i>0);
+  const int v=buf(i); return v*v;
+}
+
+int bmpModel(Mixer& m) {  // adds inputs to m only while inside a detected image
+  static int w=0;  // width of image in bytes (pixels * 3)
+  static int eoi=0;     // end of image
+  static U32 tiff=0;  // offset of tif header
+  const int SC=0x20000;
+  static SmallStationaryContextMap scm1(SC), scm2(SC),
+    scm3(SC), scm4(SC), scm5(SC), scm6(SC*2);
+  static ContextMap cm(MEM*4, 8);
+
+  // Detect .bmp file header (24 bit color, not compressed)
+  if (!bpos && buf(54)=='B' && buf(53)=='M'
+      && i4(44)==54 && i4(40)==40 && i4(24)==0) {  // checked 54 bytes after 'BM'
+    w=(i4(36)+3&-4)*3;  // image width rounded up to a multiple of 4, times 3
+    const int height=i4(32);
+    eoi=pos;
+    if (w<0x30000 && height<0x10000) {
+      eoi=pos+w*height;  // image size in bytes
+      printf("BMP %dx%d ", w/3, height);
+    }
+    else
+      eoi=pos;  // too large: no image (eoi was already set to pos above)
+  }
+
+  // Detect .tif file header (24 bit color, not compressed).
+  // Parsing is crude, won't work with weird formats.
+  if (!bpos) {
+    if (c4==0x49492a00) tiff=pos;  // Intel format only
+    if (pos-tiff==4 && c4!=0x08000000) tiff=0; // 8=normal offset to directory
+    if (tiff && pos-tiff==200) {  // most of directory should be read by now
+      int dirsize=i2(pos-tiff-4);  // number of 12-byte directory entries
+      w=0;
+      int bpp=0, compression=0, width=0, height=0;
+      for (int i=tiff+6; i<pos-12 && --dirsize>0; i+=12) {
+        int tag=i2(pos-i);  // 256=width, 257==height, 259: 1=no compression
+          // 277=3 samples/pixel
+        int tagfmt=i2(pos-i-2);  // 3=short, 4=long
+        int taglen=i4(pos-i-4);  // number of elements in tagval
+        int tagval=i4(pos-i-8);  // 1 long, 1-2 short, or points to array
+        if ((tagfmt==3||tagfmt==4) && taglen==1) {
+          if (tag==256) width=tagval;
+          if (tag==257) height=tagval;
+          if (tag==259) compression=tagval; // 1 = no compression
+          if (tag==277) bpp=tagval;  // should be 3
+        }
+      }
+      if (width>0 && height>0 && width*height>50 && compression==1
+          && (bpp==1||bpp==3))
+        eoi=tiff+width*height*bpp, w=width*bpp;
+      if (eoi>pos)
+        printf("TIFF %dx%dx%d ", width, height, bpp);
+      else
+        tiff=w=0;  // not usable: forget the header
+    }
+  }
+  if (pos>eoi) return w=0;  // outside an image: add nothing to m
+
+  // Select nearby pixels as context
+  if (!bpos) {
+    assert(w>3);
+    int color=pos%3;  // position modulo the 3 bytes of a pixel
+    int mean=buf(3)+buf(w-3)+buf(w)+buf(w+3);  // sum of 4 neighbors, same color
+    const int var=sqrbuf(3)+sqrbuf(w-3)+sqrbuf(w)+sqrbuf(w+3)-mean*mean/4>>2;
+    mean>>=2;  // now the mean of the 4 neighbors
+    const int logvar=ilog(var);
+    int i=0;  // numbers the contexts so that each hash differs
+    cm.set(hash(++i, buf(3)>>2, buf(w)>>2, color));
+    cm.set(hash(++i, buf(3)>>2, buf(1)>>2, color));
+    cm.set(hash(++i, buf(3)>>2, buf(2)>>2, color));
+    cm.set(hash(++i, buf(w)>>2, buf(1)>>2, color));
+    cm.set(hash(++i, buf(w)>>2, buf(2)>>2, color));
+    cm.set(hash(++i, buf(3)+buf(w)>>1, color));
+    cm.set(hash(++i, buf(3)+buf(w)>>3, buf(1)>>5, buf(2)>>5, color));
+    cm.set(hash(++i, mean, logvar>>5, color));
+    scm1.set(buf(3)+buf(w)>>1);
+    scm2.set(buf(3)+buf(w)-buf(w+3)>>1);
+    scm3.set(buf(3)*2-buf(6)>>1);
+    scm4.set(buf(w)*2-buf(w*2)>>1);
+    scm5.set(buf(3)+buf(w)-buf(w-3)>>1);
+    scm6.set(mean>>1|logvar<<1&0x180);
+  }
+
+  // Predict next bit
+  scm1.mix(m);
+  scm2.mix(m);
+  scm3.mix(m);
+  scm4.mix(m);
+  scm5.mix(m);
+  scm6.mix(m);
+  cm.mix(m);
+  return w;
+}
+
+//////////////////////////// jpegModel /////////////////////////
+
+// Model JPEG. Return 1 if a JPEG file is detected or else 0.
+// Only the baseline and 8 bit extended Huffman coded DCT modes are
+// supported.  The model partially decodes the JPEG image to provide
+// context for the Huffman coded symbols.
+
+// Debug aid: print msg, then the segment starting at buf[p] as hex bytes.
+void dump(const char* msg, int p) {
+  printf("%s:", msg);
+  const int total=buf[p+2]*256+buf[p+3]+2;  // 16 bit length field, plus 2
+  for (int off=0; off<total; ++off)
+    printf(" %02X", buf[p+off]);
+  printf("\n");
+}
+
+// jassert(x): on invalid JPEG data (x false) silently fall back to a non-JPEG
+// model by clearing jpeg and returning next_jpeg from the enclosing function.
+#define jassert(x) if (!(x)) { \
+/*  printf("JPEG error at %d, line %d: %s\n", pos, __LINE__, #x); */ \
+  jpeg=0; \
+  return next_jpeg;}
+
+struct HUF {U32 min, max; int val;}; // one Huffman decode table entry
+  // huf[Tc][Th][m] is the minimum, maximum+1, and pointer to codes for
+  // coefficient type Tc (0=DC, 1=AC), table Th (0-3), length m+1 (m=0-15)
+
+int jpegModel(Mixer& m) {
+
+  // State of parser
+  enum {SOF0=0xc0, SOF1, SOF2, SOF3, DHT, RST0=0xd0, SOI=0xd8, EOI, SOS, DQT,
+    DNL, DRI, APP0=0xe0, COM=0xfe, FF};  // Second byte of 2 byte codes
+  static int jpeg=0;  // 1 if JPEG is header detected, 2 if image data
+  static int next_jpeg=0;  // updated with jpeg on next byte boundary
+  static int app;  // Bytes remaining to skip in APPx or COM field
+  static int sof=0, sos=0, data=0;  // pointers to buf
+  static Array<int> ht(8);  // pointers to Huffman table headers
+  static int htsize=0;  // number of pointers in ht
+
+  // Huffman decode state
+  static U32 huffcode=0;  // Current Huffman code including extra bits
+  static int huffbits=0;  // Number of valid bits in huffcode
+  static int huffsize=0;  // Number of bits without extra bits
+  static int rs=-1;  // Decoded huffcode without extra bits.  It represents
+    // 2 packed 4-bit numbers, r=run of zeros, s=number of extra bits for
+    // first nonzero code.  huffcode is complete when rs >= 0.
+    // rs is -1 prior to decoding incomplete huffcode.
+  static int mcupos=0;  // position in MCU (0-639).  The low 6 bits mark
+    // the coefficient in zigzag scan order (0=DC, 1-63=AC).  The high
+    // bits mark the block within the MCU, used to select Huffman tables.
+
+  // Decoding tables
+  static Array<HUF> huf(128);  // Tc*64+Th*16+m -> min, max, val
+  static int mcusize=0;  // number of coefficients in an MCU
+  static int linesize=0; // width of image in MCU
+  static int hufsel[2][10];  // DC/AC, mcupos/64 -> huf decode table
+  static Array<U8> hbuf(2048);  // Tc*1024+Th*256+hufcode -> RS
+
+  // Image state
+  static Array<int> color(10);  // block -> component (0-3)
+  static Array<int> pred(4);  // component -> last DC value
+  static int dc=0;  // DC value of the current block
+  static int width=0;  // Image width in MCU
+  static int row=0, column=0;  // in MCU (column 0 to width-1)
+  static Buf cbuf(0x20000); // Rotating buffer of coefficients, coded as:
+    // DC: level shifted absolute value, low 4 bits discarded, i.e.
+    //   [-1023...1024] -> [0...255].
+    // AC: as an RS code: a run of R (0-15) zeros followed by an S (0-15)
+    //   bit number, or 00 for end of block (in zigzag order).
+    //   However if R=0, then the format is ssss11xx where ssss is S,
+    //   xx is the first 2 extra bits, and the last 2 bits are 1 (since
+    //   this never occurs in a valid RS code).
+  static int cpos=0;  // position in cbuf
+  static U32 huff1=0, huff2=0, huff3=0, huff4=0;  // hashes of last codes
+  static int rs1, rs2, rs3, rs4;  // last 4 RS codes
+  static int ssum=0, ssum1=0, ssum2=0, ssum3=0, ssum4=0;
+    // sum of S in RS codes in block and last 4 values
+
+  // Be sure to quit on a byte boundary
+  if (!bpos) next_jpeg=jpeg>1;
+  if (bpos && !jpeg) return next_jpeg;
+  if (!bpos && app>0) --app;
+  if (app>0) return next_jpeg;
+  if (!bpos) {
+
+    // Parse.  Baseline DCT-Huffman JPEG syntax is:
+    // SOI APPx... misc... SOF0 DHT... SOS data EOI
+    // SOI (= FF D8) start of image.
+    // APPx (= FF Ex) len ... where len is always a 2 byte big-endian length
+    //   including the length itself but not the 2 byte preceding code.
+    //   Application data is ignored.  There may be more than one APPx.
+    // misc codes are DQT, DNL, DRI, COM (ignored).
+    // SOF0 (= FF C0) len 08 height width Nf [C HV Tq]...
+    //   where len, height, width (in pixels) are 2 bytes, Nf is the repeat
+    //   count (1 byte) of [C HV Tq], where C is a component identifier
+    //   (color, 0-3), HV is the horizontal and vertical dimensions
+    //   of the MCU (high, low bits, packed), and Tq is the quantization
+    //   table ID (not used).  An MCU (minimum compression unit) consists
+    //   of 64*H*V DCT coefficients for each color.
+    // DHT (= FF C4) len [TcTh L1...L16 V1,1..V1,L1 ... V16,1..V16,L16]...
+    //   defines Huffman table Th (1-4) for Tc (0=DC (first coefficient)
+    //   1=AC (next 63 coefficients)).  L1..L16 are the number of codes
+    //   of length 1-16 (in ascending order) and Vx,y are the 8-bit values.
+    //   A V code of RS means a run of R (0-15) zeros followed by S (0-15)
+    //   additional bits to specify the next nonzero value, negative if
+    //   the first additional bit is 0 (e.g. code x63 followed by the
+    //   3 bits 1,0,1 specify 7 coefficients: 0, 0, 0, 0, 0, 0, 5.
+    //   Code 00 means end of block (remainder of 63 AC coefficients is 0).
+    // SOS (= FF DA) len Ns [Cs TdTa]... 0 3F 00
+    //   Start of scan.  TdTa specifies DC/AC Huffman tables (0-3, packed
+    //   into one byte) for component Cs matching C in SOF0, repeated
+    //   Ns (1-4) times.
+    // EOI (= FF D9) is end of image.
+    // Huffman coded data is between SOI and EOI.  Codes may be embedded:
+    // RST0-RST7 (= FF D0 to FF D7) mark the start of an independently
+    //   compressed region.
+    // DNL (= FF DC) 04 00 height
+    //   might appear at the end of the scan (ignored).
+    // FF 00 is interpreted as FF (to distinguish from RSTx, DNL, EOI).
+
+    // Detect JPEG (SOI, APPx)
+    if (!jpeg && buf(4)==FF && buf(3)==SOI && buf(2)==FF && buf(1)>>4==0xe) {
+      jpeg=1;
+      app=sos=sof=htsize=data=mcusize=linesize=0;
+      huffcode=huffbits=huffsize=mcupos=cpos=0, rs=-1;
+      memset(&huf[0], 0, huf.size()*sizeof(HUF));
+      memset(&pred[0], 0, pred.size()*sizeof(int));
+    }
+
+    // Detect end of JPEG when data contains a marker other than RSTx
+    // or byte stuff (00).
+    if (jpeg && data && buf(2)==FF && buf(1) && (buf(1)&0xf8)!=RST0) {
+      jassert(buf(1)==EOI);
+      jpeg=0;
+    }
+    if (!jpeg) return next_jpeg;
+
+    // Detect APPx or COM field
+    if (!data && !app && buf(4)==FF && (buf(3)>>4==0xe || buf(3)==COM))
+      app=buf(2)*256+buf(1)+2;
+
+    // Save pointers to sof, ht, sos, data,
+    if (buf(5)==FF && buf(4)==SOS) {
+      int len=buf(3)*256+buf(2);
+      if (len==6+2*buf(1) && buf(1) && buf(1)<=4)  // buf(1) is Ns
+        sos=pos-5, data=sos+len+2, jpeg=2;
+    }
+    if (buf(4)==FF && buf(3)==DHT && htsize<8) ht[htsize++]=pos-4;
+    if (buf(4)==FF && buf(3)==SOF0) sof=pos-4;
+
+    // Restart
+    if (buf(2)==FF && (buf(1)&0xf8)==RST0) {
+      huffcode=huffbits=huffsize=mcupos=0, rs=-1;
+      memset(&pred[0], 0, pred.size()*sizeof(int));
+    }
+  }
+
+  {
+    // Build Huffman tables
+    // huf[Tc][Th][m] = min, max+1 codes of length m, pointer to byte values
+    if (pos==data && bpos==1) {
+      jassert(htsize>0);
+      for (int i=0; i<htsize; ++i) {
+        int p=ht[i]+4;  // pointer to current table after length field
+        int end=p+buf[p-2]*256+buf[p-1]-2;  // end of Huffman table
+        int count=0;  // sanity check
+        while (p<end && end<pos && end<p+2100 && ++count<10) {
+          int tc=buf[p]>>4, th=buf[p]&15;
+          if (tc>=2 || th>=4) break;
+          jassert(tc>=0 && tc<2 && th>=0 && th<4);
+          HUF* h=&huf[tc*64+th*16]; // [tc][th][0]; 
+          int val=p+17;  // pointer to values
+          int hval=tc*1024+th*256;  // pointer to RS values in hbuf
+          for (int j=0; j<256; ++j) // copy RS codes
+            hbuf[hval+j]=buf[val+j];
+          int code=0;
+          for (int j=0; j<16; ++j) {
+            h[j].min=code;
+            h[j].max=code+=buf[p+j+1];
+            h[j].val=hval;
+            val+=buf[p+j+1];
+            hval+=buf[p+j+1];
+            code*=2;
+          }
+          p=val;
+          jassert(hval>=0 && hval<2048);
+        }
+        jassert(p==end);
+      }
+      huffcode=huffbits=huffsize=0, rs=-1;
+
+      // Build Huffman table selection table (indexed by mcupos).
+      // Get image width.
+      if (!sof && sos) return next_jpeg;
+      int ns=buf[sos+4];
+      int nf=buf[sof+9];
+      jassert(ns<=4 && nf<=4);
+      mcusize=0;  // blocks per MCU
+      int hmax=0;  // MCU horizontal dimension
+      for (int i=0; i<ns; ++i) {
+        for (int j=0; j<nf; ++j) {
+          if (buf[sos+2*i+5]==buf[sof+3*j+10]) { // Cs == C ?
+            int hv=buf[sof+3*j+11];  // packed dimensions H x V
+            if (hv>>4>hmax) hmax=hv>>4;
+            hv=(hv&15)*(hv>>4);  // number of blocks in component C
+            jassert(hv>=1 && hv+mcusize<=10);
+            while (hv) {
+              jassert(mcusize<10);
+              hufsel[0][mcusize]=buf[sos+2*i+6]>>4&15;
+              hufsel[1][mcusize]=buf[sos+2*i+6]&15;
+              jassert (hufsel[0][mcusize]<4 && hufsel[1][mcusize]<4);
+              color[mcusize]=i;
+              --hv;
+              ++mcusize;
+            }
+          }
+        }
+      }
+      jassert(hmax>=1 && hmax<=10);
+      width=buf[sof+7]*256+buf[sof+8];  // in pixels
+      int height=buf[sof+5]*256+buf[sof+6];
+      printf("JPEG %dx%d ", width, height);
+      width=(width-1)/(hmax*8)+1;  // in MCU
+      jassert(width>0);
+      mcusize*=64;  // coefficients per MCU
+      row=column=0;
+    }
+  }
+
+
+  // Decode Huffman
+  {
+    if (mcusize && buf(1+(!bpos))!=FF) {  // skip stuffed byte
+      jassert(huffbits<=32);
+      huffcode+=huffcode+y;
+      ++huffbits;
+      if (rs<0) {
+        jassert(huffbits>=1 && huffbits<=16);
+        const int ac=(mcupos&63)>0;
+        jassert(mcupos>=0 && (mcupos>>6)<10);
+        jassert(ac==0 || ac==1);
+        const int sel=hufsel[ac][mcupos>>6];
+        jassert(sel>=0 && sel<4);
+        const int i=huffbits-1;
+        jassert(i>=0 && i<16);
+        const HUF *h=&huf[ac*64+sel*16]; // [ac][sel];
+        jassert(h[i].min<=h[i].max && h[i].val<2048 && huffbits>0);
+        if (huffcode<h[i].max) {
+          jassert(huffcode>=h[i].min);
+          int k=h[i].val+huffcode-h[i].min;
+          jassert(k>=0 && k<2048);
+          rs=hbuf[k];
+          huffsize=huffbits;
+        }
+      }
+      if (rs>=0) {
+        if (huffsize+(rs&15)==huffbits) { // done decoding
+          huff4=huff3;
+          huff3=huff2;
+          huff2=huff1;
+          huff1=hash(huffcode, huffbits);
+          rs4=rs3;
+          rs3=rs2;
+          rs2=rs1;
+          rs1=rs;
+          int x=0;  // decoded extra bits
+          if (mcupos&63) {  // AC
+            if (rs==0) { // EOB
+              mcupos=mcupos+63&-64;
+              jassert(mcupos>=0 && mcupos<=mcusize && mcupos<=640);
+              while (cpos&63) cbuf[cpos++]=0;
+            }
+            else {  // rs = r zeros + s extra bits for the next nonzero value
+                    // If first extra bit is 0 then value is negative.
+              jassert((rs&15)<=10);
+              const int r=rs>>4;
+              const int s=rs&15;
+              jassert(mcupos>>6==mcupos+r>>6);
+              mcupos+=r+1;
+              x=huffcode&(1<<s)-1;
+              if (s && !(x>>s-1)) x-=(1<<s)-1;
+              for (int i=r; i>=1; --i) cbuf[cpos++]=i<<4|s;
+              cbuf[cpos++]=s<<4|huffcode<<2>>s&3|12;
+              ssum+=s;
+            }
+          }
+          else {  // DC: rs = 0S, s<12
+            jassert(rs<12);
+            ++mcupos;
+            x=huffcode&(1<<rs)-1;
+            if (rs && !(x>>rs-1)) x-=(1<<rs)-1;
+            jassert(mcupos>=0 && mcupos>>6<10);
+            const int comp=color[mcupos>>6];
+            jassert(comp>=0 && comp<4);
+            dc=pred[comp]+=x;
+            jassert((cpos&63)==0);
+            cbuf[cpos++]=dc+1023>>3;
+            ssum4=ssum3;
+            ssum3=ssum2;
+            ssum2=ssum1;
+            ssum1=ssum;
+            ssum=rs;
+          }
+          jassert(mcupos>=0 && mcupos<=mcusize);
+          if (mcupos>=mcusize) {
+            mcupos=0;
+            if (++column==width) column=0, ++row;
+          }
+          huffcode=huffsize=huffbits=0, rs=-1;
+        }
+      }
+    }
+  }
+
+  // Estimate next bit probability
+  if (!jpeg || !data) return next_jpeg;
+
+  // Context model
+  const int N=19;  // size of t, number of contexts
+  static BH<9> t(MEM);  // context hash -> bit history
+    // As a cache optimization, the context does not include the last 1-2
+    // bits of huffcode if the length (huffbits) is not a multiple of 3.
+    // The 7 mapped values are for context+{"", 0, 00, 01, 1, 10, 11}.
+  static Array<U32> cxt(N);  // context hashes
+  static Array<U8*> cp(N);  // context pointers
+  static StateMap sm[N];
+  static Mixer m1(32, 800, 4);
+  static APM a1(1024), a2(0x10000);
+  const static U8 zzu[64]={  // zigzag coef -> u,v
+    0,1,0,0,1,2,3,2,1,0,0,1,2,3,4,5,4,3,2,1,0,0,1,2,3,4,5,6,7,6,5,4,
+    3,2,1,0,1,2,3,4,5,6,7,7,6,5,4,3,2,3,4,5,6,7,7,6,5,4,5,6,7,7,6,7};
+  const static U8 zzv[64]={
+    0,0,1,2,1,0,0,1,2,3,4,3,2,1,0,0,1,2,3,4,5,6,5,4,3,2,1,0,0,1,2,3,
+    4,5,6,7,7,6,5,4,3,2,1,2,3,4,5,6,7,7,6,5,4,3,4,5,6,7,7,6,5,6,7,7};
+
+
+  // Update model
+  if (cp[N-1]) {
+    for (int i=0; i<N; ++i)
+      *cp[i]=nex(*cp[i],y);
+  }
+  m1.update();
+
+  // Update context
+  const int comp=color[mcupos>>6];
+  const int coef=(mcupos&63)|comp<<6;
+  const int hc=huffcode|1<<huffbits;
+  static int hbcount=2;
+  if (++hbcount>2 || huffbits==0) hbcount=0;
+  jassert(coef>=0 && coef<256);
+  const int zu=zzu[mcupos&63], zv=zzv[mcupos&63];
+  if (hbcount==0) {
+    const int mpos=mcupos>>4|!(mcupos&-64)<<7;
+    int n=0;
+    cxt[0]=hash(++n, hc, mcupos>>2, min(3, mcupos&63));
+    cxt[1]=hash(++n, hc, mpos>>4, cbuf[cpos-mcusize]);
+    cxt[2]=hash(++n, hc, mpos>>4, cbuf[cpos-width*mcusize]);
+    cxt[3]=hash(++n, hc, ilog(ssum3), coef);
+    cxt[4]=hash(++n, hc, coef, column>>3);
+    cxt[5]=hash(++n, hc, coef, column>>1);
+    cxt[6]=hash(++n, hc, rs1, mpos);
+    cxt[7]=hash(++n, hc, rs1, rs2);
+    cxt[8]=hash(++n, hc, rs1, rs2, rs3);
+    cxt[9]=hash(++n, hc, ssum>>4, mcupos);
+    cxt[10]=hash(++n, hc, mpos, cbuf[cpos-1]);
+    cxt[11]=hash(++n, hc, dc);
+    cxt[12]=hash(++n, hc, rs1, coef);
+    cxt[13]=hash(++n, hc, rs1, rs2, coef);
+    cxt[14]=hash(++n, hc, mcupos>>3, ssum3>>3);
+    cxt[15]=hash(++n, hc, huff1);
+    cxt[16]=hash(++n, hc, coef, huff1);
+    cxt[17]=hash(++n, hc, zu, comp);
+    cxt[18]=hash(++n, hc, zv, comp);
+  }
+
+  // Predict next bit
+  m1.add(128);
+  assert(hbcount<=2);
+  for (int i=0; i<N; ++i) {
+    if (hbcount==0) cp[i]=t[cxt[i]]+1;
+    else if (hbcount==1) cp[i]+=1+(huffcode&1)*3;
+    else cp[i]+=1+(huffcode&1);
+    int sp=stretch(sm[i].p(*cp[i]));
+    m1.add(sp);
+  }
+  m1.set(0, 1);
+  m1.set(coef, 64);
+  m1.set(mcupos, 640);
+  int pr=m1.p();
+  pr=a1.p(pr, hc&1023);
+  pr=a2.p(pr, hc&255|coef<<8);
+  m.add(stretch(pr));
+  return 1;
+}
+
+//////////////////////////// exeModel /////////////////////////
+
+// Model x86 code.  The contexts are sparse containing only those
+// bits relevant to parsing (2 prefixes, opcode, and mod and r/m fields
+// of modR/M byte).
+
+// Get context at buf(i) relevant to parsing 32-bit x86 code
+// Build the parsing context for the instruction bytes ending at buf(i).
+// i: distance back from the current position (0 = no modR/M byte yet).
+// x: extra value packed above the opcode/modR/M bits (default 0).
+// Result layout: bits 0-3 prefix class, bits 4-11 opcode byte,
+// bits 12-19 mod and r/m fields of the modR/M byte, bits 20+ x.
+U32 execxt(int i, int x=0) {
+  // Classify the byte two positions before buf(i): 0F, 66 or 67.
+  int near=0;
+  if (buf(i+2)==0x0f) near=1;
+  else if (buf(i+2)==0x66) near=2;
+  else if (buf(i+2)==0x67) near=3;
+
+  // Same classification for the byte three positions back, scaled by 4.
+  int far=0;
+  if (buf(i+3)==0x0f) far=4;
+  else if (buf(i+3)==0x66) far=8;
+  else if (buf(i+3)==0x67) far=12;
+
+  int prefix=near+far;
+  int opcode=buf(i+1);
+
+  // Keep only the mod (top 2) and r/m (low 3) bits of the modR/M byte.
+  int modrm=0;
+  if (i) modrm=buf(i)&0xc7;
+
+  return prefix|opcode<<4|modrm<<12|x<<20;
+}
+
+// Feed N sparse x86 parsing contexts (see execxt) into a ContextMap and
+// add its predictions to mixer m.  Contexts are refreshed once per byte.
+void exeModel(Mixer& m) {
+  const int N=12;
+  static ContextMap cm(MEM, N);
+  if (bpos==0) {
+    int i=0;
+    while (i<N) {
+      // Contexts 5 and up also include the last byte; the rest pass 0.
+      const int extra=buf(1)*(i>4);
+      cm.set(execxt(i, extra));
+      ++i;
+    }
+  }
+  cm.mix(m);
+}
+
+//////////////////////////// indirectModel /////////////////////
+
+// The context is a byte string history that occurs within a
+// 1 or 2 byte context.
+
+// Add predictions from 6 indirect contexts to mixer m.  Each context
+// combines recent bytes with the bytes that previously followed the same
+// 1 byte (t1) or 2 byte (t2) context.
+void indirectModel(Mixer& m) {
+  static ContextMap cm(MEM, 6);
+  static U32 t1[256];  // byte -> last 4 bytes that followed it
+  static U16 t2[0x10000];  // 2 bytes -> last 2 bytes that followed them
+
+  if (!bpos) {  // contexts are recomputed only on a byte boundary
+    U32 d=c4&0xffff, c=d&255;  // d = last 2 bytes, c = last byte
+    U32& r1=t1[d>>8];  // history slot of the byte preceding c
+    r1=r1<<8|c;  // shift c into that history
+    U16& r2=t2[c4>>8&0xffff];  // history slot of the 2 bytes preceding c
+    r2=r2<<8|c;  // U16, so only the last 2 followers are kept
+    U32 t=c|t1[c]<<8;  // c plus the bytes that followed c before
+    cm.set(t&0xffff);
+    cm.set(t&0xffffff);
+    cm.set(t);
+    cm.set(t&0xff00);  // history only, without c itself
+    t=d|t2[d]<<16;  // last 2 bytes plus the bytes that followed them before
+    cm.set(t&0xffffff);
+    cm.set(t);
+
+  }
+  cm.mix(m);
+}
+
+//////////////////////////// dmcModel //////////////////////////
+
+// Model using DMC.  The bitwise context is represented by a state graph,
+// initialized to a bytewise order 1 model as in
+// http://plg.uwaterloo.ca/~ftp/dmc/dmc.c but with the following difference:
+// - It uses integer arithmetic.
+// - The threshold for cloning a state increases as memory is used up.
+// - Each state maintains both a 0,1 count and a bit history (as in a
+//   context model).  The 0,1 count is best for stationary data, and the
+//   bit history for nonstationary data.  The bit history is mapped to
+//   a probability adaptively using a StateMap.  The two computed probabilities
+//   are combined.
+// - When memory is used up the state graph is reinitialized to a bytewise
+//   order 1 context as in the original DMC.  However, the bit histories
+//   are not cleared.
+
+// One state of the DMC graph used by dmcModel.
+struct DMCNode {  // 12 bytes
+  unsigned int nx[2];  // next pointers (index of successor for bit 0, bit 1)
+  U8 state;  // bit history
+  unsigned int c0:12, c1:12;  // counts * 256 (12 bits each, so at most 4095)
+};
+
+// Update the DMC state graph with the last coded bit (global y), move to
+// the next state, and add 2 predictions to mixer m: one from the state's
+// bit history and one from its 0/1 counts.
+void dmcModel(Mixer& m) {
+  static int top=0, curr=0;  // allocated, current node
+  static Array<DMCNode> t(MEM*2);  // state graph
+  static StateMap sm;
+  static int threshold=256;
+
+  // clone next state
+  if (top>0 && top<t.size()) {
+    int next=t[curr].nx[y];
+    int n=y?t[curr].c1:t[curr].c0;  // count of the transition just taken
+    int nn=t[next].c0+t[next].c1;  // total count seen by the target state
+    // Clone only if this transition is frequent enough and the target
+    // also receives enough traffic from elsewhere.
+    if (n>=threshold*2 && nn-n>=threshold*3) {
+      int r=n*4096/nn;  // share of the target's traffic from curr, 12 bit
+      assert(r>=0 && r<=4096);
+      // The clone t[top] takes share r of each count; next keeps the rest.
+      t[next].c0 -= t[top].c0 = t[next].c0*r>>12;
+      t[next].c1 -= t[top].c1 = t[next].c1*r>>12;
+      t[top].nx[0]=t[next].nx[0];
+      t[top].nx[1]=t[next].nx[1];
+      t[top].state=t[next].state;
+      t[curr].nx[y]=top;  // redirect the transition to the clone
+      ++top;
+      // NOTE(review): t is created with MEM*2 elements and cloning needs
+      // top<t.size(), so top==MEM*3 looks unreachable and top==MEM*2 is
+      // only hit when the table is full -- confirm against Array::size().
+      if (top==MEM*2) threshold=512;
+      if (top==MEM*3) threshold=768;
+    }
+  }
+
+  // Initialize to a bytewise order 1 model at startup or when flushing memory
+  if (top==t.size() && bpos==1) top=0;
+  if (top==0) {
+    assert(t.size()>=65536);
+    // 256 groups (j) of 256 nodes (i).  Nodes i<127 link to nodes 2i+1 and
+    // 2i+2 of the same group; the others link to the first node of
+    // another group.  The bit history (state) is deliberately left as is.
+    for (int i=0; i<256; ++i) {
+      for (int j=0; j<256; ++j) {
+        if (i<127) {
+          t[j*256+i].nx[0]=j*256+i*2+1;
+          t[j*256+i].nx[1]=j*256+i*2+2;
+        }
+        else {
+          t[j*256+i].nx[0]=(i-127)*256;
+          t[j*256+i].nx[1]=(i+1)*256;
+        }
+        t[j*256+i].c0=128;
+        t[j*256+i].c1=128;
+      }
+    }
+    top=65536;
+    curr=0;
+    threshold=256;
+  }
+
+  // update count, state
+  // Counts grow by 256 per observation and stop below 3800 so that the
+  // 12 bit fields of DMCNode cannot overflow.
+  if (y) {
+    if (t[curr].c1<3800) t[curr].c1+=256;
+  }
+  else if (t[curr].c0<3800) t[curr].c0+=256;
+  t[curr].state=nex(t[curr].state, y);
+  curr=t[curr].nx[y];
+
+  // predict
+  const int pr1=sm.p(t[curr].state);  // from the bit history
+  const int n1=t[curr].c1;
+  const int n0=t[curr].c0;
+  const int pr2=(n1+5)*4096/(n0+n1+10);  // from the counts, 12 bit scale
+  m.add(stretch(pr1));
+  m.add(stretch(pr2));
+}
+
+//////////////////////////// contextModel //////////////////////
+
+// Data type of a block.  The enumerators take the values DEFAULT=0,
+// JPEG=1, EXE=2, TEXT=3.
+typedef enum {DEFAULT, JPEG, EXE, TEXT} Filetype;
+
+// This combines all the context models with a Mixer.
+
+// Run all sub-models for the next bit, mix them and return the mixer's
+// output m.p().  Also tracks the <type> <size> block headers in the
+// input so that the EXE model is enabled only inside EXE blocks.
+int contextModel2() {
+  static ContextMap cm(MEM*32, 9);
+  static RunContextMap rcm7(MEM), rcm9(MEM), rcm10(MEM);
+  static Mixer m(800, 3088, 7, 128);
+  // NOTE(review): the array has 16 entries and the loop below fills
+  // cxt[1..15], so "0-11" in the next comment looks stale -- confirm.
+  static U32 cxt[16];  // order 0-11 contexts
+  static Filetype filetype=DEFAULT;
+  static int size=0;  // bytes remaining in block
+//  static const char* typenames[4]={"", "jpeg ", "exe ", "text "};
+
+  // Parse filetype and size
+  // size counts down once per byte.  At -1 the byte just seen is the type
+  // of the next block; at -5 the last 4 bytes are its size (MSB first).
+  if (bpos==0) {
+    --size;
+    if (size==-1) filetype=(Filetype)buf(1);
+    if (size==-5) {
+      size=buf(4)<<24|buf(3)<<16|buf(2)<<8|buf(1);
+//      if (filetype<=3) printf("(%s%d)", typenames[filetype], size);
+      if (filetype==EXE) size+=8;  // EXE data carries 8 more header bytes
+    }
+  }
+
+  m.update();
+  m.add(256);
+
+  // Test for special file types
+  int isjpeg=jpegModel(m);  // 1 if JPEG is detected, else 0
+  int ismatch=ilog(matchModel(m));  // Length of longest matching context
+  int isbmp=bmpModel(m);  // Image width (bytes) if BMP or TIFF detected, or 0
+
+  // JPEG and BMP/TIFF data skip the general purpose models below and
+  // return early with their own mixer selectors.
+  if (isjpeg) {
+    m.set(1, 8);
+    m.set(c0, 256);
+    m.set(buf(1), 256);
+    return m.p();
+  }
+  else if (isbmp>0) {
+    static int col=0;  // cycles 0-23, advanced on every call (every bit)
+    if (++col>=24) col=0;
+    m.set(2, 8);
+    m.set(col, 24);
+    m.set(buf(isbmp)+buf(3)>>4, 32);  // parses as (buf(isbmp)+buf(3))>>4
+    m.set(c0, 256);
+    return m.p();
+  }
+
+
+  // Normal model
+  if (bpos==0) {
+    // cxt[i] becomes a hash of the last i bytes; cxt[0] is never written.
+    for (int i=15; i>0; --i)  // update order 0-11 context hashes
+      cxt[i]=cxt[i-1]*257+(c4&255)+1;
+    // 9 contexts go to cm (orders 0-6, 8, 14); orders 7, 10 and 12 go to
+    // the run context maps.
+    for (int i=0; i<7; ++i)
+      cm.set(cxt[i]);
+    rcm7.set(cxt[7]);
+    cm.set(cxt[8]);
+    rcm9.set(cxt[10]);
+    rcm10.set(cxt[12]);
+    cm.set(cxt[14]);
+  }
+  int order=cm.mix(m);
+  
+  rcm7.mix(m);
+  rcm9.mix(m);
+  rcm10.mix(m);
+
+  // The remaining models run only at level 4 and above.
+  if (level>=4) {
+    sparseModel(m,ismatch,order);
+    distanceModel(m);
+    picModel(m);
+    recordModel(m);  
+    wordModel(m);
+    indirectModel(m);
+    dmcModel(m);
+    if (filetype==EXE) exeModel(m);
+  }
+
+
+
+  // Clamp order-2 at 0 before using it as part of a mixer selector.
+  order = order-2;
+  if(order<0) order=0;
+
+  U32 c1=buf(1), c2=buf(2), c3=buf(3), c;
+
+  // Mixer selectors; the second argument of set() is the selector range.
+  m.set(c1+8, 264);
+  m.set(c0, 256);
+  m.set(order+8*(c4>>5&7)+64*(c1==c2)+128*(filetype==EXE), 256);
+  m.set(c2, 256);
+  m.set(c3, 256);
+  m.set(ismatch, 256);
+  
+  // Last selector: inside a byte it combines the bit position (capped at
+  // 5), the partial byte c0 and the high bits of c1 and c2; on a byte
+  // boundary it is built from c1, c2, c3 and the top bit of c4.
+  if(bpos)
+  {	
+    c=c0<<(8-bpos); if(bpos==1)c+=c3/2;
+    c=(min(bpos,5))*256+c1/32+8*(c2/32)+(c&192);
+  }
+  else c=c3/128+(c4>>31)*2+4*(c2/64)+(c1&240);
+  m.set(c, 1536);
+  int pr=m.p();
+  return pr;
+}
+
+
+//////////////////////////// Predictor /////////////////////////
+
+// A Predictor estimates the probability that the next bit of
+// uncompressed data is 1.  Methods:
+// p() returns P(1) as a 12 bit number (0-4095).
+// update() trains the predictor with the actual bit (global y, 0 or 1).
+
+class Predictor {
+public:
+  Predictor();
+
+  // Train on the last coded bit and compute the next prediction.
+  void update();
+
+  // Probability that the next bit is 1, as a 12 bit number (0-4095).
+  int p() const {
+    assert(pr>=0 && pr<4096);
+    return pr;
+  }
+
+private:
+  int pr;  // next prediction
+};
+
+// Start with an even prediction: 2048 out of 4096.
+Predictor::Predictor() {
+  pr=2048;
+}
+
+// Shift the last coded bit (global y) into the global context, run the
+// context model, and refine its output through 7 APM stages.  The result
+// is stored in pr for the next call to p().
+void Predictor::update() {
+  static APM a(256), a1(0x10000), a2(0x10000), a3(0x10000),
+                      a4(0x10000), a5(0x10000), a6(0x10000);
+
+  // Update global context: pos, bpos, c0, c4, buf
+  c0+=c0+y;  // c0 = partial byte with a leading 1 bit
+  if (c0>=256) {  // 8 bits collected: store the byte and restart c0
+    buf[pos++]=c0;
+    c4=(c4<<8)+c0-256;  // c4 = last 4 whole bytes
+    c0=1;
+  }
+  bpos=(bpos+1)&7;
+
+  // Filter the context model with APMs
+  int pr0=contextModel2();
+
+  pr=a.p(pr0, c0);  // order 0 refinement
+  
+  // Refine pr0 in orders 1-3 and average the 4 values, rounding.
+  // Note c0^hash(...)&0xffff parses as c0^(hash(...)&0xffff).
+  int pr1=a1.p(pr0, c0+256*buf(1));
+  int pr2=a2.p(pr0, c0^hash(buf(1), buf(2))&0xffff);
+  int pr3=a3.p(pr0, c0^hash(buf(1), buf(2), buf(3))&0xffff);
+  pr0=pr0+pr1+pr2+pr3+2>>2;
+  
+  // Same 3 contexts again, this time refining the order 0 output pr.
+      pr1=a4.p(pr, c0+256*buf(1));
+      pr2=a5.p(pr, c0^hash(buf(1), buf(2))&0xffff);
+      pr3=a6.p(pr, c0^hash(buf(1), buf(2), buf(3))&0xffff);
+  pr=pr+pr1+pr2+pr3+2>>2;
+
+  // Final prediction: rounded average of the two chains.
+  pr=pr+pr0+1>>1;
+}
+
+//////////////////////////// Encoder ////////////////////////////
+
+// An Encoder does arithmetic encoding.  Methods:
+// Encoder(COMPRESS, f) creates encoder for compression to archive f, which
+//   must be open past any header for writing in binary mode.
+// Encoder(DECOMPRESS, f) creates encoder for decompression from archive f,
+//   which must be open past any header for reading in binary mode.
+// code(i) in COMPRESS mode compresses bit i (0 or 1) to file f.
+// code() in DECOMPRESS mode returns the next decompressed bit from file f.
+//   Global y is set to the last bit coded or decoded by code().
+// compress(c) in COMPRESS mode compresses one byte.
+// decompress() in DECOMPRESS mode decompresses and returns one byte.
+// flush() should be called exactly once after compression is done and
+//   before closing f.  It does nothing in DECOMPRESS mode.
+// size() returns current length of archive
+// setFile(f) sets alternate source to FILE* f for decompress() in COMPRESS
+//   mode (for testing transforms).
+// If level (global) is 0, then data is stored without arithmetic coding.
+
+// Direction an Encoder works in: writing or reading the archive.
+typedef enum {COMPRESS, DECOMPRESS} Mode;
+// Binary arithmetic coder driven by a Predictor.  The same code() routine
+// serves both directions so that both sides update the range identically.
+class Encoder {
+private:
+  Predictor predictor;
+  const Mode mode;       // Compress or decompress?
+  FILE* archive;         // Compressed data file
+  U32 x1, x2;            // Range, initially [0, 1), scaled by 2^32
+  U32 x;                 // Decompress mode: last 4 input bytes of archive
+  FILE *alt;             // decompress() source in COMPRESS mode
+
+  // Compress bit y or return decompressed bit
+  // i is the bit to code in COMPRESS mode and is ignored in DECOMPRESS
+  // mode.  Sets global y to the coded bit and returns it.
+  int code(int i=0) {
+    int p=predictor.p();
+    assert(p>=0 && p<4096);
+    p+=p<2048;  // p is now 1-4095, never 0
+    // xmid = x1 + (x2-x1)*p/4096, computed in two parts so that the
+    // product stays within 32 bits.
+    U32 xmid=x1 + (x2-x1>>12)*p + ((x2-x1&0xfff)*p>>12);
+    assert(xmid>=x1 && xmid<x2);
+    if (mode==DECOMPRESS) y=x<=xmid; else y=i;
+    y ? (x2=xmid) : (x1=xmid+1);  // a 1 keeps [x1,xmid], a 0 keeps the rest
+    predictor.update();
+    while (((x1^x2)&0xff000000)==0) {  // pass equal leading bytes of range
+      if (mode==COMPRESS) putc(x2>>24, archive);
+      x1<<=8;
+      x2=(x2<<8)+255;
+      if (mode==DECOMPRESS) x=(x<<8)+(getc(archive)&255);  // EOF is OK
+    }
+    return y;
+  }
+
+public:
+  // Create a coder on archive f; in DECOMPRESS mode at level>0 this reads
+  // the first 4 bytes of f.
+  Encoder(Mode m, FILE* f);
+  Mode getMode() const {return mode;}
+  long size() const {return ftell(archive);}  // length of archive so far
+  void flush();  // call this when compression is finished
+  void setFile(FILE* f) {alt=f;}
+
+  // Compress one byte
+  // At level 0 the byte is stored as is; otherwise its 8 bits are coded
+  // from the most significant bit down.
+  void compress(int c) {
+    assert(mode==COMPRESS);
+    if (level==0)
+      putc(c, archive);
+    else 
+      for (int i=7; i>=0; --i)
+        code((c>>i)&1);
+  }
+
+  // Decompress and return one byte
+  // In COMPRESS mode the byte is read from alt (set by setFile) rather
+  // than decoded.  The getc() paths can return EOF.
+  int decompress() {
+    if (mode==COMPRESS) {
+      assert(alt);
+      return getc(alt);
+    }
+    else if (level==0)
+      return getc(archive);
+    else {
+      int c=0;
+      for (int i=0; i<8; ++i)
+        c+=c+code();  // shift in the next bit, MSB first
+      return c;
+    }
+  }
+};
+
+// Set up the coder on archive f with the full range [0, 0xffffffff].
+// When decoding arithmetic-coded data (level>0), x is preloaded with the
+// first 4 bytes of the archive, MSB first.
+Encoder::Encoder(Mode m, FILE* f):
+    mode(m), archive(f), x1(0), x2(0xffffffff), x(0), alt(0) {
+  if (!(level>0) || mode!=DECOMPRESS) return;
+  int remaining=4;
+  while (remaining>0) {
+    x=(x<<8)+(getc(archive)&255);
+    --remaining;
+  }
+}
+
+// Finish compression by writing the top byte of the low end of the range.
+// Does nothing in DECOMPRESS mode or when level is 0.
+void Encoder::flush() {
+  if (mode!=COMPRESS || !(level>0)) return;
+  putc(x1>>24, archive);  // Flush first unequal byte of range
+}
+
+/////////////////////////// Filters /////////////////////////////////
+//
+// Before compression, data is encoded in blocks with the following format:
+//
+//   <type> <size> <encoded-data>
+//
+// Type is 1 byte (type Filetype): DEFAULT=0, JPEG, EXE
+// Size is 4 bytes in big-endian format.
+// Encoded-data decodes to <size> bytes.  The encoded size might be
+// different.  Encoded data is designed to be more compressible.
+//
+//   void encode(FILE* in, FILE* out, int n);
+//
+// Reads n bytes of in (open in "rb" mode) and encodes one or
+// more blocks to temporary file out (open in "wb+" mode).
+// The file pointer of in is advanced n bytes.  The file pointer of
+// out is positioned after the last byte written.
+//
+//   en.setFile(FILE* out);
+//   int decode(Encoder& en);
+//
+// Decodes and returns one byte.  Input is from en.decompress(), which
+// reads from out if in COMPRESS mode.  During compression, n calls
+// to decode() must exactly match n bytes of in, or else it is compressed
+// as type 0 without encoding.
+//
+//   Filetype detect(FILE* in, int n, Filetype type);
+//
+// Reads n bytes of in, and detects when the type changes to
+// something else.  If it does, then the file pointer is repositioned
+// to the start of the change and the new type is returned.  If the type
+// does not change, then it repositions the file pointer n bytes ahead
+// and returns the old type.
+//
+// For each type X there are the following 2 functions:
+//
+//   void encode_X(FILE* in, FILE* out, int n, ...);
+//
+// encodes n bytes from in to out.
+//
+//   int decode_X(Encoder& en);
+//
+// decodes one byte from en and returns it.  decode() and decode_X()
+// maintain state information using static variables.
+
+// Detect EXE or JPEG data
+// Scan up to n bytes of in for a change of data type.
+// in:   input file, read from its current position.
+// n:    maximum number of bytes to scan.
+// type: type of the data at the current position.
+// Returns the new type with in repositioned at the change, or type with
+// in advanced n bytes if nothing changes.  Returns (Filetype)(-1) if EOF
+// is reached first.  When a JPEG ends (return DEFAULT below) the file
+// position is left just after the terminating marker.
+Filetype detect(FILE* in, int n, Filetype type) {
+  U32 buf1=0, buf0=0;  // last 8 bytes
+  long start=ftell(in);
+
+  // For EXE detection
+  Array<int> abspos(256),  // CALL/JMP abs. addr. low byte -> last offset
+    relpos(256);    // CALL/JMP relative addr. low byte -> last offset
+  int e8e9count=0;  // number of consecutive CALL/JMPs
+  int e8e9pos=0;    // offset of first CALL or JMP instruction
+  int e8e9last=0;   // offset of most recent CALL or JMP
+
+  // For JPEG detection
+  int soi=0, sof=0, sos=0;  // position where found
+
+  for (int i=0; i<n; ++i) {
+    int c=getc(in);
+    if (c==EOF) return (Filetype)(-1);
+    buf1=buf1<<8|buf0>>24;  // shift c into the 8 byte window buf1:buf0
+    buf0=buf0<<8|c;
+
+    // Detect JPEG by code SOI APPx (FF D8 FF Ex) followed by
+    // SOF0 (FF C0 xx xx 08) and SOS (FF DA) within a reasonable distance.
+    // Detect end by any code other than RST0-RST7 (FF D0-D7) or
+    // a byte stuff (FF 00).
+
+    if (i>=3 && (buf0&0xfffffff0)==0xffd8ffe0) soi=i;
+    if (soi && i-soi<0x10000 && (buf1&0xff)==0xff
+        && (buf0&0xff0000ff)==0xc0000008)
+      sof=i;
+    if (soi && sof && sof>soi && i-soi<0x10000 && i-sof<0x1000
+        && (buf0&0xffff)==0xffda) {
+      sos=i;
+      // soi is the offset of the 4th byte of FF D8 FF Ex, so the image
+      // starts at soi-3.
+      if (type!=JPEG) return fseek(in, start+soi-3, SEEK_SET), JPEG;
+    }
+    if (type==JPEG && sos && i>sos && (buf0&0xff00)==0xff00
+        && (buf0&0xff)!=0 && (buf0&0xf8)!=0xd0)
+      return DEFAULT;
+
+    // Detect EXE if the low order byte (little-endian) XX is more
+    // recently seen (and within 4K) if a relative to absolute address
+    // conversion is done in the context CALL/JMP (E8/E9) XX xx xx 00/FF
+    // 4 times in a row.  Detect end of EXE at the last
+    // place this happens when it does not happen for 64KB.
+    // NOTE(review): the end test below uses 0x1000 (4KB), not 64KB --
+    // confirm which one is intended.
+
+    // The window matches when the byte 5 back is E8/E9 and the newest
+    // byte is 00 or FF.
+    if ((buf1&0xfe)==0xe8 && (buf0+1&0xfe)==0) {
+      int r=buf0>>24;  // relative address low 8 bits
+      int a=(buf0>>24)+i&0xff;  // absolute address low 8 bits
+      int rdist=i-relpos[r];
+      int adist=i-abspos[a];
+      if (adist<rdist && adist<0x1000 && abspos[a]>5) {
+        e8e9last=i;
+        ++e8e9count;
+        if (e8e9pos==0 || e8e9pos>abspos[a]) e8e9pos=abspos[a];
+      }
+      else e8e9count=0;
+      if (type!=EXE && e8e9count>=4 && e8e9pos>5)
+        return fseek(in, start+e8e9pos-5, SEEK_SET), EXE;
+      abspos[a]=i;
+      relpos[r]=i;
+    }
+    if (type==EXE && i-e8e9last>0x1000)
+      return fseek(in, start+e8e9last, SEEK_SET), DEFAULT;
+  }
+  return type;
+}
+
+// Default encoding as self
+// Copy len bytes from in to out unchanged.
+void encode_default(FILE* in, FILE* out, int len) {
+  while (len!=0) {
+    --len;
+    const int c=getc(in);
+    putc(c, out);
+  }
+}
+
+// Return the next byte from en unchanged (inverse of encode_default).
+int decode_default(Encoder& en) {
+  const int c=en.decompress();
+  return c;
+}
+
+// JPEG encode as self.  The purpose is to shield jpegs from exe transform.
+// Copy len bytes of JPEG data from in to out unchanged.
+void encode_jpeg(FILE* in, FILE* out, int len) {
+  while (len!=0) {
+    --len;
+    const int c=getc(in);
+    putc(c, out);
+  }
+}
+
+// Return the next byte from en unchanged (inverse of encode_jpeg).
+int decode_jpeg(Encoder& en) {
+  const int c=en.decompress();
+  return c;
+}
+
+// EXE transform: <encoded-size> <begin> <block>...
+// Encoded-size is 4 bytes, MSB first.
+// begin is the offset of the start of the input file, 4 bytes, MSB first.
+// Each block applies the e8e9 transform to strings falling entirely
+// within the block starting from the end and working backwards.
+// The 5 byte pattern is E8/E9 xx xx xx 00/FF (x86 CALL/JMP xxxxxxxx)
+// where xxxxxxxx is a relative address LSB first.  The address is
+// converted to an absolute address by adding the offset mod 2^25
+// (in range +-2^24).
+
+// Write len and begin (4 bytes each, MSB first), then len bytes of in with
+// the E8/E9 transform applied in blocks of BLOCK bytes.
+// in:    input, read from its current position.
+// out:   output file.
+// len:   number of input bytes to transform.
+// begin: offset of this data in the input file, added to each address.
+// Calls quit() if fewer bytes than expected can be read.
+void encode_exe(FILE* in, FILE* out, int len, int begin) {
+  const int BLOCK=0x10000;
+  Array<U8> blk(BLOCK);
+  fprintf(out, "%c%c%c%c", len>>24, len>>16, len>>8, len); // size, MSB first
+  fprintf(out, "%c%c%c%c", begin>>24, begin>>16, begin>>8, begin); 
+
+  // Transform
+  for (int offset=0; offset<len; offset+=BLOCK) {
+    int size=min(len-offset, BLOCK);
+    int bytesRead=fread(&blk[0], 1, size, in);
+    if (bytesRead!=size) quit("encode_exe read error");
+    // Scan backwards; i is the last byte of a candidate 5 byte pattern.
+    for (int i=bytesRead-1; i>=4; --i) {
+      if ((blk[i-4]==0xe8||blk[i-4]==0xe9) && (blk[i]==0||blk[i]==0xff)) {
+        // Little-endian operand plus the position just after the pattern.
+        int a=(blk[i-3]|blk[i-2]<<8|blk[i-1]<<16|blk[i]<<24)+offset+begin+i+1;
+        // Keep the low 25 bits and sign-extend them, so the top byte is
+        // again 00 or FF.
+        a<<=7;
+        a>>=7;
+        blk[i]=a>>24;
+        blk[i-1]=a>>16;
+        blk[i-2]=a>>8;
+        blk[i-3]=a;
+      }
+    }
+    fwrite(&blk[0], 1, bytesRead, out);
+  }
+}
+
+// Decode and return one byte of EXE data from en, undoing the E8/E9
+// transform.  State is kept in static variables between calls: the
+// 8 byte header is read when the previous data is used up, and a 5 byte
+// queue lets a whole pattern be seen before its first byte is returned.
+int decode_exe(Encoder& en) {
+  const int BLOCK=0x10000;  // block size
+  static int offset=0, q=0;  // decode state: file offset, queue size
+  static int size=0;  // where to stop coding
+  static int begin=0;  // offset in file
+  static U8 c[5];  // queue of last 5 bytes, c[0] at front
+
+  // Read size from first 4 bytes, MSB first
+  // (followed by begin in the same format).  The loop repeats while the
+  // size read is 0.
+  while (offset==size && q==0) {
+    offset=0;
+    size=en.decompress()<<24;
+    size|=en.decompress()<<16;
+    size|=en.decompress()<<8;
+    size|=en.decompress();
+    begin=en.decompress()<<24;
+    begin|=en.decompress()<<16;
+    begin|=en.decompress()<<8;
+    begin|=en.decompress();
+  }
+
+  // Fill queue
+  while (offset<size && q<5) {
+    memmove(c+1, c, 4);  // shift older bytes toward c[4]
+    c[0]=en.decompress();
+    ++q;
+    ++offset;
+  }
+
+  // E8E9 transform: E8/E9 xx xx xx 00/FF -> subtract location from x
+  // Here c[4] is the opcode and c[0] the newest byte; offset already
+  // points just past the pattern.
+  if (q==5 && (c[4]==0xe8||c[4]==0xe9) && (c[0]==0||c[0]==0xff)
+      && ((offset-1^offset-5)&-BLOCK)==0) { // not crossing block boundary
+    int a=(c[3]|c[2]<<8|c[1]<<16|c[0]<<24)-offset-begin;
+    // Keep the low 25 bits and sign-extend them, as in encode_exe.
+    a<<=7;
+    a>>=7;
+    c[3]=a;
+    c[2]=a>>8;
+    c[1]=a>>16;
+    c[0]=a>>24;
+  }
+
+  // return oldest byte in queue
+  assert(q>0 && q<=5);
+  return c[--q];
+}
+
+
+
+// Split n bytes into blocks by type.  For each block, output
+// <type> <size> and call encode_X to convert to type X.
+// Split the next n bytes of in into typed blocks and write each one to out
+// as <type> <size> <encoded-data>, with the size as 4 bytes MSB first.
+// NOTE(review): if detect() hits EOF before reading anything, the block
+// length is 0 and n never shrinks, so n must not exceed the bytes left.
+void encode(FILE* in, FILE* out, int n) {
+  Filetype current=DEFAULT;
+  for (long blockStart=ftell(in); n>0; ) {
+    // detect() leaves the file position at the end of the current block.
+    const Filetype upcoming=detect(in, n, current);
+    const long blockEnd=ftell(in);
+    fseek(in, blockStart, SEEK_SET);
+    const int len=int(blockEnd-blockStart);
+    if (len>0) {
+      fprintf(out, "%c%c%c%c%c", current, len>>24, len>>16, len>>8, len);
+      if (current==JPEG)
+        encode_jpeg(in, out, len);
+      else if (current==EXE)
+        encode_exe(in, out, len, blockStart);
+      else
+        encode_default(in, out, len);
+    }
+    n-=len;
+    current=upcoming;
+    blockStart=blockEnd;
+  }
+}
+
+// Inverse of encode(): return the next original byte from a stream of
+// <type> <len> <data> segments, reading a new header when one runs out.
+int decode(Encoder& en) {
+  static Filetype type=DEFAULT;  // type of the segment being decoded
+  static int len=0;              // bytes still owed by that segment
+  while (len==0) {                // header: 1 type byte, 4 length bytes
+    type=(Filetype)en.decompress();
+    const int b3=en.decompress();
+    const int b2=en.decompress();
+    const int b1=en.decompress();
+    const int b0=en.decompress();
+    len=(b3<<24)|(b2<<16)|(b1<<8)|b0;
+    if (len<0) len=1;
+  }
+  --len;
+  if (type==JPEG) return decode_jpeg(en);
+  if (type==EXE) return decode_exe(en);
+  return decode_default(en);
+}
+
+//////////////////// Compress, Decompress ////////////////////////////
+
+// Print progress (every 4096 bytes): n is the number of bytes processed so far
+void printStatus(int n) {
+  if (n<=0 || (n&0x0fff)!=0) return;
+  printf("%12d\b\b\b\b\b\b\b\b\b\b\b\b", n), fflush(stdout);
+}
+
+// Compress filename (filesize bytes) to en, transforming it block by block
+void compress(const char* filename, long filesize, Encoder& en) {
+  assert(en.getMode()==COMPRESS);
+  assert(filename && filename[0]);
+  FILE *f=fopen(filename, "rb");
+  if (!f) perror(filename), quit();
+  long start=en.size();  // en.size() on entry, subtracted for the final report
+  printf("%s %ld -> ", filename, filesize);
+
+  // Transform and test in blocks
+  const int BLOCK=MEM*64;
+  for (int i=0; filesize>0; i+=BLOCK) {  // i = input bytes handled before this block
+    int size=BLOCK;
+    if (size>filesize) size=filesize;  // final block may be short
+    FILE* tmp=tmpfile();  // holds encode() output for this block
+    if (!tmp) perror("tmpfile"), quit();
+    long savepos=ftell(f);  // where this block starts in f
+    encode(f, tmp, size);
+
+    // Test transform: decode tmp through en and compare with the input bytes
+    rewind(tmp);
+    en.setFile(tmp);
+    fseek(f, savepos, SEEK_SET);
+    long j;
+    int c1=0, c2=0;  // c1 = decoded byte, c2 = input byte
+    for (j=0; j<size; ++j)
+      if ((c1=decode(en))!=(c2=getc(f))) break;
+
+    // Test fails (mismatch, or tmp has bytes left over), compress without transform
+    if (j!=size || getc(tmp)!=EOF) {
+      printf("Transform fails at %ld, input=%d decoded=%d, skipping...\n", i+j, c2, c1);
+      en.compress(0);  // same <type> <size> header that decode() reads; type byte is 0
+      en.compress(size>>24);
+      en.compress(size>>16);
+      en.compress(size>>8);
+      en.compress(size);
+      fseek(f, savepos, SEEK_SET);
+      for (int j=0; j<size; ++j) {
+        printStatus(i+j);
+        en.compress(getc(f));
+      }
+    }
+
+    // Test succeeds, decode(encode(f)) == f, compress tmp
+    else {
+      rewind(tmp);
+      int c;
+      j=0;
+      while ((c=getc(tmp))!=EOF) {
+        printStatus(i+j++);
+        en.compress(c);
+      }
+    }
+    filesize-=size;
+    fclose(tmp);  // deletes
+  }
+  if (f) fclose(f);
+  printf("%-12ld\n", en.size()-start);
+}
+
+// Try to make a directory, return true if successful.
+// Builds that define neither WINDOWS nor UNIX cannot create directories
+// and always report failure.
+bool makedir(const char* dir) {
+#if defined(WINDOWS)
+  return CreateDirectory(dir, 0)==TRUE;
+#elif defined(UNIX)
+  return mkdir(dir, 0777)==0;
+#else
+  return false;
+#endif
+}
+
+// Decode filesize bytes from en: compare with filename if it exists, else extract
+void decompress(const char* filename, long filesize, Encoder& en) {
+  assert(en.getMode()==DECOMPRESS);
+  assert(filename && filename[0]);
+
+  // Test if output file exists.  If so, then compare.
+  FILE* f=fopen(filename, "rb");
+  if (f) {
+    printf("Comparing %s %ld -> ", filename, filesize);
+    bool found=false;  // mismatch?
+    for (int i=0; i<filesize; ++i) {
+      printStatus(i);
+      int c1=found?EOF:getc(f);  // stop reading the file after the first mismatch
+      int c2=decode(en);         // but keep decoding all filesize bytes from en
+      if (c1!=c2 && !found) {
+        printf("differ at %d: file=%d archive=%d\n", i, c1, c2);
+        found=true;
+      }
+    }
+    if (!found && getc(f)!=EOF)
+      printf("file is longer\n");
+    else if (!found)
+      printf("identical   \n");
+    fclose(f);
+  }
+
+  // Create file
+  else {
+    f=fopen(filename, "wb");
+    if (!f) {  // Try creating directories in path and try again
+      String path(filename);
+      for (int i=0; path[i]; ++i) {
+        if (path[i]=='/' || path[i]=='\\') {
+          char savechar=path[i];
+          path[i]=0;  // cut the path at this separator to name one directory level
+          if (makedir(path.c_str()))
+            printf("Created directory %s\n", path.c_str());
+          path[i]=savechar;  // restore the separator and continue with the next level
+        }
+      }
+      f=fopen(filename, "wb");
+    }
+
+    // Decompress
+    if (f) {
+      printf("Extracting %s %ld -> ", filename, filesize);
+      for (int i=0; i<filesize; ++i) {
+        printStatus(i);
+        putc(decode(en), f);
+      }
+      fclose(f);
+      printf("done        \n");
+    }
+
+    // Can't create, discard data (the bytes are still decoded and thrown away)
+    else {
+      perror(filename);
+      printf("Skipping %s %ld -> ", filename, filesize);
+      for (int i=0; i<filesize; ++i) {
+        printStatus(i);
+        decode(en);
+      }
+      printf("not extracted\n");
+    }
+  }
+}
+
+//////////////////////////// User Interface ////////////////////////////
+
+// Read one line, return NULL at EOF or ^Z.  f may be opened ascii or binary.
+// Trailing \r\n is dropped.  Line length is unlimited.
+
+const char* getline(FILE *f=stdin) {
+  static String s;  // reused line buffer; the result points into it
+  int n=0;          // characters stored so far
+  int ch;
+  for (ch=getc(f); ch!=EOF && ch!=26 && ch!='\n'; ch=getc(f)) {
+    if (n>=s.size()) s.resize(n*2+1);  // grow geometrically
+    if (ch=='\r') continue;            // carriage returns are not stored
+    s[n++]=ch;
+  }
+  if (n>=s.size()) s.resize(n+1);      // room for the terminator
+  s[n]=0;
+  const bool atEnd=(ch==EOF || ch==26);  // 26 is ^Z
+  return atEnd ? 0 : s.c_str();
+}
+
+// int expand(String& archive, String& s, const char* fname, int base) {
+// Given file name fname, print its length and base name (beginning
+// at fname+base) to archive in format "%ld\t%s\r\n" and append the
+// full name (including path) to String s in format "%s\n".  If fname
+// is a directory then substitute all of its regular files and recursively
+// expand any subdirectories.  Base initially points to the first
+// character after the last / in fname, but in subdirectories includes
+// the path from the topmost directory.  Return the number of files
+// whose names are appended to s and archive.
+
+// Same as expand() except fname is an ordinary file.  Returns 1 if the
+// entry was appended to archive and s, 0 if fname could not be sized.
+int putsize(String& archive, String& s, const char* fname, int base) {
+  FILE *in=fopen(fname, "rb");
+  if (!in) return 0;
+  fseek(in, 0, SEEK_END);
+  const long bytes=ftell(in);  // file length = position of its end
+  fclose(in);
+  if (bytes<0) return 0;
+  // Header line "<size>\t<name from base>\r\n"
+  static char blk[24];
+  sprintf(blk, "%ld\t", bytes);
+  archive+=blk;
+  archive+=(fname+base);
+  archive+="\r\n";
+
+  // Full path, one per line, for the caller to open later
+  s+=fname;
+  s+="\n";
+  return 1;
+}
+
+#ifdef WINDOWS
+
+int expand(String& archive, String& s, const char* fname, int base) {
+  // Anything that is not an existing directory is handed to putsize()
+  DWORD attr=GetFileAttributes(fname);
+  if (attr==0xFFFFFFFF || !(attr & FILE_ATTRIBUTE_DIRECTORY))
+    return putsize(archive, s, fname, base);
+  // Enumerate the directory and recurse into every entry but "." and ".."
+  int result=0;  // number of files appended below this directory
+  WIN32_FIND_DATA ffd;
+  String fdir(fname);
+  fdir+="/*";
+  HANDLE h=FindFirstFile(fdir.c_str(), &ffd);
+  if (h==INVALID_HANDLE_VALUE) return result;  // no handle, so nothing to close
+  do {
+    if (!equals(ffd.cFileName, ".") && !equals(ffd.cFileName, "..")) {
+      String d(fname);
+      d+="/";
+      d+=ffd.cFileName;
+      result+=expand(archive, s, d.c_str(), base);
+    }
+  } while (FindNextFile(h, &ffd)==TRUE);
+  FindClose(h);
+  return result;
+}
+
+#else
+#ifdef UNIX
+
+int expand(String& archive, String& s, const char* fname, int base) {
+  int result=0;  // number of files appended to archive and s
+  struct stat sb;
+  if (stat(fname, &sb)<0) return 0;  // stat failed: report no files
+  // The type tests use S_ISREG/S_ISDIR: the S_IFxxx values are multi-bit codes,
+  // not flags, so "st_mode & S_IFREG" would also be true for e.g. a socket.
+  if (S_ISREG(sb.st_mode) && (sb.st_mode & 0400))  // regular and owner-readable
+    result+=putsize(archive, s, fname, base);
+
+  // If a directory with read and execute permission, traverse it
+  else if (S_ISDIR(sb.st_mode) && (sb.st_mode & 0400) && (sb.st_mode & 0100)) {
+    DIR *dirp=opendir(fname);
+    if (!dirp) {
+      perror("opendir");
+      return result;
+    }
+    dirent *dp;
+    while(errno=0, (dp=readdir(dirp))!=0) {  // errno cleared to tell error from end
+      if (!equals(dp->d_name, ".") && !equals(dp->d_name, "..")) {
+        String d(fname);
+        d+="/";
+        d+=dp->d_name;
+        result+=expand(archive, s, d.c_str(), base);
+      }
+    }
+    if (errno) perror("readdir");
+    closedir(dirp);
+  }
+  else printf("%s is not a readable file or directory\n", fname);
+  return result;
+}
+
+#else  // Not WINDOWS or UNIX, ignore directories
+
+int expand(String& archive, String& s, const char* fname, int base) {
+  const int appended=putsize(archive, s, fname, base);  // plain files only
+  return appended;
+}
+#endif
+#endif
+
+
+// To compress to file1.paq8l: paq8l [-n] file1 [file2...]
+// To decompress: paq8l file1.paq8l [output_dir]
+int main(int argc, char** argv) {
+  bool pause=argc<=2;  // Pause when done?
+  try {
+
+    // Get option
+    bool doExtract=false;  // -d option
+    if (argc>1 && argv[1][0]=='-' && argv[1][1] && !argv[1][2]) {
+      if (argv[1][1]>='0' && argv[1][1]<='9')
+        level=argv[1][1]-'0';
+      else if (argv[1][1]=='d')
+        doExtract=true;
+      else
+        quit("Valid options are -0 through -9 or -d\n");
+      --argc;  // drop the option so argv[1] is the first file name
+      ++argv;
+      pause=false;
+    }
+
+    // Print help message
+    if (argc<2) {
+      printf(PROGNAME " archiver (C) 2006, Matt Mahoney et al.\n"
+        "Free under GPL, http://www.gnu.org/licenses/gpl.txt\n\n"
+#ifdef WINDOWS
+        "To compress or extract, drop a file or folder on the "
+        PROGNAME " icon.\n"
+        "The output will be put in the same folder as the input.\n"
+        "\n"
+        "Or from a command window: "
+#endif
+        "To compress:\n"
+        "  " PROGNAME " -level file               (compresses to file." PROGNAME ")\n"
+        "  " PROGNAME " -level archive files...   (creates archive." PROGNAME ")\n"
+        "  " PROGNAME " file                      (level -%d, pause when done)\n"
+        "level: -0 = store, -1 -2 -3 = faster (uses 35, 48, 59 MB)\n"
+        "-4 -5 -6 -7 -8 = smaller (uses 133, 233, 435, 837, 1643 MB)\n"
+#if defined(WINDOWS) || defined (UNIX)
+        "You may also compress directories.\n"
+#endif
+        "\n"
+        "To extract or compare:\n"
+        "  " PROGNAME " -d dir1/archive." PROGNAME "      (extract to dir1)\n"
+        "  " PROGNAME " -d dir1/archive." PROGNAME " dir2 (extract to dir2)\n"
+        "  " PROGNAME " archive." PROGNAME "              (extract, pause when done)\n"
+        "\n"
+        "To view contents: more < archive." PROGNAME "\n"
+        "\n",
+        DEFAULT_OPTION);
+      quit();
+    }
+
+    FILE* archive=0;  // compressed file
+    int files=0;  // number of files to compress/decompress
+    Array<char*> fname(1);  // file names (resized to files)
+    Array<long> fsize(1);   // file lengths (resized to files)
+
+    // Compress or decompress?  Get archive name
+    Mode mode=COMPRESS;
+    String archiveName(argv[1]);
+    {
+      const int prognamesize=strlen(PROGNAME);
+      const int arg1size=strlen(argv[1]);
+      if (arg1size>prognamesize+1 && argv[1][arg1size-prognamesize-1]=='.'
+          && equals(PROGNAME, argv[1]+arg1size-prognamesize)) {
+        mode=DECOMPRESS;  // argv[1] already ends in "." PROGNAME
+      }
+      else if (doExtract)
+        mode=DECOMPRESS;
+      else {
+        archiveName+=".";
+        archiveName+=PROGNAME;
+      }
+    }
+
+    // Compress: write archive header, get file names and sizes
+    String filenames;
+    if (mode==COMPRESS) {
+
+      // Expand filenames to read later.  Write their base names and sizes
+      // to archive.
+      String header_string;
+      for (int i=1; i<argc; ++i) {
+        String name(argv[i]);
+        int len=name.size()-1;
+        for (int j=0; j<=len; ++j)  // change \ to /
+          if (name[j]=='\\') name[j]='/';
+        while (len>0 && name[len-1]=='/')  // remove trailing /
+          name[--len]=0;
+        int base=len-1;
+        while (base>=0 && name[base]!='/') --base;  // find last /
+        ++base;
+        if (base==0 && len>=2 && name[1]==':') base=2;  // chop "C:"
+        int expanded=expand(header_string, filenames, name.c_str(), base);
+        if (!expanded && (i>1||argc==2))
+          printf("%s: not found, skipping...\n", name.c_str());
+        files+=expanded;
+      }
+
+      // If archive doesn't exist and there is at least one file to compress
+      // then create the archive header.
+      if (files<1) quit("Nothing to compress\n");
+//      archive=fopen(archiveName.c_str(), "rb");
+//      if (archive)
+//        printf("%s already exists\n", archiveName.c_str()), quit();
+      archive=fopen(archiveName.c_str(), "wb+");
+      if (!archive) perror(archiveName.c_str()), quit();
+      fprintf(archive, PROGNAME " -%d\r\n%s\x1A",
+        level, header_string.c_str());
+      printf("Creating archive %s with %d file(s)...\n",
+        archiveName.c_str(), files);
+
+      // Fill fname[files], fsize[files] with input filenames and sizes
+      fname.resize(files);
+      fsize.resize(files);
+      char *p=&filenames[0];
+      rewind(archive);
+      getline(archive);  // skip the PROGNAME line; sizes are re-read from the header
+      for (int i=0; i<files; ++i) {
+        const char *num=getline(archive);
+        assert(num);
+        fsize[i]=atol(num);
+        assert(fsize[i]>=0);
+        fname[i]=p;
+        while (*p!='\n') ++p;
+        assert(p-filenames.c_str()<filenames.size());
+        *p++=0;  // split filenames in place into NUL-terminated names
+      }
+      fseek(archive, 0, SEEK_END);
+    }
+
+    // Decompress: open archive for reading and store file names and sizes
+    if (mode==DECOMPRESS) {
+      archive=fopen(archiveName.c_str(), "rb+");
+      if (!archive) perror(archiveName.c_str()), quit();
+
+      // Check for proper format and get option
+      const char* header=getline(archive);  // NULL if the file ends before a newline
+      if (!header || strncmp(header, PROGNAME " -", strlen(PROGNAME)+2))
+        printf("%s: not a %s file\n", archiveName.c_str(), PROGNAME), quit();
+      level=header[strlen(PROGNAME)+2]-'0';
+      if (level<0||level>9) level=DEFAULT_OPTION;
+
+      // Fill fname[files], fsize[files] with output file names and sizes
+      while (getline(archive)) ++files;  // count files
+      printf("Extracting %d file(s) from %s -%d\n", files,
+        archiveName.c_str(), level);
+      long header_size=ftell(archive);
+      filenames.resize(header_size+4);  // copy of header
+      rewind(archive);
+      fread(&filenames[0], 1, header_size, archive);
+      fname.resize(files);
+      fsize.resize(files);
+      char* p=&filenames[0];
+      while (*p && *p!='\r') ++p;  // skip first line
+      ++p;
+      for (int i=0; i<files; ++i) {  // each entry is "\n<size>\t<name>\r"
+        fsize[i]=atol(p+1);
+        while (*p && *p!='\t') ++p;
+        fname[i]=p+1;
+        while (*p && *p!='\r') ++p;
+        if (!*p) printf("%s: header corrupted at %d\n", archiveName.c_str(),
+          int(p-&filenames[0])), quit();  // pointer difference cast to match %d
+        assert(p-&filenames[0]<header_size);
+        *p++=0;
+      }
+    }
+
+    // Set globals according to option
+    assert(level>=0 && level<=9);
+    buf.setsize(MEM*8);
+
+    // Compress or decompress files
+    assert(fname.size()==files);
+    assert(fsize.size()==files);
+    long total_size=0;  // sum of file sizes
+    for (int i=0; i<files; ++i) total_size+=fsize[i];
+    Encoder en(mode, archive);
+    if (mode==COMPRESS) {
+      for (int i=0; i<files; ++i)
+        compress(fname[i], fsize[i], en);
+      en.flush();
+      printf("%ld -> %ld\n", total_size, en.size());
+    }
+
+    // Decompress files to dir2: paq8l -d dir1/archive.paq8l dir2
+    // If there is no dir2, then extract to dir1
+    // If there is no dir1, then extract to .
+    else {
+      assert(argc>=2);
+      String dir(argc>2?argv[2]:argv[1]);
+      if (argc==2) {  // chop "/archive.paq8l"
+        int i;
+        for (i=dir.size()-2; i>=0; --i) {
+          if (dir[i]=='/' || dir[i]=='\\') {
+            dir[i]=0;
+            break;
+          }
+          if (i==1 && dir[i]==':') {  // leave "C:"
+            dir[i+1]=0;
+            break;
+          }
+        }
+        if (i==-1) dir=".";  // "/" not found
+      }
+      dir=dir.c_str();  // presumably re-sizes dir to the NUL written above - confirm in String
+      if (dir[0] && (dir.size()!=3 || dir[1]!=':')) dir+="/";
+      for (int i=0; i<files; ++i) {
+        String out(dir.c_str());
+        out+=fname[i];
+        decompress(out.c_str(), fsize[i], en);
+      }
+    }
+    fclose(archive);
+    programChecker.print();
+  }
+  catch(const char* s) {
+    if (s) printf("%s\n", s);
+  }
+  if (pause) {
+    printf("\nClose this window or press ENTER to continue...\n");
+    getchar();
+  }
+  return 0;
+}
+
+
diff --git a/paq8l.exe b/paq8l.exe
new file mode 100755
index 0000000..2d6a186
Binary files /dev/null and b/paq8l.exe differ
diff --git a/paq9a.cpp b/paq9a.cpp
new file mode 100755
index 0000000..58552af
--- /dev/null
+++ b/paq9a.cpp
@@ -0,0 +1,1222 @@
+/* paq9a archiver, Dec. 31, 2007 (C) 2007, Matt Mahoney
+
+    LICENSE
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 3 of
+    the License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details at
+    <http://www.gnu.org/copyleft/gpl.html>.
+
+paq9a is an experimental file compressor and archiver.  Usage:
+
+  paq9a {a|x|l} archive [[-opt] files...]...
+
+Commands:
+
+  a = create archive and compress named files.
+  x = extract from archive.
+  l = list contents.
+
+Archives are "solid".  You can only create new archives.  You cannot
+modify existing archives.  File names are stored and extracted exactly as
+named when the archive is created, but you have the option to rename them
+during extraction.  Files are never clobbered.
+
+The "a" command creates a new archive and adds the named files.
+Wildcards are permitted if compiled with g++.  Options
+and filenames may be in any order.  Options apply only to filenames
+after the option, and override previous options.  Options are:
+
+  -s = store without compression.
+  -c = compress (default).
+  -1 through -9 selects memory level from 18 MB to 1.5 GB  Default is -7
+     using 405 MB.  The memory option must be set before the first file.
+     Decompression requires the same amount of memory.
+
+For example:
+
+  paq9a a foo.paq9a a.txt -3 -s b.txt -c c.txt tmp/d.txt /tmp/e.txt
+
+creates the archive foo.paq9a with 5 files.  The file b.txt is
+stored without compression.  The other 4 files are compressed
+at memory level 3.  Extraction requires the same memory as compression.
+
+If any named file does not exist, then it is omitted from the archive
+with a warning and the remaining files are added.  An existing
+archive cannot be overwritten.  There must be at least one filename on
+the command line.
+
+The "x" command extracts the archive contents, creating files exactly
+as named when the archive was created.  Files cannot be overwritten.
+If a file already exists or cannot be created, then it is skipped.
+For example, "tmp/d.txt" would be skipped if either the current
+directory does not have a subdirectory tmp, or tmp is write
+protected, or tmp/d.txt already exists.
+
+If "x" is followed by one or more file names, then the output files
+are renamed in the order they were added to the archive and any remaining
+contents are extracted without renaming.  For example:
+
+  paq9a x foo.paq9a x.txt y.txt
+
+would extract a.txt to x.txt and b.txt to y.txt, then extract c.txt, 
+tmp/d.txt and /tmp/e.txt.  If the command line has more filenames than
+the archive then the extra arguments are ignored.  Options are not
+allowed.
+
+The "l" (letter l) command lists the contents.  Any extra arguments
+are ignored.
+
+Any other command, or no command, displays a help message.
+
+
+ARCHIVE FORMAT
+
+  "lPq" 1 mem [filename {'\0' mode usize csize contents}...]...
+
+The first 4 bytes are "lPq\x01" (1 is the version number).
+
+mem is a digit '1' through '9', where '9' uses the most memory (1.5 GB).
+
+A file is stored as one or more blocks.  The filename is stored
+only in the first block as a NUL terminated string.  Subsequent
+blocks start with a 0.
+
+The mode is 's' if the block is stored and 'c' if compressed.
+
+usize = uncompressed size as a 4 byte big-endian number (MSB first).
+
+csize = compressed size as a 4 byte big-endian number.
+
+The contents is copied from the file itself if mode is 's' or the
+compressed contents otherwise.  Its length is exactly csize bytes.
+
+
+COMPRESSED FORMAT
+
+Files are preprocessed with LZP and then compressed with a context
+mixing compressor and arithmetic coded one bit at a time.  Model
+contents are maintained across files.
+
+The LZP stage predicts the next byte by matching the current context
+(order 12 or higher) to a rotating buffer.  If a match is found
+then the next byte after the match is predicted.  If the next byte
+matches the prediction, then a 1 bit is coded and the context is extended.
+Otherwise a 0 is coded followed by 8 bits of the actual byte in MSB to 
+LSB order.
+
+A 1 bit is modeled using the match length as context, then refined
+in 3 stages using successively longer contexts.  The predictions are
+adjusted by 2 input neurons selected by a context hash with the second 
+input fixed.
+
+If the LZP prediction is missed, then the literal is coded using a chain
+of predictions which are mixed using neurons, where one input is the
+previous prediction and the second input is the prediction given the
+current context.  The current context is mapped to an 8 bit state
+representing the bit history, the sequence of bits previously observed
+in that context.  The bit history is used both to select the neuron
+and is mapped to a prediction that provides the second input.  In addition,
+if the known bits of the current byte match the LZP incorrectly predicted
+byte, then this fact is used to select one of 2 sets of neurons (512 total).
+
+The contexts, in order, are sparse order-1 with gaps of 3, 2, and 1
+byte, then orders 1 through 6, then word orders 0 and 1, where a word
+is a sequence of case insensitive letters (useful for compressing text).
+Contexts longer than 1 are hashed.  Order-n contexts consist of a hash
+of the last n bytes plus the 0 to 7 known bits of the current byte.
+The order 6 context and the word order 0 and 1 contexts also include
+the LZP predicted byte.
+
+All mixing is in the logistic or "stretched" domain: stretch(p) = ln(p/(1-p)),
+then "squashed" by the inverse function: squash(p) = 1/(1 + exp(-p)) before
+arithmetic coding.  A 2 input neuron has 2 weights (w0 and w1)
+selected by context.  Given inputs x0 and x1 (2 predictions, or one
+prediction and a constant), the output prediction is computed:
+p = w0*x0 + w1*x1.  If the actual bit is y, then the weights are updated
+to minimize its coding cost:
+
+  error = y - squash(p)
+  w0 += x0 * error * L
+  w1 += x1 * error * L
+
+where L is the learning rate, normally 1/256, but increased by a factor
+of 4 and 2 for the first 2 training cycles (using the 2 low bits
+of w0 as a counter).  In the implementation, p is represented by a fixed
+point number with a 12 bit fractional part in the linear domain (0..4095)
+and 8 bits in the logistic domain (-2047..2047 representing -8..8).
+Weights are scaled by 24 bits.  Both weights are initialized to 1/2
+(expecting 2 probabilities, weighted equally).  However, when one input
+(x0) is fixed, its weight (w0) is initialized to 0.
+
+A bit history represents the sequence of 0 and 1 bits observed in a given
+context.  An 8 bit state represents all possible sequences up to 4 bits
+long.  Longer sequences are represented by a count of 0 and 1 bits, plus
+an indicator of the most recent bit.  If counts grow too large, then the
+next state represents a pair of smaller counts with about the same ratio.
+The state table is the same as used in PAQ8 (all versions) and LPAQ1.
+
+A state is mapped to a prediction by using a table.  A table entry
+contains 2 values, p, initialized to 1/2, and n, initialized to 0.
+The output prediction is p (in the linear domain, not stretched).
+If the actual bit is y, then the entry is updated:
+
+  error = y - p
+  p += error/(n + 1.5)
+  if n < limit then n += 1
+
+In practice, p is scaled by 22 bits, and n is 10 bits, packed into
+one 32 bit integer.  The limit is 255.
+
+Every 4 bits, contexts are mapped to arrays of 15 states using a 
+hash table.  The first element is the bit history for the current
+context ending on a half byte boundary, followed by all possible
+contexts formed by appending up to 3 more bits.
+
+A hash table accepts a 32 bit context, which must be a hash if
+longer than 4 bytes.  The input is further hashed and divided into
+an index (depending on the table size, a power of 2), and an 8 bit
+checksum which is stored in the table and used to detect collisions
+(not perfectly).  A lookup tests 3 adjacent locations within a single
+64 byte cache line, and if a matching checksum is not found, then the
+entry with the smallest value in the first data element is replaced
+(LFU replacement policy).  This element represents a bit history
+for a context ending on a half byte boundary.  The states are ordered
+so that larger values represent larger total bit counts, which
+estimates the likelihood of future use.  The initial state is 0.
+
+Memory is allocated from MEM = pow(2, opt+22) bytes, where opt is 1 through
+9 (user selected).  Of this, MEM/2 is for the hash table for storing literal
+context states, MEM/8 for the rotating LZP buffer, and MEM/8 for a 
+hash table of pointers into the buffer, plus 12 MB for miscellaneous data.
+Total memory usage is 0.75*MEM + 12 MB.
+
+
+ARITHMETIC CODING
+
+The arithmetic coder codes a bit with probability p using log2(1/p) bits.
+Given input string y, the output is a binary fraction x such that
+P(< y) <= x < P(<= y) where P(< y) means the total probability of all inputs
+lexicographically less than y and P(<= y) = P(< y) + P(y).  Note that one
+can always find x with length at most log2(1/P(y)) + 1 bits.
+
+x can be computed efficiently by maintaining a range, low <= x < high
+(initially 0..1) and expressing P(y) as a product of predictions:
+P(y) = P(y1) P(y2|y1) P(y3|y1y2) P(y4|y1y2y3) ... P(yn|y1y2...yn-1)
+where the term P(yi|y1...yi-1) means the probability that yi is 1
+given the context y1...yi-1, the previous i-1 bits of y.  For each
+prediction p, the range is split in proportion to the probabilities
+of 0 and 1, then updated by taking the half corresponding to the actual
+bit y as the new range, i.e.
+
+  mid = low + (high - low) * p(y = 1)
+  if y = 0 then (low, high) := (mid, high)
+  if y = 1 then (low, high) := (low, mid)
+
+As low and high approach each other, the high order bits of x become
+known (because they are the same throughout the range) and can be
+output immediately.
+
+For decoding, the range is split as before and the range is updated
+to the half containing x.  The corresponding bit y is used to update
+the model.  Thus, the model has the same knowledge for coding and
+decoding.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+#define NDEBUG  // remove for debugging
+#include <assert.h>
+
+int allocated=0;  // Running total of bytes obtained through alloc()
+
+// Point p at a new zero-filled array of n T's; exits if out of memory
+template <class T> void alloc(T*&p, int n) {
+  p=static_cast<T*>(calloc(n, sizeof(T)));
+  if (p==0) { printf("Out of memory\n"); exit(1); }
+  allocated+=n*sizeof(T);
+}
+
+// 8, 16, 32 bit unsigned types (adjust as appropriate)
+typedef unsigned char  U8;
+typedef unsigned short U16;
+typedef unsigned int   U32;
+
+///////////////////////////// Squash //////////////////////////////
+
+// Logistic function p = 1/(1 + exp(-d)) by table lookup.  d is scaled by
+// 8 bits and p by 12 bits; inputs outside the table clamp to 0 or 4095.
+class Squash {
+  short tab[4096];  // tab[d+2048] for d in -2048..2047
+public:
+  Squash();
+  int operator()(int d) {
+    const int idx=d+2048;
+    if (idx<0) return 0;
+    if (idx>4095) return 4095;
+    return tab[idx];
+  }
+} squash;
+
+Squash::Squash() {  // interpolate linearly between 33 samples, 128 apart
+  static const int t[33]={
+    1,2,3,6,10,16,27,45,73,120,194,310,488,747,1101,
+    1546,2047,2549,2994,3348,3607,3785,3901,3975,4022,
+    4050,4068,4079,4085,4089,4092,4093,4094};
+  for (int k=0; k<4096; ++k) {
+    const int seg=k>>7, frac=k&127;  // sample interval and position inside it
+    tab[k]=(t[seg]*(128-frac)+t[seg+1]*frac+64) >> 7;
+  }
+}
+
+//////////////////////////// Stretch ///////////////////////////////
+
+// Inverse of squash: stretch(p) returns d = ln(p/(1-p)), with d scaled by
+// 8 bits and p by 12 bits.  d has range -2047 to 2047 representing -8 to 8.
+// p has range 0 to 4095 representing 0 to 1.
+
+class Stretch {
+  short t[4096];  // t[p] = stretched value of p
+public:
+  Stretch();
+  int operator()(int p) const {
+    assert(p>=0 && p<4096);
+    return t[p];
+  }
+} stretch;
+
+Stretch::Stretch() {
+  int next=0;  // first table slot not yet assigned
+  for (int d=-2047; d<=2047; ++d) {
+    const int top=squash(d);  // every unassigned p up to squash(d) maps to d
+    while (next<=top)
+      t[next++]=d;
+    next=top+1;
+  }
+  t[4095]=2047;
+}
+
+///////////////////////////// ilog //////////////////////////////
+
+// ilog(x) = round(log2(x) * 16), 0 <= x < 64K, read from a lookup table
+class Ilog {
+  U8* t;  // 65536 entries, filled in by the constructor
+public:
+  Ilog();
+  int operator()(U16 x) const {return t[x];}
+} ilog;
+
+// Build the table by accumulating a numerical integral of 1/x
+Ilog::Ilog() {
+  alloc(t, 65536);  // zero-filled, so t[0] and t[1] stay 0
+  U32 acc=14155776;
+  for (int odd=3; odd<131071; odd+=2) {  // odd = 2*i-1 for i = 2..65535
+    acc+=774541002/odd;  // numerator is 2^29/ln 2
+    t[(odd+1)>>1]=acc>>24;
+  }
+}
+
+// llog(x) accepts 32 bits: large x is shifted down into ilog()'s 16-bit
+// domain, and 16 is added to the result for every bit shifted out.
+inline int llog(U32 x) {
+  int shift=0;  // bits dropped before the table lookup
+  if (x>=0x1000000) shift=16;
+  else if (x>=0x10000) shift=8;
+
+  return shift*16+ilog(x>>shift);
+}
+
+///////////////////////// state table ////////////////////////
+
+// State table:
+//   nex(state, 0) = next state if bit y is 0, 0 <= state < 256
+//   nex(state, 1) = next state if bit y is 1
+//
+// States represent a bit history within some context.
+// State 0 is the starting state (no bits seen).
+// States 1-30 represent all possible sequences of 1-4 bits.
+// States 31-252 represent a pair of counts, (n0,n1), the number
+//   of 0 and 1 bits respectively.  If n0+n1 < 16 then there are
+//   two states for each pair, depending on if a 0 or 1 was the last
+//   bit seen.
+// If n0 and n1 are too large, then there is no state to represent this
+// pair, so another state with about the same ratio of n0/n1 is substituted.
+// Also, when a bit is observed and the count of the opposite bit is large,
+// then part of this count is discarded to favor newer data over old.
+
+static const U8 State_table[256][2]={
+{  1,  2},{  3,  5},{  4,  6},{  7, 10},{  8, 12},{  9, 13},{ 11, 14}, // 0
+{ 15, 19},{ 16, 23},{ 17, 24},{ 18, 25},{ 20, 27},{ 21, 28},{ 22, 29}, // 7
+{ 26, 30},{ 31, 33},{ 32, 35},{ 32, 35},{ 32, 35},{ 32, 35},{ 34, 37}, // 14
+{ 34, 37},{ 34, 37},{ 34, 37},{ 34, 37},{ 34, 37},{ 36, 39},{ 36, 39}, // 21
+{ 36, 39},{ 36, 39},{ 38, 40},{ 41, 43},{ 42, 45},{ 42, 45},{ 44, 47}, // 28
+{ 44, 47},{ 46, 49},{ 46, 49},{ 48, 51},{ 48, 51},{ 50, 52},{ 53, 43}, // 35
+{ 54, 57},{ 54, 57},{ 56, 59},{ 56, 59},{ 58, 61},{ 58, 61},{ 60, 63}, // 42
+{ 60, 63},{ 62, 65},{ 62, 65},{ 50, 66},{ 67, 55},{ 68, 57},{ 68, 57}, // 49
+{ 70, 73},{ 70, 73},{ 72, 75},{ 72, 75},{ 74, 77},{ 74, 77},{ 76, 79}, // 56
+{ 76, 79},{ 62, 81},{ 62, 81},{ 64, 82},{ 83, 69},{ 84, 71},{ 84, 71}, // 63
+{ 86, 73},{ 86, 73},{ 44, 59},{ 44, 59},{ 58, 61},{ 58, 61},{ 60, 49}, // 70
+{ 60, 49},{ 76, 89},{ 76, 89},{ 78, 91},{ 78, 91},{ 80, 92},{ 93, 69}, // 77
+{ 94, 87},{ 94, 87},{ 96, 45},{ 96, 45},{ 48, 99},{ 48, 99},{ 88,101}, // 84
+{ 88,101},{ 80,102},{103, 69},{104, 87},{104, 87},{106, 57},{106, 57}, // 91
+{ 62,109},{ 62,109},{ 88,111},{ 88,111},{ 80,112},{113, 85},{114, 87}, // 98
+{114, 87},{116, 57},{116, 57},{ 62,119},{ 62,119},{ 88,121},{ 88,121}, // 105
+{ 90,122},{123, 85},{124, 97},{124, 97},{126, 57},{126, 57},{ 62,129}, // 112
+{ 62,129},{ 98,131},{ 98,131},{ 90,132},{133, 85},{134, 97},{134, 97}, // 119
+{136, 57},{136, 57},{ 62,139},{ 62,139},{ 98,141},{ 98,141},{ 90,142}, // 126
+{143, 95},{144, 97},{144, 97},{ 68, 57},{ 68, 57},{ 62, 81},{ 62, 81}, // 133
+{ 98,147},{ 98,147},{100,148},{149, 95},{150,107},{150,107},{108,151}, // 140
+{108,151},{100,152},{153, 95},{154,107},{108,155},{100,156},{157, 95}, // 147
+{158,107},{108,159},{100,160},{161,105},{162,107},{108,163},{110,164}, // 154
+{165,105},{166,117},{118,167},{110,168},{169,105},{170,117},{118,171}, // 161
+{110,172},{173,105},{174,117},{118,175},{110,176},{177,105},{178,117}, // 168
+{118,179},{110,180},{181,115},{182,117},{118,183},{120,184},{185,115}, // 175
+{186,127},{128,187},{120,188},{189,115},{190,127},{128,191},{120,192}, // 182
+{193,115},{194,127},{128,195},{120,196},{197,115},{198,127},{128,199}, // 189
+{120,200},{201,115},{202,127},{128,203},{120,204},{205,115},{206,127}, // 196
+{128,207},{120,208},{209,125},{210,127},{128,211},{130,212},{213,125}, // 203
+{214,137},{138,215},{130,216},{217,125},{218,137},{138,219},{130,220}, // 210
+{221,125},{222,137},{138,223},{130,224},{225,125},{226,137},{138,227}, // 217
+{130,228},{229,125},{230,137},{138,231},{130,232},{233,125},{234,137}, // 224
+{138,235},{130,236},{237,125},{238,137},{138,239},{130,240},{241,125}, // 231
+{242,137},{138,243},{130,244},{245,135},{246,137},{138,247},{140,248}, // 238
+{249,135},{250, 69},{ 80,251},{140,252},{249,135},{250, 69},{ 80,251}, // 245
+{140,252},{  0,  0},{  0,  0},{  0,  0}};  // 252
+#define nex(state,sel) State_table[state][sel]
+
+//////////////////////////// StateMap //////////////////////////
+
+// A StateMap maps a context to a probability.  Methods:
+//
+// StateMap sm(n) creates a StateMap with n contexts using 4*n bytes memory.
+// sm.p(cx) converts state cx (0..n-1) to a probability (0..4095)
+//     that the next updated bit y=1, and remembers cx for update().
+// sm.update(y, limit) updates the entry selected by the last p() call
+//     with actual bit y (0..1).  limit (1..1023, default 255) is the
+//     maximum count; once it is reached the adaptation rate stops
+//     shrinking.  Larger values are better for stationary sources.
+
+class StateMap {
+protected:
+  const int N;  // Number of contexts
+  int cxt;      // Context of last prediction
+  U32 *t;       // cxt -> prediction in high 22 bits, count in low 10 bits
+  static int dt[1024];  // i -> 16K/(2*i+3), step size used by update()
+public:
+  StateMap(int n=256);
+
+  // update bit y (0..1)
+  void update(int y, int limit=255) {
+    assert(cxt>=0 && cxt<N);
+    int n=t[cxt]&1023, p=t[cxt]>>10;  // count, prediction
+    if (n<limit) ++t[cxt];  // bump the count held in the low 10 bits
+    else t[cxt]=t[cxt]&0xfffffc00|limit;  // clamp the count to limit
+    // Move the prediction toward y by error*dt[n]; the mask keeps the
+    // adjustment out of the count bits.
+    t[cxt]+=(((y<<22)-p)>>3)*dt[n]&0xfffffc00;
+  }
+
+  // predict next bit in context cx
+  int p(int cx) {
+    assert(cx>=0 && cx<N);
+    return t[cxt=cx]>>20;  // saves cx for update(); top 12 bits of entry
+  }
+};
+
+// Step-size table shared by all StateMaps.  It is filled in by the first
+// constructor call; dt[0] is nonzero once that has happened.
+int StateMap::dt[1024]={0};
+
+// Create n contexts.  Every entry starts with only its top bit set, i.e.
+// a prediction of one half and a count of 0.
+StateMap::StateMap(int n): N(n), cxt(0) {
+  alloc(t, N);
+  for (int i=0; i<N; ++i)
+    t[i]=U32(1)<<31;  // shift as U32: 1<<31 overflows a signed 32 bit int
+  if (dt[0]==0)
+    for (int i=0; i<1024; ++i)
+      dt[i]=16384/(i+i+3);
+}
+
+//////////////////////////// Mix, APM /////////////////////////
+
+// Mix combines 2 predictions and a context to produce a new prediction.
+// Methods:
+// Mix m(n) -- creates a mixer with n contexts.
+// m.pp(p1, p2, cx) -- inputs 2 stretched predictions and a context cx
+//   (0..n-1) and returns a stretched prediction.  Stretched predictions
+//   are fixed point numbers with an 8 bit fraction, normally -2047..2047
+//   representing -8..8, such that 1/(1+exp(-p)) is the probability that
+//   the next update will be 1.
+// m.update(y) updates the model after a prediction with bit y (0..1).
+
+class Mix {
+protected:
+  const int N;  // n
+  int* wt;  // weights, scaled 24 bits; 2 per context (wt[2*cx], wt[2*cx+1])
+  int x1, x2;    // inputs, scaled 8 bits (-2047 to 2047)
+  int cxt;  // index in wt of the last context's first weight (2*cx)
+  int pr;   // last output
+public:
+  Mix(int n=512);
+  int pp(int p1, int p2, int cx) {
+    assert(cx>=0 && cx<N);
+    cxt=cx*2;
+    // Weighted sum of the two inputs, rounded.  The inputs and the result
+    // are saved for the following update().
+    return pr=(x1=p1)*(wt[cxt]>>16)+(x2=p2)*(wt[cxt+1]>>16)+128>>8;
+  }
+  void update(int y) {
+    assert(y==0 || y==1);
+    int err=((y<<12)-squash(pr));  // error of the last output
+    // The low 2 bits of the first weight count updates of this context
+    // (0..3).  The first updates use a larger error: x3, x2, then x1.
+    if ((wt[cxt]&3)<3)
+      err*=4-(++wt[cxt]&3);
+    err=err+8>>4;
+    wt[cxt]+=x1*err&-4;  // &-4 leaves the 2 bit counter untouched
+    wt[cxt+1]+=x2*err;
+  }
+};
+
+// Allocate 2 weights per context, each initialized to 1<<23.
+Mix::Mix(int n): N(n), x1(0), x2(0), cxt(0), pr(0) {
+  alloc(wt, n*2);
+  for (int i=0; i<N*2; ++i)
+    wt[i]=1<<23;
+}
+
+// An APM is a Mix optimized for a constant in place of p1, used to
+// refine a stretched prediction given a context cx.
+// Normally p1 is in the range (0..4095) and p2 is doubled.
+// It differs from Mix only in its initial weights: the weight applied
+// to p1 starts at 0 in every context.
+
+class APM: public Mix {
+public:
+  APM(int n);
+};
+
+APM::APM(int n): Mix(n) {
+  for (int i=0; i<n; ++i)
+    wt[2*i]=0;  // first weight of context i, the one multiplied by p1
+}
+
+//////////////////////////// HashTable /////////////////////////
+
+// A HashTable maps a 32-bit index to an array of B bytes.
+// The first byte is a checksum using the upper 8 bits of the
+// scrambled index.  The second byte is a priority (0 = empty) for hash
+// replacement.  The index need not be a hash.
+
+// HashTable<B> h(n) - create using n bytes.  n and B must be
+//     powers of 2 with n >= B*4, and B >= 2.
+// h[i] returns array [1..B-1] of bytes indexed by i, creating and
+//     replacing another element if needed.  Element 0 is the
+//     checksum and should not be modified.
+
+template <int B>
+class HashTable {
+  U8* t;  // table: 1 element = B bytes: checksum priority data data
+  const U32 N;  // size in bytes
+public:
+  HashTable(int n);
+  ~HashTable();  // prints usage statistics
+  U8* operator[](U32 i);
+};
+
+// Allocate the table with slack for alignment, then advance t to the
+// next 64 byte boundary.
+template <int B>
+HashTable<B>::HashTable(int n): t(0), N(n) {
+  assert(B>=2 && (B&B-1)==0);
+  assert(N>=B*4 && (N&N-1)==0);
+  alloc(t, N+B*4+64);
+  // size_t is pointer sized on every target.  long is only 32 bits on
+  // 64 bit Windows, where casting a pointer to it is rejected by the
+  // compiler or silently truncates.
+  t+=64-int(((size_t)t)&63);  // align on cache line boundary
+}
+
+// Return the element for index i.  Three slots (i, i^B, i^B*2) are
+// checked for a matching checksum.  On a miss a victim slot is chosen by
+// comparing the priority bytes, cleared, and given the new checksum.
+template <int B>
+inline U8* HashTable<B>::operator[](U32 i) {
+  i*=123456791;  // scramble the index
+  i=i<<16|i>>16;
+  i*=234567891;
+  int chk=i>>24;  // checksum = top 8 bits of the scrambled index
+  i=i*B&N-B;      // byte offset of the element: a multiple of B below N
+  if (t[i]==chk) return t+i;
+  if (t[i^B]==chk) return t+(i^B);
+  if (t[i^B*2]==chk) return t+(i^B*2);
+  // No match: compare priority bytes (offset +1) to pick the slot to replace
+  if (t[i+1]>t[i+1^B] || t[i+1]>t[i+1^B*2]) i^=B;
+  if (t[i+1]>t[i+1^B^B*2]) i^=B^B*2;
+  memset(t+i, 0, B);
+  t[i]=chk;
+  return t+i;
+}
+
+// Print how much of the table was used when it is destroyed.
+template <int B>
+HashTable<B>::~HashTable() {
+  int nonzero=0;  // bytes holding a nonzero value
+  int claimed=0;  // elements whose checksum byte is nonzero
+  for (U32 k=0; k<N; ++k) {
+    if (!t[k]) continue;
+    ++nonzero;
+    if (k%B==0) ++claimed;
+  }
+  printf("HashTable<%d> %1.4f%% full, %1.4f%% utilized of %d KiB\n",
+    B, 100.0*claimed*B/N, 100.0*nonzero/N, N>>10);
+}
+
+////////////////////////// LZP /////////////////////////
+
+U32 MEM=1<<29;  // Global memory limit, 1 << 22+(memory option)
+
+// LZP predicts the next byte and maintains context.  Methods:
+// c() returns the predicted byte for the next update, or -1 if none.
+// p() returns the 12 bit probability (0..4095) that c() is next.
+// update(ch) updates the model with actual byte ch (0..255).
+// c(i) returns the i'th prior byte of context, i > 0.
+// c4() returns the order 4 context, shifted into the LSB.
+// c8() returns a hash of the order 8 context, shifted 4 bits into LSB.
+// word0, word1 are hashes of the current and previous word (a-z).
+
+class LZP {
+private:
+  const int N, H; // buf, t sizes
+  enum {MINLEN=12};  // minimum match length
+  U8* buf;     // Rotating buffer of size N
+  U32* t;      // hash table: context hash h -> value of pos after that context
+  int match;   // position in buf of the predicted byte
+  int len;     // length of match
+  int pos;     // position of next ch to write to buf
+  U32 h;       // context hash
+  U32 h1;      // hash of last 8 byte updates, shifting 4 bits to MSB
+  U32 h2;      // last 4 updates, shifting 8 bits to MSB
+  StateMap sm1; // len+offset -> p
+  APM a1, a2, a3;   // p, context -> p
+  int literals, matches;  // statistics
+public:
+  U32 word0, word1;  // hashes of last 2 words (case insensitive a-z)
+  LZP();
+  ~LZP();      // prints statistics
+  int c();     // predicted char
+  int c(int i);// context
+  int c4() {return h2;}  // order 4 context, c(1) in LSB
+  int c8() {return h1;}  // hashed order 8 context
+  int p();     // probability that next char is c() * 4096
+  void update(int ch);  // update model with actual char ch
+};
+
+// Initialize: the rotating buffer gets MEM/8 bytes and the hash table
+// MEM/32 entries.  No match is active at the start (len=0).
+LZP::LZP(): N(MEM/8), H(MEM/32),
+    match(-1), len(0), pos(0), h(0), h1(0), h2(0), 
+    sm1(0x200), a1(0x10000), a2(0x40000), a3(0x100000),
+    literals(0), matches(0), word0(0), word1(0) {
+  assert(MEM>0);
+  assert(H>0);
+  alloc(buf, N);
+  alloc(t, H);
+}
+
+// Print statistics: share of nonzero hash table entries, how much of the
+// buffer was written, and the literal/match counts.
+LZP::~LZP() {
+  int c=0;  // number of nonzero hash table entries
+  for (int i=0; i<H; ++i)
+    c+=(t[i]!=0);
+  // H>>8 is the table size in KiB when each entry takes 4 bytes
+  printf("LZP hash table %1.4f%% full of %d KiB\n"
+    "LZP buffer %1.4f%% full of %d KiB\n", 
+    100.0*c/H, H>>8, pos<N?100.0*pos/N:100.0, N>>10);
+  printf("LZP %d literals, %d matches (%1.4f%% matched)\n",
+    literals, matches, 
+    literals+matches>0?100.0*matches/(literals+matches):0.0);
+}
+
+// Predicted next byte, or -1 for no prediction
+inline int LZP::c() {
+  if (len<MINLEN)
+    return -1;  // match too short to predict from
+  return buf[match&(N-1)];
+}
+
+// Return i'th byte of context (i > 0)
+inline int LZP::c(int i) {
+  assert(i>0);
+  const int where=(pos-i)&(N-1);  // wrap around the rotating buffer
+  return buf[where];
+}
+
+// Return prediction that c() will be the next byte (0..4095)
+int LZP::p() {
+  if (len<MINLEN) return 0;  // no byte is being predicted
+  int cxt=len;
+  // Lengths above 28 share contexts: 29..31 -> 28, 32..63 -> 29,
+  // 64..127 -> 30, 128 and up -> 31
+  if (len>28) cxt=28+(len>=32)+(len>=64)+(len>=128);
+  int pc=c();
+  int pr=sm1.p(cxt);
+  pr=stretch(pr);
+  // Refine with 3 APMs selected by the predicted byte and recent bytes.
+  // Each stage yields (3*APM output + stage input)/4.
+  pr=a1.pp(2048, pr*2, h2*256+pc&0xffff)*3+pr>>2;
+  pr=a2.pp(2048, pr*2, h1*(11<<6)+pc&0x3ffff)*3+pr>>2;
+  pr=a3.pp(2048, pr*2, h1*(7<<4)+pc&0xfffff)*3+pr>>2;
+  pr=squash(pr);
+  return pr;
+}
+
+// Update model with actual byte ch (0..255)
+void LZP::update(int ch) {
+  int y=c()==ch;     // 1 if prediction of ch was right, else 0
+  h1=h1*(3<<4)+ch+1; // update context hashes
+  h2=h2<<8|ch;
+  h=h*(5<<2)+ch+1&H-1;
+  if (len>=MINLEN) {  // a prediction was made: train the models used by p()
+    sm1.update(y);
+    a1.update(y);
+    a2.update(y);
+    a3.update(y);
+  }
+  if (isalpha(ch))
+    word0=word0*(29<<2)+tolower(ch);
+  else if (word0)
+    word1=word0, word0=0;  // word ended: it becomes the previous word
+  buf[pos&N-1]=ch;   // update buf
+  ++pos;
+  if (y) {  // extend match
+    ++len;
+    ++match;
+    ++matches;
+  }
+  else {  // find new match via the hash table entry for the current context
+    ++literals;
+    y=0;  // y is already 0 in this branch
+    len=1;
+    match=t[h];
+    // candidate coincides with the current position (mod N): step back one
+    if (!((match^pos)&N-1)) --match;
+    // count how many bytes before match and before pos agree, up to 128
+    while (len<=128 && buf[match-len&N-1]==buf[pos-len&N-1]) ++len;
+    --len;
+  }
+  t[h]=pos;  // remember where this context was last followed
+}
+
+LZP* lzp=0;
+
+//////////////////////////// Predictor /////////////////////////
+
+// A Predictor estimates the probability that the next bit of
+// uncompressed data is 1.  Methods:
+// Predictor() creates.
+// p() returns P(1) as a 12 bit number (0-4095).
+// update(y) trains the predictor with the actual bit (0 or 1).
+// Both methods read context from the global lzp, which must exist.
+
+class Predictor {
+  enum {N=11}; // number of contexts
+  int c0;      // last 0-7 bits with leading 1, 0 before LZP flag
+  int nibble;  // last 0-3 bits with leading 1 (1..15)
+  int bcount;  // number of bits in c0 (0..7)
+  HashTable<16> t;  // context -> state
+  StateMap sm[N];   // state -> prediction
+  U8* cp[N];   // i -> state array of bit histories for i'th context
+  U8* sp[N];   // i -> pointer to bit history for i'th context
+  Mix m[N-1];  // combines 2 predictions given a context
+  APM a1, a2, a3;  // adjusts a prediction given a context
+  U8* t2;      // order 1 contexts -> state; 4 regions of 64K for cp[0..3]
+
+public:
+  Predictor();
+  int p();
+  void update(int y);
+};
+
+// Initialize: the hash table gets MEM/2 bytes, and every context pointer
+// starts at the beginning of t2 so it is valid before the first p() call.
+Predictor::Predictor():
+    c0(0), nibble(1), bcount(0), t(MEM/2),
+    a1(0x10000), a2(0x10000), a3(0x10000) {
+  alloc(t2, 0x40000);
+  for (int i=0; i<N; ++i)
+    sp[i]=cp[i]=t2;
+}
+
+// Update model with actual bit y.  When c0 is 0 the bit is the LZP flag:
+// nothing is trained here, and a 0 flag starts a literal byte (c0=1).
+void Predictor::update(int y) {
+  assert(y==0 || y==1);
+  assert(bcount>=0 && bcount<8);
+  assert(c0>=0 && c0<256);
+  assert(nibble>=1 && nibble<=15);
+  if (c0==0)
+    c0=1-y;
+  else {
+    // advance every bit history and train its StateMap and mixer
+    *sp[0]=nex(*sp[0], y);
+    sm[0].update(y);
+    for (int i=1; i<N; ++i) {
+      *sp[i]=nex(*sp[i], y);
+      sm[i].update(y);
+      m[i-1].update(y);
+    }
+    c0+=c0+y;  // shift y into the partial byte
+    if (++bcount==8) bcount=c0=0;  // byte complete: next bit is an LZP flag
+    if ((nibble+=nibble+y)>=16) nibble=1;  // restart after 4 bits
+    a1.update(y);
+    a2.update(y);
+    a3.update(y);
+  }
+}
+
+// Predict next bit.  Before a byte (c0==0) the bit is the LZP flag and the
+// prediction comes from lzp->p().  Inside a literal byte the N context
+// predictions are mixed in a chain and then adjusted by 3 APMs.
+int Predictor::p() {
+  assert(lzp);
+  if (c0==0)
+    return lzp->p();
+  else {
+
+    // Set context pointers
+    int pc=lzp->c();  // mispredicted byte
+    // parsed as ((pc+256)>>(8-bcount))==c0
+    int r=pc+256>>8-bcount==c0;  // c0 consistent with mispredicted byte?
+    U32 c4=lzp->c4();  // last 4 whole context bytes, shifted into LSB
+    U32 c8=(lzp->c8()<<4)-1;  // hash of last 7 bytes with 4 trailing 1 bits
+    if ((bcount&3)==0) {  // nibble boundary?  Update context pointers
+      pc&=-r;  // keep pc only while it is consistent with c0, else 0
+      U32 c4p=c4<<8;
+      if (bcount==0) {  // byte boundary?  Update order-1 context pointers
+        cp[0]=t2+(c4>>16&0xff00);
+        cp[1]=t2+(c4>>8 &0xff00)+0x10000;
+        cp[2]=t2+(c4    &0xff00)+0x20000;
+        cp[3]=t2+(c4<<8 &0xff00)+0x30000;
+      }
+      // remaining contexts are looked up in the hash table
+      cp[4]=t[(c4p&0xffff00)-c0];
+      cp[5]=t[(c4p&0xffffff00)*3+c0];
+      cp[6]=t[c4*7+c0];
+      cp[7]=t[(c8*5&0xfffffc)+c0];
+      cp[8]=t[(c8*11&0xffffff0)+c0+pc*13];
+      cp[9]=t[lzp->word0*5+c0+pc*17];
+      cp[10]=t[lzp->word1*7+lzp->word0*11+c0+pc*37];
+    }
+
+    // Mix predictions
+    r<<=8;  // 0 or 256: selects the upper half of each mixer's contexts
+    sp[0]=&cp[0][c0];
+    int pr=stretch(sm[0].p(*sp[0]));
+    for (int i=1; i<N; ++i) {
+      // t2 arrays are indexed by c0, hash table elements by nibble
+      sp[i]=&cp[i][i<4?c0:nibble];
+      int st=*sp[i];
+      pr=m[i-1].pp(pr, stretch(sm[i].p(st)), st+r)*3+pr>>2;
+    }
+    pr=a1.pp(512, pr*2, c0+pc*256&0xffff)*3+pr>>2;  // Adjust prediction
+    pr=a2.pp(512, pr*2, c4<<8&0xff00|c0)*3+pr>>2;
+    pr=a3.pp(512, pr*2, c4*3+c0&0xffff)*3+pr>>2;
+    return squash(pr);
+  }
+}
+
+Predictor* predictor=0;
+
+/////////////////////////// get4, put4 //////////////////////////
+
+// Read/write a 4 byte big-endian number
+int get4(FILE* in) {
+  int value=getc(in);  // most significant byte comes first
+  for (int k=0; k<3; ++k)
+    value=value*256+getc(in);
+  return value;
+}
+
+// Write c as 4 bytes, most significant byte first.
+void put4(U32 c, FILE* out) {
+  for (int shift=24; shift>=0; shift-=8)
+    putc(int(c>>shift&255), out);
+}
+
+//////////////////////////// Encoder ////////////////////////////
+
+// An Encoder arithmetic codes in blocks of size BUFSIZE.  Methods:
+// Encoder(COMPRESS, f) creates encoder for compression to archive f, which
+//     must be open past any header for writing in binary mode.
+// Encoder(DECOMPRESS, f) creates encoder for decompression from archive f,
+//     which must be open past any header for reading in binary mode.
+// code(i) in COMPRESS mode compresses bit i (0 or 1) to file f.
+// code() in DECOMPRESS mode returns the next decompressed bit from file f.
+// count() should be called after each byte is compressed.
+// flush() should be called after compression is done.  It is also called
+//   automatically when a block is written.
+// Bit probabilities come from the global predictor, which must exist.
+
+typedef enum {COMPRESS, DECOMPRESS} Mode;
+class Encoder {
+private:
+  const Mode mode;       // Compress or decompress?
+  FILE* archive;         // Compressed data file
+  U32 x1, x2;            // Range, initially [0, 1), scaled by 2^32
+  U32 x;                 // Decompress mode: last 4 input bytes of archive
+  enum {BUFSIZE=0x20000};
+  static unsigned char* buf; // Compression output buffer, size BUFSIZE
+  int usize, csize;      // Buffered uncompressed and compressed sizes
+  double usum, csum;     // Total of usize, csize
+
+public:
+  Encoder(Mode m, FILE* f);
+  void flush();  // call this when compression is finished
+
+  // Compress bit y or return decompressed bit
+  int code(int y=0) {
+    assert(predictor);
+    int p=predictor->p();
+    assert(p>=0 && p<4096);
+    p+=p<2048;  // nudge the lower half up by one so p is never 0
+    // Split the range [x1, x2] in proportion to p, in two parts to avoid
+    // overflowing 32 bits.
+    U32 xmid=x1 + (x2-x1>>12)*p + ((x2-x1&0xfff)*p>>12);
+    assert(xmid>=x1 && xmid<x2);
+    if (mode==DECOMPRESS) y=x<=xmid;
+    y ? (x2=xmid) : (x1=xmid+1);  // a 1 keeps the lower part, a 0 the upper
+    predictor->update(y);
+    while (((x1^x2)&0xff000000)==0) {  // pass equal leading bytes of range
+      if (mode==COMPRESS) buf[csize++]=x2>>24;
+      x1<<=8;
+      x2=(x2<<8)+255;
+      if (mode==DECOMPRESS) x=(x<<8)+getc(archive);
+    }
+    return y;
+  }
+
+  // Count one byte
+  void count() {
+    assert(mode==COMPRESS);
+    ++usize;
+    if (csize>BUFSIZE-256)  // buffer nearly full: write the block out
+      flush();
+  }
+};
+unsigned char* Encoder::buf=0;
+
+// Create in mode m (COMPRESS or DECOMPRESS) with f opened as the archive.
+// Decompression preloads x with 4 bytes of f; compression allocates the
+// shared output buffer the first time it is needed.
+Encoder::Encoder(Mode m, FILE* f):
+    mode(m), archive(f), x1(0), x2(0xffffffff), x(0), 
+    usize(0), csize(0), usum(0), csum(0) {
+  if (mode==DECOMPRESS) {  // x = first 4 bytes of archive
+    for (int i=0; i<4; ++i)
+      x=(x<<8)+(getc(archive)&255);
+    csize=4;
+  }
+  else if (!buf)
+    alloc(buf, BUFSIZE);
+}
+
+// Write a compressed block and reinitialize the encoder.  The format is:
+//   '\0' 'c' (2 bytes: block marker and mode)
+//   uncompressed size (usize, 4 byte, MSB first)
+//   compressed size (csize, 4 bytes, MSB first)
+//   compressed data (csize bytes)
+// Does nothing in DECOMPRESS mode.
+void Encoder::flush() {
+  if (mode==COMPRESS) {
+    // terminate the code with the top byte of x1 followed by 3 bytes of 255
+    buf[csize++]=x1>>24;
+    buf[csize++]=255;
+    buf[csize++]=255;
+    buf[csize++]=255;
+    putc(0, archive);
+    putc('c', archive);
+    put4(usize, archive);
+    put4(csize, archive);
+    fwrite(buf, 1, csize, archive);
+    usum+=usize;
+    csum+=csize+10;  // 10 = 2 marker bytes + 2 size fields of 4 bytes
+    // progress display; the backspaces return the cursor to where it was
+    printf("%15.0f -> %15.0f"
+      "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", 
+      usum, csum);
+    x1=x=usize=csize=0;
+    x2=0xffffffff;
+  }
+}
+
+/////////////////////////// paq9a ////////////////////////////////
+
+// Compress or decompress from in to out, depending on whether mode
+// is COMPRESS or DECOMPRESS.  A byte c is encoded as a 1 bit if it
+// is predicted by LZP, otherwise a 0 followed by 8 bits from MSB to LSB.
+// The global models lzp and predictor are created on first use and kept
+// for later calls.  In DECOMPRESS mode in must be positioned at the usize
+// field of a block, and out may be 0 to decode without writing.
+void paq9a(FILE* in, FILE* out, Mode mode) {
+  if (!lzp && !predictor) {
+    lzp=new LZP;
+    predictor=new Predictor;
+    printf("%8d KiB\b\b\b\b\b\b\b\b\b\b\b\b", allocated>>10);
+  }
+  if (mode==COMPRESS) {
+    Encoder e(COMPRESS, out);
+    int c;
+    while ((c=getc(in))!=EOF) {
+      int cp=lzp->c();
+      if (c==cp)
+        e.code(1);
+      else
+        // i=8 codes bit 8 of c, which is always 0: the "not predicted"
+        // flag.  i=7..0 then code the byte itself.
+        for (int i=8; i>=0; --i)
+          e.code(c>>i&1);
+      e.count();
+      lzp->update(c);
+    }
+    e.flush();
+  }
+  else {  // DECOMPRESS
+    int usize=get4(in);
+    get4(in);  // csize
+    // A truncated or corrupted header yields a negative size.  Counting
+    // that down to 0 would decode about 2^32 bytes of garbage.
+    if (usize<0)
+      printf("Bad archive format: usize=%d\n", usize), exit(1);
+    Encoder e(DECOMPRESS, in);
+    for (; usize>0; --usize) {
+      int c=lzp->c();
+      if (e.code()==0) {
+        c=1;
+        while (c<256) c+=c+e.code();  // shift in 8 bits behind a leading 1
+        c&=255;
+      }
+      if (out) putc(c, out);
+      lzp->update(c);
+    }
+  }
+}
+
+
+///////////////////////////// store ///////////////////////////
+
+// Store a file in blocks as: {'\0' mode usize csize contents}...
+// with mode 's' and usize == csize.  At least one block is written, so
+// an empty file is stored as a single block of length 0.  Closes in.
+void store(FILE* in, FILE* out) {
+  assert(in);
+  assert(out);
+
+  // Store in blocks
+  const int BLOCKSIZE=0x100000;
+  static char* buf=0;  // allocated once and reused by later calls
+  if (!buf) alloc(buf, BLOCKSIZE);
+  bool first=true;
+  while (true) {
+    int n=fread(buf, 1, BLOCKSIZE, in);
+    if (!first && n<=0) break;  // end of input, and a block was written
+    fprintf(out, "%c%c", 0, 's');
+    put4(n, out);  // usize
+    put4(n, out);  // csize
+    fwrite(buf, 1, n, out);
+    first=false;
+  }
+
+  // Close file
+  fclose(in);
+}
+
+// Write usize == csize bytes of an uncompressed block from in to out.
+// out may be 0, in which case the block is read and discarded.
+void unstore(FILE* in, FILE* out) {
+  assert(in);
+  const int declared=get4(in);  // usize field
+  int remaining=get4(in);       // csize field: bytes still to copy
+  if (declared!=remaining)
+    printf("Bad archive format: usize=%d csize=%d\n", declared, remaining);
+  const int BUFSIZE=0x1000;
+  static char* buf=0;
+  if (!buf) alloc(buf, BUFSIZE);
+  for (int chunk; remaining>0; remaining-=chunk) {
+    chunk=remaining<BUFSIZE ? remaining : BUFSIZE;
+    if (int(fread(buf, 1, chunk, in))!=chunk) {
+      printf("Unexpected end of archive\n");
+      exit(1);
+    }
+    if (out) fwrite(buf, 1, chunk, out);
+  }
+}
+
+//////////////////////// Archiving functions ////////////////////////
+
+const int MAXNAMELEN=1023;  // max filename length
+
+// Return true if the first 4 bytes of in are a valid archive.
+// Reading stops at the first byte that does not match.
+bool check_archive(FILE* in) {
+  static const int magic[4]={'p', 'Q', '9', 1};
+  for (int k=0; k<4; ++k)
+    if (getc(in)!=magic[k]) return false;
+  return true;
+}
+
+// Open archive and check for valid archive header, exit if bad.
+// Set MEM to memory option '1' through '9'.  MEM holds the option
+// character itself here; converting it to a size is left to the caller.
+FILE* open_archive(const char* filename) {
+  FILE* in=fopen(filename, "rb");
+  if (!in)
+    printf("Cannot find archive %s\n", filename), exit(1);
+  // the 5th header byte is the memory option
+  if (!check_archive(in) || (MEM=getc(in))<'1' || MEM>'9') {
+    fclose(in);
+    printf("%s: Not a paq9a archive\n", filename);
+    exit(1);
+  }
+  return in;
+}
+
+// Compress filename to out.  option is 'c' to compress or 's' to store.
+// The file name is written to out ahead of the data blocks.  A file that
+// cannot be opened is reported and skipped.
+void compress(const char* filename, FILE* out, int option) {
+
+  // Open input file
+  FILE* in=fopen(filename, "rb");
+  if (!in) {
+    printf("File not found: %s\n", filename);
+    return;
+  }
+  fprintf(out, "%s", filename);
+  printf("%-40s ", filename);
+
+  // Compress depending on option
+  if (option=='s')
+    store(in, out);  // store() closes in itself
+  else {
+    if (option=='c')
+      paq9a(in, out, COMPRESS);
+    fclose(in);  // paq9a() leaves in open, so close it here
+  }
+  printf("\n");
+}
+
+// List archive contents: one line per file with its uncompressed size,
+// compressed size, mode and name, followed by a total.  Exits if a block
+// header is invalid.
+void list(const char* archive) {
+  double usum=0, csum=0;  // uncompressed and compressed size per file
+  double utotal=0, ctotal=4;  // total size in archive
+  static char filename[MAXNAMELEN+1];
+  int mode=0;
+
+  FILE* in=open_archive(archive);
+  printf("\npaq9a -%c\n", MEM);
+  while (true) {
+
+    // Get filename, mode
+    int c=getc(in);
+    if (c==EOF) break;
+    if (c) {   // start of new file?  Print previous file
+      if (mode)
+        printf("%10.0f -> %10.0f %c %s\n", usum, csum, mode, filename);
+      int len=0;
+      filename[len++]=c;
+      while ((c=getc(in))!=EOF && c)
+        if (len<MAXNAMELEN) filename[len++]=c;  // overlong names are cut
+      filename[len]=0;
+      utotal+=usum;
+      ctotal+=csum;
+      usum=0;
+      csum=len;
+    }
+
+    // Get uncompressed size
+    mode=getc(in);
+    int usize=get4(in);
+    usum+=usize;
+
+    // Get compressed size
+    int csize=get4(in);
+    csum+=csize+10;
+
+    if (usize<0 || csize<0 || mode!='c' && mode!='s')
+      printf("Archive corrupted usize=%d csize=%d mode=%d at %ld\n",
+        usize, csize, mode, ftell(in)), exit(1);
+
+    // Skip csize bytes.  A short read means the archive is truncated.
+    // Stop skipping then, because fread() keeps returning 0 at end of
+    // file and csize would never reach 0.
+    const int BUFSIZE=0x1000;
+    char buf[BUFSIZE];
+    while (csize>0) {
+      int want=csize<BUFSIZE ? csize : BUFSIZE;
+      int got=fread(buf, 1, want, in);
+      if (got<=0) {
+        printf("Unexpected end of archive\n");
+        break;
+      }
+      csize-=got;
+    }
+  }
+  printf("%10.0f -> %10.0f %c %s\n", usum, csum, mode, filename);
+  utotal+=usum;
+  ctotal+=csum;
+  printf("%10.0f -> %10.0f total\n", utotal, ctotal);
+  fclose(in);
+}
+
+// Extract files given command line arguments
+// Input format is: [filename {'\0' mode usize csize contents}...]...
+// Arguments after the archive name, if any, replace the stored file names
+// in order.  Existing files are never overwritten; their blocks are still
+// decoded (with out=0) so the models stay in step with the compressor.
+// NOTE(review): stored names are passed to fopen() unchecked, so an
+// archive can name any path the user may create -- confirm this is
+// acceptable for untrusted archives.
+void extract(int argc, char** argv) {
+  assert(argc>2);
+  assert(argv[1][0]=='x');
+  static char filename[MAXNAMELEN+1];  // filename from archive
+
+  // Open archive
+  FILE* in=open_archive(argv[2]);
+  MEM=1<<22+MEM-'0';  // option character -> memory limit, 1<<(22+option)
+
+  // Extract files
+  argc-=3;
+  argv+=3;
+  FILE* out=0;
+  while (true) {  // for each block
+
+    // Get filename
+    int c;
+    for (int i=0;; ++i) {
+      c=getc(in);
+      if (c==EOF) break;
+      if (i<MAXNAMELEN) filename[i]=c;  // bytes beyond the limit are dropped
+      if (!c) break;
+    }
+    if (c==EOF) break;
+
+    // Open output file
+    if (filename[0]) {  // new file?
+      const char* fn=filename;
+      if (argc>0) fn=argv[0], --argc, ++argv;
+      if (out) fclose(out);
+      out=fopen(fn, "rb");  // opened for reading only to test existence
+      if (out) {
+        printf("\nCannot overwrite file, skipping: %s ", fn);
+        fclose(out);
+        out=0;
+      }
+      else {
+        out=fopen(fn, "wb");
+        if (!out) printf("\nCannot create file: %s ", fn);
+      }
+      if (out) {
+        if (fn==filename) printf("\n%s ", filename);
+        else printf("\n%s -> %s ", filename, fn);
+      }
+    }
+
+    // Extract block
+    int mode=getc(in);
+    if (mode=='s')
+      unstore(in, out);
+    else if (mode=='c')
+      paq9a(in, out, DECOMPRESS);
+    else
+      printf("\nUnsupported compression mode %c %d at %ld\n", 
+        mode, mode, ftell(in)), exit(1);
+  }
+  printf("\n");
+  if (out) fclose(out);
+}
+
+// Command line is: paq9a {a|x|l} archive [[-option] files...]...
+// Prints usage and exits with status 1 if the arguments are invalid.
+int main(int argc, char** argv) {
+  clock_t start=clock();
+
+  // Check command line arguments
+  if (argc<3 || argv[1][1] || (argv[1][0]!='a' && argv[1][0]!='x'
+      && argv[1][0]!='l') || (argv[1][0]=='a' && argc<4) || argv[2][0]=='-')
+  {
+    printf("paq9a archiver (C) 2007, Matt Mahoney\n"
+      "Free software under GPL, http://www.gnu.org/copyleft/gpl.html\n"
+      "\n"
+      "To create archive: paq9a a archive [-1..-9] [[-s|-c] files...]...\n"
+      "  -1..-9 = use 18 to 1585 MiB memory (default -7 = 408 MiB)\n"
+      "  -s = store, -c = compress (default)\n"
+      "To extract files:  paq9a x archive [files...]\n"
+      "To list contents:  paq9a l archive\n");
+    exit(1);
+  }
+
+  // Create archive
+  if (argv[1][0]=='a') {
+    int option = 'c';  // -c or -s
+    FILE* out=fopen(argv[2], "rb");  // opened for reading to test existence
+    if (out) printf("Cannot overwrite archive %s\n", argv[2]), exit(1);
+    out=fopen(argv[2], "wb");
+    if (!out) printf("Cannot create archive %s\n", argv[2]), exit(1);
+    fprintf(out, "pQ9%c", 1);
+    int i=3;
+    if (argc>3 && argv[3][0]=='-' && argv[3][1]>='1' && argv[3][1]<='9'
+        && argv[3][2]==0) {
+      putc(argv[3][1], out);
+      MEM=1<<22+argv[3][1]-'0';  // 1<<(22+option)
+      ++i;
+    }
+    else
+      putc('7', out);
+    for (; i<argc; ++i) {
+      if (argv[i][0]=='-' && (argv[i][1]=='c' || argv[i][1]=='s')
+          && argv[i][2]==0)
+        option=argv[i][1];  // applies to the files that follow
+      else
+        compress(argv[i], out, option);
+    }
+    printf("-> %ld in %1.2f sec\n", ftell(out),
+      double(clock()-start)/CLOCKS_PER_SEC);
+    // Close explicitly so that a failed final write is reported instead
+    // of being lost at process exit.
+    if (fclose(out))
+      printf("Error writing archive %s\n", argv[2]), exit(1);
+  }
+
+  // List archive contents
+  else if (argv[1][0]=='l')
+    list(argv[2]);
+
+  // Extract from archive
+  else if (argv[1][0]=='x') {
+    extract(argc, argv);
+    printf("%1.2f sec\n", double(clock()-start)/CLOCKS_PER_SEC);
+  }
+
+  // Report statistics
+  delete predictor;
+  delete lzp;
+  printf("Used %d KiB memory\n", allocated>>10);
+  return 0;
+}
diff --git a/paq9a.exe b/paq9a.exe
new file mode 100755
index 0000000..ee938e2
Binary files /dev/null and b/paq9a.exe differ
diff --git a/paqtest.py b/paqtest.py
new file mode 100755
index 0000000..4842707
--- /dev/null
+++ b/paqtest.py
@@ -0,0 +1,178 @@
+import os
+import argparse
+import subprocess
+from multiprocessing import Pool
+from sys import platform
+
+
+def set_archive_filename(output: str, paq8l_version: str) -> str:
+    """Return *output* with the ``.paq8l<version>`` archive extension applied.
+
+    A name already ending in ``.paq8l<version>`` is returned unchanged, a
+    name ending in ``.paq8l`` only gets the version appended, and any other
+    name gets the full extension appended.
+    """
+    # os.path.splitext() keeps the leading dot on the extension, so the
+    # comparisons must include it.
+    _, ext = os.path.splitext(output)
+    if ext == '.paq8l' + paq8l_version:
+        return output
+    if ext == '.paq8l':
+        return output + paq8l_version
+    return output + '.paq8l' + paq8l_version
+
+
+def compress_file(file: str, output: str, exe_filename: str, compression_arg: str,
+                  paq8l_version: str = '207') -> None:
+    """Run the compressor executable on *file*.
+
+    Args:
+        file: Input path, or an ``@filelist`` argument.
+        output: Archive name; the paq8l extension is added if missing.
+        exe_filename: Path of the compressor executable.
+        compression_arg: Level switch passed to the executable, e.g. ``-9``.
+        paq8l_version: Version suffix used in the archive extension.  It
+            defaults to the command-line default so that callers which omit
+            it keep working.
+    """
+    output = set_archive_filename(output, paq8l_version)
+    # An argument list with shell=False keeps file names that contain
+    # quotes or shell metacharacters from being interpreted by a shell.
+    cmd = [exe_filename, compression_arg, file, output]
+    print(cmd)
+    subprocess.run(cmd, shell=False)
+
+
+def test_archive(input_location: str, archive: str, exe_filename: str,
+                 paq8l_version: str = '207') -> None:
+    """Run the compressor executable in test mode (``-t``) on *archive*.
+
+    Args:
+        input_location: Original file or folder; passed to the executable
+            on every platform except Windows.
+        archive: Archive name; the paq8l extension is added if missing.
+        exe_filename: Path of the compressor executable.
+        paq8l_version: Version suffix used in the archive extension.  It
+            defaults to the command-line default so that callers which omit
+            it keep working.
+    """
+    archive = set_archive_filename(archive, paq8l_version)
+    # An argument list with shell=False avoids shell interpretation of the
+    # file names.
+    cmd = [exe_filename, '-t', archive]
+    if platform != "win32":
+        cmd.append(input_location)
+    print(cmd)
+    subprocess.run(cmd, shell=False)
+
+
+def create_text_file(filelist: list, input_location: str, filename: str) -> str:
+    """Write *filelist* to ``<input_location>/<filename>.txt``.
+
+    The file starts with an empty line, followed by one entry per line.
+    Entries that name an existing directory are left out.
+
+    Returns:
+        ``'@' + path`` of the written file, or *input_location* unchanged
+        when *filelist* is empty (nothing is written in that case).
+    """
+    if not filelist:
+        return input_location
+    filelist_path = os.path.join(input_location, filename + '.txt')
+    print("Writing filelist.txt")
+    # The context manager closes the file even if a write fails.
+    with open(filelist_path, 'w') as txt_file:
+        txt_file.write('\n')
+        for file in filelist:
+            if not os.path.isdir(file):
+                txt_file.write(file + '\n')
+    return '@' + filelist_path
+
+def compression_args(args: argparse.Namespace) -> str:
+    """Return the level switch for the compressor, e.g. ``'-9'``.
+
+    Uses ``args.level`` and falls back to level 9 when it is empty.
+    """
+    level = args.level if args.level else '9'
+    # The function previously computed the level and returned nothing.
+    return '-' + level
+
+
+def get_output_location(args: argparse) -> str:
+    """Return ``args.output``, or ``args.input`` when no output was given."""
+    return args.output if args.output else args.input
+
+def parse_action(args: argparse) -> tuple:
+    """Return ``(action, action_finished)`` labels for progress messages.
+
+    ``test_only`` takes precedence over ``test``.
+    """
+    if args.test_only:
+        return "test", "Testing"
+    if args.test:
+        return "compress and test", "Compression and testing"
+    return "compress", "Compression"
+
+
+def single_threaded_compression(args: argparse.Namespace, input_location: str, output_location: str, filename: str,
+                                exe_filename: str, paq8l_version: str, compression_args: str) -> None:
+    """Compress *input_location* into one archive and optionally test it.
+
+    For a folder, every file below it is listed in a filelist text file
+    that is handed to the compressor; a single file is passed directly.
+
+    Args:
+        args: Parsed command line; ``test`` and ``test_only`` are read.
+        input_location: File or folder to compress.
+        output_location: Archive name without extension.
+        filename: Base name of *input_location*, used for the filelist.
+        exe_filename: Path of the compressor executable.
+        paq8l_version: Version suffix used in the archive extension.
+        compression_args: Level switch passed to the executable.
+    """
+    filelist = []
+    action, _ = parse_action(args)
+    if os.path.isdir(input_location):
+        print("Listing files to {}".format(action))
+        for dir_, _subdirs, files in os.walk(input_location):
+            for file_name in sorted(files):
+                # Keep the sub-folder part of the path.  The bare file name
+                # would make files in nested folders unfindable and files
+                # with equal names indistinguishable.
+                rel_file = os.path.relpath(os.path.join(dir_, file_name), input_location)
+                filelist.append(rel_file)
+                print(rel_file)
+        single_file = False
+    else:
+        print("file to {}".format(action), filename)
+        single_file = True
+
+    if (filelist or single_file) and not args.test_only:
+        filename = create_text_file(filelist, input_location, filename)
+        print("\nStarting compression...\n")
+        compress_file(filename, output_location, exe_filename, compression_args, paq8l_version)
+    if args.test or args.test_only:
+        print("\nVerifying archive...\n")
+        test_archive(input_location, output_location, exe_filename, paq8l_version)
+
+
+def multithreaded_compression(args: argparse.Namespace, input_location: str, output_location: str, filename: str,
+                              exe_filename: str, compression_args: str, paq8l_version: str = '207') -> None:
+    """Compress each entry of a folder into its own archive, in parallel.
+
+    A single file is compressed directly in this process.  Testing is not
+    implemented for this mode; a notice is printed if it was requested.
+
+    Args:
+        args: Parsed command line; ``test`` and ``test_only`` are read.
+        input_location: File or folder to compress.
+        output_location: Archive name, used for a single input file only.
+        filename: Base name of *input_location*, used in messages.
+        exe_filename: Path of the compressor executable.
+        compression_args: Level switch passed to the executable.
+        paq8l_version: Version suffix used in the archive extension.
+    """
+    if os.path.isdir(input_location):
+        print("Compressing each file separately")
+        pool = Pool()
+        pending = []
+        for file in sorted(os.listdir(input_location)):
+            file_path = os.path.join(input_location, file)
+            result = pool.apply_async(
+                compress_file, (file_path, file_path, exe_filename, compression_args, paq8l_version))
+            pending.append((file_path, result))
+        pool.close()
+        pool.join()
+        # apply_async() stores worker exceptions in the result object.
+        # Without reading them, a failing job goes completely unnoticed.
+        for file_path, result in pending:
+            try:
+                result.get()
+            except Exception as exc:
+                print("Compression failed for {}: {}".format(file_path, exc))
+    else:
+        print("file to compress:", filename)
+        print("\nStarting compression...\n")
+        compress_file(input_location, output_location, exe_filename, compression_args, paq8l_version)
+    if args.test or args.test_only:
+        print("\nVerifying archive is not yet implemented for multi-threaded individual file compression...\n")
+
+
+def main() -> None:
+    """Parse the command line and run compression and/or testing."""
+    parser = argparse.ArgumentParser(description='This script will generate a filelist file which will be used by '
+                                                 'paq8l_v207 for compressing. It is also used for testing if you '
+                                                 'use the -t or -to argument')
+    required = parser.add_argument_group('required arguments')
+    optional = parser.add_argument_group('optional arguments')
+    required.add_argument('-i', '--input', help="Input file or folder to compress. REQUIRED", required=True)
+    optional.add_argument('-v', '--version', help='Version of paq8l to use. Example: 207. Default is 207',
+                          required=False, default='207')
+    optional.add_argument('-l', '--level', help="Compression level and switches. Example: 9a to compress using level 9 "
+                                                "and with the 'Adaptive learning rate' switch. Default is 9",
+                          required=False, default='9')
+    optional.add_argument('-o', '--output', help="Output file to use. If not used, the archive will be saved at the "
+                                                 "root of the parent folder where the file/folder to compress is "
+                                                 "located. Do not provide extension", required=False, default=None)
+    optional.add_argument('-t', '--test', help="Optional flag to test the archive after compressing it. It is "
+                                               "recommended to use this option. Default is not to test",
+                          required=False, action='store_true')
+    optional.add_argument('-to', '--test-only', help="Skip compression and just test the archive.",
+                          required=False, action='store_true')
+    optional.add_argument('-r', '--remove', help="Deletes the filelist text file. Not recommended unless you plan not "
+                                                 "to test the archive later. Default is not to remove", required=False,
+                          default=False, action='store_true')
+    optional.add_argument('-mt', '--multithread', help="Compresses each file on a separate thread. This creates "
+                                                       "individual archives with just one file", required=False,
+                          default=False, action='store_true')
+    optional.add_argument('-n', '--nativecpu', help="Use the native CPU version. "
+                                                    "These versions usually ends with _nativecpu and may provide "
+                                                    "performane improvements on your machine over the generic version",
+                          required=False,
+                          default=False, action='store_true')
+    args = parser.parse_args()
+
+    # Variables:
+    # NOTE(review): the executable path is machine specific, and neither
+    # --version nor --nativecpu is used to select it.
+    exe_filename = "/home/stan/Documents/Dev/Fbroswer/paq8l"
+    compression_args = '-' + args.level
+    input_location = args.input
+    output_location = get_output_location(args)
+    filename = os.path.basename(input_location)
+
+    # Compression
+    if not args.multithread:
+        # paq8l_version sits between exe_filename and compression_args in
+        # the callee's parameter list.
+        single_threaded_compression(args, input_location, output_location, filename,
+                                    exe_filename, args.version, compression_args)
+    else:
+        # The version is not forwarded in this mode.
+        multithreaded_compression(args, input_location, output_location, filename,
+                                  exe_filename, compression_args)
+
+    # Remove file list if not in multithreaded mode.
+    if args.remove and not args.multithread:
+        filelist_path = os.path.join(input_location, filename + '.txt')
+        # No filelist exists for a single input file or an empty folder.
+        if os.path.isfile(filelist_path):
+            print("\nRemoving the filelist file")
+            os.remove(filelist_path)
+    _, action_finished = parse_action(args)
+    print("\n{} finished!".format(action_finished))
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/pypaqtest.paq b/pypaqtest.paq
new file mode 100755
index 0000000..e69de29
diff --git a/readme.txt b/readme.txt
new file mode 100755
index 0000000..147e927
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,43 @@
+paq8l is an open source (GPL) file compressor and archiver.
+Last update Mar. 18, 2007 by Matt Mahoney.
+
+Contents of paq8l.zip:
+
+readme.txt - this file
+paq8l.exe - Win32 (MinGW g++) executable for Pentium MMX and higher
+paq-8l_intel.exe - Faster Win32 executable (compiled by Johan de Bock with Intel C++ from http://uclc.info )
+paq8l - Linux executable (by Giorgio Tani, Mar. 18, 2007)
+
+paq8l.cpp - C++ source code for all versions (Mar. 8, 2007)
+paq7asm.asm - NASM/YASM assembler code for Pentium MMX or higher
+paq7asmsse.asm - NASM/YASM for Pentium 4 (SSE2) or higher in 32 bit mode
+paq7asm-x86_64.asm - YASM for x86-64 bit processors (tested in 64 bit Linux)
+
+paq8l can be compiled for other processors without the assembler
+code using the -DNOASM option (but it will run slower).
+The assembler code is the same for all paq7/8 versions.
+
+paq8l was written by Matt Mahoney (as paq8f) with improvements by
+Bill Pettis (based on improvements by Alexander Ratushnyak and
+Przemyslaw Skibinski in the paq8hp* series) and Serge Osnach (additional
+models), and Andrew Paterson (Borland port).  The assembler code was ported 
+to 64 bit by Matthew Fite and 32 bit SSE2 by wowtiger.
+
+Other contributors to the PAQ project: Berto Destasio (tuning earlier
+models for better compression), Johan de Bock (benchmarking, compiling
+fast executables), David A. Scott (arithmetic coder improvements),
+Fabio Buffoni (speed optimizations), Jason Schmidt (compression
+improvements), Rudi Cilibrasi (text modeling), Pavel L. Holoborodko
+(PGM image modeling), and Jari Aalto (licensing/distribution).
+
+This work would not be possible without the benchmarking efforts of
+Marcus Hutter (Hutter prize), Werner Bergmans (maximumcompression.com),
+Johan de Bock (UCLC), Berto Destasio (Emilcont benchmark), Stephan Busch
+(Squeeze Chart), Leonid A. Broukhis (Calgary Corpus Challenge),
+and Black Fox.
+
+A similar (but rewritten) context mixing algorithm is used in
+WinRK 3.0.3 (pwcm mode) by Malcolm Taylor.  Modified versions of 
+PAQ (faster but less compression) are used in UDA and WinUDA by dwing,
+and in xml-wrt by Przemyslaw Skibinski.
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100755
index 0000000..2b5abb2
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+# To ensure app dependencies are ported from your virtual environment/host machine into your container, run 'pip freeze > requirements.txt' in the terminal to overwrite this file
+py7zr
+rarfile
+tqdm
+PyQt5
+mutagen
diff --git a/stanzip.py b/stanzip.py
new file mode 100755
index 0000000..fcdd5e9
--- /dev/null
+++ b/stanzip.py
@@ -0,0 +1,152 @@
+# stanzip.py
+# Description: Compresses and extracts archives using several libraries; more compression methods are planned.
+#
+import os
+import zipfile
+import rarfile
+import py7zr
+import shutil
+import argparse
+import tqdm
+
+from concurrent.futures import ThreadPoolExecutor
+
+# File Extractor
+class Extractor:
+    """Extract ZIP, RAR and 7z archives, showing tqdm progress bars."""
+
+    def zipviewer(self, source, destination):
+        """Extract the archive at ``source`` into the ``destination`` directory.
+
+        The archive type is chosen from the file extension: ``.zip`` is handled
+        by ``zipfile``, ``.rar`` by ``rarfile`` and ``.7z`` by ``py7zr``. Any
+        other extension (including tar variants) is reported as unsupported.
+        Problems are printed instead of raised, so the method always returns
+        ``None``.
+
+        Args:
+            source: Path to the archive file.
+            destination: Directory to extract into; created when missing.
+        """
+        if not os.path.exists(source):
+            print(f"Error: Archive file not found: {source}")
+            return
+
+        try:
+            os.makedirs(destination, exist_ok=True)
+
+            if source.endswith(".zip"):
+                with zipfile.ZipFile(source, 'r') as zip_ref:
+                    # Read the member list once from the already-open archive
+                    # instead of opening (and leaking) a second ZipFile.
+                    names = zip_ref.namelist()
+                    with tqdm.tqdm(total=len(names), desc="Extracting ZIP files") as pbar:
+                        for filename in names:
+                            zip_ref.extract(filename, destination)
+                            pbar.update(1)
+                print(f"Extracted all files from {source} to {destination}")
+
+            elif source.endswith(".rar"):
+                with rarfile.RarFile(source, 'r') as rar_ref:
+                    names = rar_ref.namelist()
+                    with tqdm.tqdm(total=len(names), desc="Extracting RAR files") as pbar:
+                        for filename in names:
+                            # extract(member, path): one member per progress tick.
+                            rar_ref.extract(filename, destination)
+                            pbar.update(1)
+                print(f"Extracted all files from {source} to {destination}")
+
+            elif source.endswith(".7z"):
+                with py7zr.SevenZipFile(source, 'r') as sevenzip_ref:
+                    total = len(sevenzip_ref.getnames())
+                    with tqdm.tqdm(total=total, desc="Extracting 7z files") as pbar:
+                        # py7zr extracts the whole archive in a single pass, so
+                        # the bar is advanced once after extraction finishes.
+                        sevenzip_ref.extractall(path=destination)
+                        pbar.update(total)
+                print(f"Extracted all files from {source} to {destination}")
+
+            else:
+                print(f"Unsupported file format: {source}")
+
+        except (zipfile.BadZipFile, zipfile.LargeZipFile) as e:
+            print(f"ZIP Extraction Error: {e}")
+        except rarfile.Error as e:
+            # rarfile.Error is the base class of the rarfile exceptions.
+            print(f"RAR Extraction Error: {e}")
+        except py7zr.exceptions.ArchiveError as e:
+            # ArchiveError is the base class of py7zr's archive failures.
+            print(f"7z Extraction Error: {e}")
+        except OSError as e:
+            print(f"Extraction Error: {e}")
+
+# File Compressor
+class Compressor:
+    """Create ZIP archives from a single file or from a directory tree."""
+
+    def __init__(self):
+        pass
+
+    def _compress_folder(self, source_path, zip_file):
+        """Add every file below ``source_path`` to ``zip_file``.
+
+        Entry names are stored relative to ``source_path`` so the archive
+        does not embed the absolute location of the folder.
+        """
+        for root, _, files in os.walk(source_path):
+            for file in files:
+                file_path = os.path.join(root, file)
+                archive_path = os.path.relpath(file_path, source_path)
+                self._compress_file(file_path, zip_file, archive_path)
+
+    def _compress_file(self, file_path, zip_file, archive_path=None):
+        """Add one file to ``zip_file`` as a single entry.
+
+        Args:
+            file_path: Path of the file on disk.
+            zip_file: An open, writable ``zipfile.ZipFile``.
+            archive_path: Entry name inside the archive; defaults to the
+                file's base name.
+        """
+        if not archive_path:
+            archive_path = os.path.basename(file_path)
+
+        # Skip ZIP files so an archive written inside the source tree is not
+        # packed into itself.
+        if archive_path.endswith(".zip"):
+            return
+
+        # ZipFile.write streams the file into exactly one entry. Calling
+        # writestr once per chunk created a duplicate entry for every chunk
+        # and no entry at all for empty files.
+        zip_file.write(file_path, archive_path)
+
+    def compress(self, source_path, archive_name, archive_format="zip"):
+        """Compress ``source_path`` into ``<archive_name>.<archive_format>``.
+
+        The archive is written next to ``source_path`` (in its parent
+        directory). A missing source is reported with a message and nothing
+        is written.
+
+        Args:
+            source_path: File or directory to compress.
+            archive_name: Archive file name without extension.
+            archive_format: Only ``"zip"`` is supported.
+
+        Raises:
+            ValueError: If ``archive_format`` is not ``"zip"``.
+        """
+        if archive_format != "zip":
+            raise ValueError(f"Unsupported archive format: {archive_format}")
+
+        archive_path = os.path.join(os.path.dirname(source_path), f"{archive_name}.{archive_format}")
+
+        # Check if source path exists
+        if not os.path.exists(source_path):
+            print(f"Source path does not exist: {source_path}")
+            return
+
+        # Compress the source path. All writes happen inside this block; the
+        # former thread-pool pass ran after the archive was closed, so every
+        # submitted write failed silently inside its future.
+        with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+            if os.path.isdir(source_path):
+                print(f"Compressing folder: {source_path}")
+                self._compress_folder(source_path, zip_file)
+            else:
+                print(f"Compressing file: {source_path}")
+                self._compress_file(source_path, zip_file)
+
+        print(f"Compressed to: {archive_path}")
+
+def main():
+    """Parse the command line and run the ``extract`` or ``compress`` command."""
+    parser = argparse.ArgumentParser(description="Compress or extract files")
+    subparsers = parser.add_subparsers(title="Command", dest="command")
+
+    # Subparser for extraction
+    extract_parser = subparsers.add_parser("extract")
+    extract_parser.add_argument("source", help="Path to the archive file")
+    extract_parser.add_argument("destination", help="Extraction directory")
+
+    # Subparser for compression
+    compress_parser = subparsers.add_parser("compress")
+    compress_parser.add_argument("source", help="Path to the file or folder to compress")
+    compress_parser.add_argument("archive_name", help="Name for the compressed archive")
+    compress_parser.add_argument("-f", "--format", choices=["zip"], default="zip", help="Archive format (default: zip)")
+
+    args = parser.parse_args()
+
+    # One if/elif chain: with two separate ``if`` statements the ``else``
+    # branch also ran after a successful extract and printed "Invalid command".
+    if args.command == "extract":
+        extractor = Extractor()
+        extractor.zipviewer(args.source, args.destination)
+    elif args.command == "compress":
+        compressor = Compressor()
+        compressor.compress(args.source, args.archive_name, args.format)
+    else:
+        # Reached when no sub-command was given (args.command is None).
+        print("Invalid command. Use 'extract' or 'compress'")
+
+# Run the command-line entry point only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/test.png b/test.png
new file mode 100755
index 0000000..8722e47
Binary files /dev/null and b/test.png differ
diff --git a/test.py b/test.py
new file mode 100755
index 0000000..7eebc37
--- /dev/null
+++ b/test.py
@@ -0,0 +1,168 @@
+import pretty_midi
+import random
+import tkinter as tk
+from tkinter import ttk, filedialog
+import pygame
+import pypianoroll # type: ignore
+from icecream import ic # type: ignore
+
+class midgen:
+    """Generate a short random melody in a randomly chosen scale and save it as MIDI."""
+
+    def __init__(self, status_label: ttk.Label):
+        """Store the label used for status messages and build the scale table.
+
+        Args:
+            status_label: Widget whose text is updated with progress and errors.
+        """
+        self.status_label = status_label
+        # NOTE: this instance attribute intentionally replaces the bound
+        # ``scales`` method with the dict it returns; afterwards
+        # ``self.scales`` is the mapping, not a callable.
+        self.scales = self.scales()
+
+    def scales(self):
+        """Return a mapping of scale name to semitone offsets from the root note."""
+        scales = {
+            "Major": [0, 2, 4, 5, 7, 9, 11],
+            "Minor": [0, 2, 3, 5, 7, 8, 10],
+            "Pentatonic": [0, 2, 4, 7, 9],
+            "Blues": [0, 3, 5, 6, 7, 10],
+            "Whole Tone": [0, 2, 4, 6, 8, 10],
+            "Chromatic": list(range(12)),
+            "Octatonic": [0, 1, 3, 4, 6, 7, 9, 10],
+            "Harmonic Minor": [0, 2, 3, 5, 7, 8, 11],
+            "Melodic Minor": [0, 2, 3, 5, 7, 9, 11],
+            "Dorian": [0, 2, 3, 5, 7, 9, 10],
+            "Phrygian": [0, 1, 3, 5, 7, 8, 10],
+            "Lydian": [0, 2, 4, 6, 7, 9, 11],
+            "Mixolydian": [0, 2, 4, 5, 7, 9, 10],
+            "Locrian": [0, 1, 3, 5, 6, 8, 10],
+            "Diminished": [0, 2, 3, 5, 6, 8, 9, 11],
+            "Whole Half Diminished": [0, 2, 3, 5, 6, 8, 9, 11],
+            "Arabian": [0, 2, 4, 5, 6, 8, 10],
+            "Hungarian Minor": [0, 2, 3, 6, 7, 8, 11],
+            "Enigmatic": [0, 1, 4, 6, 8, 10, 11],
+            "Neapolitan Major": [0, 1, 3, 5, 7, 9, 11],
+            "Neapolitan Minor": [0, 1, 3, 5, 7, 8, 11],
+            "Bluesy": [0, 3, 5, 6, 7, 10],
+            "Hawaiian": [0, 2, 3, 7, 9],
+            "Japanese": [0, 1, 5, 7, 8],
+            "Chinese": [0, 4, 6, 7, 11],
+            "Gypsy": [0, 2, 3, 6, 7, 8, 10],
+            "Hirojoshi": [0, 2, 3, 7, 8],
+            "In Sen": [0, 1, 5, 7, 10],
+            "Iwato": [0, 1, 5, 6, 10],
+            "Kumoi": [0, 2, 3, 7, 9],
+            "Pelog": [0, 1, 3, 7, 8],
+            "Ryukyu": [0, 4, 5, 7, 11],
+            "Spanish": [0, 1, 3, 4, 5, 6, 8, 10],
+            "Todi": [0, 1, 3, 6, 7, 8, 11],
+            "Yo": [0, 2, 5, 7, 9]
+        }
+        return scales
+
+    def generate_midi(self, root_pitch: int = 60):
+        """Build ten random notes from a random scale and save them to a file.
+
+        The user is asked for the output path; nothing is written when the
+        dialog is cancelled. Any failure is shown in the status label.
+
+        Args:
+            root_pitch: MIDI pitch the scale offsets are added to. The default
+                of 60 is middle C. Without a root the offsets 0-11 would be
+                used as absolute pitches, i.e. the lowest MIDI octave.
+        """
+        self.status_label.config(text='Generating MIDI...')
+
+        try:
+            midi = pretty_midi.PrettyMIDI()
+            instrument = pretty_midi.Instrument(0)
+
+            scale = random.choice(list(self.scales.keys()))
+            scale_notes = self.scales[scale]
+            ic(f"Using scale: {scale}")
+            ic(f"Using notes: {scale_notes}")
+
+            # Ten back-to-back notes, each spanning 10 time units.
+            for start, end in zip(range(0, 100, 10), range(10, 110, 10)):
+                # Clamp so an extreme root cannot leave the valid MIDI range.
+                pitch = min(127, max(0, root_pitch + random.choice(scale_notes)))
+                note = pretty_midi.Note(
+                    velocity=100, pitch=pitch,
+                    start=start, end=end
+                )
+                instrument.notes.append(note)
+
+            midi.instruments.append(instrument)
+
+            filepath = filedialog.asksaveasfilename(defaultextension='.mid')
+            if filepath:
+                midi.write(filepath)
+                # NOTE(review): passing a path positionally to Multitrack may
+                # depend on the installed pypianoroll version - confirm.
+                track = pypianoroll.Multitrack(filepath)
+                track.plot()
+                self.status_label.config(text='MIDI generated successfully!')
+
+        except Exception as e:
+            # GUI boundary: surface the error in the window instead of crashing.
+            self.status_label.config(text=f"Error generating MIDI: {e}")
+
+class MidPlay:
+    """A class to handle MIDI file playback through ``pygame.mixer.music``."""
+
+    def __init__(self):
+        """Create an empty playlist and initialise the pygame mixer."""
+        self.playlist = []
+        self.current_midi = None
+        self.playing = False
+        pygame.mixer.init()
+
+    def load_midi(self, filepath: str) -> None:
+        """Parse a MIDI file and queue it in the mixer.
+
+        ``current_midi`` is only replaced when both the parse and the mixer
+        load succeed, so a failed load never leaves a half-loaded file behind.
+
+        Args:
+            filepath: The path to the MIDI file. A falsy value (for example
+                ``None`` when nothing was selected) is reported and ignored.
+        """
+        if not filepath:
+            print("No MIDI file selected")
+            return
+        try:
+            parsed = pretty_midi.PrettyMIDI(filepath)
+            pygame.mixer.music.load(filepath)
+        except Exception as e:
+            print(f"Error loading MIDI: {e}")
+            return
+        self.current_midi = parsed
+
+    def add_to_playlist(self, filepath: str) -> None:
+        """Adds a MIDI file to the playlist.
+
+        Args:
+            filepath: The path to the MIDI file.
+        """
+        self.playlist.append(filepath)
+
+    def clear_playlist(self) -> None:
+        """Clears the playlist."""
+        self.playlist = []
+
+    def play_midi(self) -> None:
+        """Starts playback of the loaded MIDI file from the beginning."""
+        if self.current_midi:
+            # The mixer plays the file loaded in load_midi. The previous
+            # ``instruments[0].synthesize()`` call rendered audio that was
+            # thrown away and failed on files without instruments.
+            pygame.mixer.music.play()
+            self.playing = True
+        else:
+            print("No MIDI file loaded")
+
+    def pause(self) -> None:
+        """Pauses playback."""
+        pygame.mixer.music.pause()
+        self.playing = False
+
+    def stop(self) -> None:
+        """Stops playback."""
+        pygame.mixer.music.stop()
+        self.playing = False
+
+class UserInterface:
+    """Tkinter window that wires the MIDI generator and player to buttons."""
+
+    def __init__(self):
+        """Build the window and its widgets, then run the Tk main loop.
+
+        The constructor blocks until the window is closed because it ends by
+        calling ``mainloop``.
+        """
+        self.root = tk.Tk()
+        self.root.title("MIDI Generator")
+        self.root.geometry("400x200")
+        self.root.resizable(True, True)
+        self.status_label = ttk.Label(self.root, text="")
+        self.status_label.pack()
+
+        self.midi_generator = midgen(self.status_label)
+        self.midi_player = MidPlay()
+
+        # Path of the most recently loaded MIDI file, if any.
+        self.filepath = None
+        self.midi = None
+
+        self.generate_button = ttk.Button(self.root, text="Generate MIDI", command=self.midi_generator.generate_midi)
+        self.generate_button.pack()
+
+        # The button used to pass self.filepath, which was never assigned and
+        # therefore always None; ask the user for a file instead.
+        self.load_button = ttk.Button(self.root, text="Load MIDI", command=self._choose_and_load)
+        self.load_button.pack()
+
+        self.play_button = ttk.Button(self.root, text="Play MIDI", command=self.midi_player.play_midi)
+        self.play_button.pack()
+
+        self.exit_button = ttk.Button(self.root, text="Exit", command=self.root.quit)
+        self.exit_button.pack()
+
+        # Only one Tk root is created: a second tk.Tk() opened a stray,
+        # empty top-level window.
+        self.root.mainloop()
+
+    def _choose_and_load(self):
+        """Ask for a MIDI file and hand it to the player; do nothing on cancel."""
+        chosen = filedialog.askopenfilename(
+            filetypes=[("MIDI files", "*.mid *.midi"), ("All files", "*.*")]
+        )
+        if not chosen:
+            return
+        self.filepath = chosen
+        self.midi_player.load_midi(chosen)
+
+# Constructing UserInterface builds the window and enters the Tk main loop.
+if __name__ == "__main__":
+    ui = UserInterface()  
diff --git a/test_Fbrowser.py b/test_Fbrowser.py
new file mode 100755
index 0000000..75f7a78
--- /dev/null
+++ b/test_Fbrowser.py
@@ -0,0 +1,37 @@
+import unittest
+from unittest.mock import MagicMock
+from PyQt5.QtWidgets import QApplication
+from fbrowser import SampleMusicBrowser
+
+class TestSampleMusicBrowser(unittest.TestCase):
+    """Tests for ``SampleMusicBrowser`` player callbacks and file playback."""
+
+    def setUp(self):
+        # Qt allows a single QApplication per process, so reuse the existing
+        # instance instead of constructing a new one for every test.
+        self.app = QApplication.instance() or QApplication([])
+        self.browser = SampleMusicBrowser()
+
+    def tearDown(self):
+        self.app.quit()
+
+    def test_player_error(self):
+        # Mock QMediaPlayer and set error code
+        self.browser.player.error = MagicMock(return_value=1)
+        self.browser.player.errorString = MagicMock(return_value="Test Error")
+        self.browser.player_error(1)
+        # Assert that the error message is printed
+        # NOTE(review): assumes the browser exposes a console_output
+        # collection of printed messages - confirm against fbrowser.
+        self.assertIn("An error occurred: Code:1  Test Error", self.browser.console_output)
+
+    def test_player_media_status_changed(self):
+        # Mock QMediaPlayer and set media status
+        self.browser.player_media_status_changed(2)
+        # Assert that the media status is printed
+        self.assertIn("Media Status: 2", self.browser.console_output)
+
+    def test_play_file(self):
+        # Mock QFileSystemModel and set file path
+        self.browser.list_model.filePath = MagicMock(return_value="/path/to/file.mp3")
+        # Call play_file method
+        self.browser.play_file(None)
+        # Assert that the player is playing the correct media
+        self.assertEqual(self.browser.playlist.media(0).canonicalUrl().toString(), "file:///path/to/file.mp3")
+
+# Run the test suite when this file is executed directly.
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/testmidi.py b/testmidi.py
new file mode 100644
index 0000000..7365413
--- /dev/null
+++ b/testmidi.py
@@ -0,0 +1,159 @@
+import sys
+import os
+from PyQt5.QtWidgets import QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QFileSystemModel, QTreeView, QLabel, QComboBox
+from PyQt5.QtCore import QDir, Qt, QThread, pyqtSignal
+import mido
+import pygame
+import numpy as np
+from mingus.midi import fluidsynth
+from mingus.containers import Note
+import soundfile as sf
+
+class MidiPlayerThread(QThread):
+    """Worker thread that streams a MIDI file to FluidSynth in real time."""
+
+    # Carries a human-readable description of each played message to the GUI.
+    update_signal = pyqtSignal(str)
+
+    def __init__(self, file_path):
+        """Remember the MIDI file to play; playback starts in ``run``.
+
+        Args:
+            file_path: Path of the MIDI file.
+        """
+        super().__init__()
+        self.file_path = file_path
+        # Cleared by stop() to make the playback loop exit.
+        self.playing = True
+
+    def run(self):
+        """Play the file message by message until it ends or stop() is called."""
+        midi_file = mido.MidiFile(self.file_path)
+        for msg in midi_file.play():
+            if not self.playing:
+                break
+            if not msg.is_meta:
+                # NOTE(review): confirm that mingus honours the channel and
+                # velocity arguments and maps Note(int) to the same MIDI pitch.
+                if msg.type == 'note_on' and msg.velocity > 0:
+                    fluidsynth.play_Note(Note(msg.note), msg.channel, msg.velocity)
+                elif msg.type in ('note_on', 'note_off'):
+                    # A note_on with velocity 0 is the standard MIDI encoding
+                    # of a note-off; playing it would leave the note stuck.
+                    fluidsynth.stop_Note(Note(msg.note), msg.channel)
+                elif msg.type == 'control_change':
+                    fluidsynth.control_change(msg.channel, msg.control, msg.value)
+            self.update_signal.emit(f"Playing: {msg}")
+
+    def stop(self):
+        """Ask the playback loop to exit before the next message."""
+        self.playing = False
+
+class AudioPlayerThread(QThread):
+    """Worker thread that plays an audio file through ``pygame.mixer.music``."""
+
+    # Carries a human-readable playback position to the GUI.
+    update_signal = pyqtSignal(str)
+
+    def __init__(self, file_path):
+        """Remember the audio file to play; playback starts in ``run``."""
+        super().__init__()
+        self.file_path = file_path
+        self.playing = True
+
+    def run(self):
+        """Load and start the file, then report the position while it plays."""
+        music = pygame.mixer.music
+        music.load(self.file_path)
+        music.play()
+        while True:
+            # Leave once the mixer has finished or stop() was requested.
+            if not music.get_busy() or not self.playing:
+                break
+            # tick(10) paces the loop between status updates.
+            pygame.time.Clock().tick(10)
+            position_seconds = music.get_pos() / 1000
+            self.update_signal.emit(f"Playing audio: {position_seconds:.2f} seconds")
+
+    def stop(self):
+        """Request the loop to exit and halt the mixer's music stream."""
+        self.playing = False
+        pygame.mixer.music.stop()
+
+class MidiPlayer(QMainWindow):
+    """Main window: a file browser on the left and playback controls on the right."""
+
+    def __init__(self):
+        """Build the widgets and initialise the pygame mixer and FluidSynth."""
+        super().__init__()
+        self.setWindowTitle("MIDI Player and Audio File Browser")
+        self.setGeometry(100, 100, 800, 600)
+
+        self.central_widget = QWidget()
+        self.setCentralWidget(self.central_widget)
+        # NOTE: this attribute shadows the inherited QWidget.layout() method.
+        self.layout = QHBoxLayout(self.central_widget)
+
+        # File Browser
+        self.model = QFileSystemModel()
+        self.model.setRootPath(QDir.rootPath())
+        self.model.setNameFilters(["*.mid", "*.midi", "*.mp3", "*.wav", "*.sf2"])
+        # False hides non-matching files instead of showing them disabled.
+        self.model.setNameFilterDisables(False)
+
+        self.tree = QTreeView()
+        self.tree.setModel(self.model)
+        self.tree.setRootIndex(self.model.index(QDir.homePath()))
+        self.tree.setColumnWidth(0, 250)
+        self.tree.setAnimated(False)
+        self.tree.setIndentation(20)
+        self.tree.setSortingEnabled(True)
+        self.tree.setWindowTitle("File Browser")
+        self.tree.clicked.connect(self.on_file_clicked)
+
+        # Player controls
+        self.player_widget = QWidget()
+        self.player_layout = QVBoxLayout(self.player_widget)
+
+        self.file_label = QLabel("No file selected")
+        self.player_layout.addWidget(self.file_label)
+
+        self.play_button = QPushButton("Play")
+        self.play_button.clicked.connect(self.play_file)
+        self.player_layout.addWidget(self.play_button)
+
+        self.stop_button = QPushButton("Stop")
+        self.stop_button.clicked.connect(self.stop_file)
+        self.player_layout.addWidget(self.stop_button)
+
+        self.soundfont_combo = QComboBox()
+        self.soundfont_combo.currentIndexChanged.connect(self.change_soundfont)
+        self.player_layout.addWidget(self.soundfont_combo)
+
+        self.status_label = QLabel("")
+        self.player_layout.addWidget(self.status_label)
+
+        # Add widgets to main layout
+        self.layout.addWidget(self.tree)
+        self.layout.addWidget(self.player_widget)
+
+        # Initialize pygame mixer
+        pygame.mixer.init(frequency=44100, size=-16, channels=2, buffer=1024)
+
+        # Initialize FluidSynth
+        fluidsynth.init(sf2="/path/to/default/soundfont.sf2")  # Adjust this path as needed
+
+        # The currently running playback thread, or None when idle.
+        self.player_thread = None
+
+    def on_file_clicked(self, index):
+        """Show the clicked file's name and load it when it is a SoundFont."""
+        file_path = self.model.filePath(index)
+        self.file_label.setText(os.path.basename(file_path))
+        if file_path.lower().endswith('.sf2'):
+            self.load_soundfont(file_path)
+
+    def load_soundfont(self, sf2_path):
+        """Initialise FluidSynth with ``sf2_path`` and refill the instrument list."""
+        try:
+            # NOTE(review): confirm that a repeated fluidsynth.init() call
+            # actually switches to the new SoundFont.
+            fluidsynth.init(sf2=sf2_path)
+            self.soundfont_combo.clear()
+            self.soundfont_combo.addItems([f"Instrument {i}" for i in range(128)])  # MIDI has 128 standard instruments
+        except Exception as e:
+            print(f"Error loading soundfont: {e}")
+
+    def change_soundfont(self, index):
+        """Select instrument ``index`` on channel 0."""
+        fluidsynth.set_instrument(0, index)  # Set instrument for channel 0
+
+    def play_file(self):
+        """Play the file selected in the tree, replacing any current playback.
+
+        MIDI files use ``MidiPlayerThread`` and MP3/WAV files use
+        ``AudioPlayerThread``; any other selection is ignored.
+        """
+        file_path = self.model.filePath(self.tree.currentIndex())
+        lowered = file_path.lower()
+        if lowered.endswith(('.mid', '.midi')):
+            thread_class = MidiPlayerThread
+        elif lowered.endswith(('.mp3', '.wav')):
+            thread_class = AudioPlayerThread
+        else:
+            return
+
+        # Stop and wait for the previous thread first. Otherwise two files
+        # play on top of each other and the running QThread loses its only
+        # reference when player_thread is reassigned.
+        self.stop_file()
+
+        self.player_thread = thread_class(file_path)
+        self.player_thread.update_signal.connect(self.update_status)
+        self.player_thread.start()
+
+    def stop_file(self):
+        """Stop the running playback thread, if any, and silence all output."""
+        if self.player_thread and self.player_thread.isRunning():
+            self.player_thread.stop()
+            self.player_thread.wait()
+        pygame.mixer.stop()
+        fluidsynth.stop_everything()
+
+    def update_status(self, status):
+        """Show ``status`` in the status label (slot for ``update_signal``)."""
+        self.status_label.setText(status)
+
+# Script entry point: create the Qt application, show the window, and exit
+# with the event loop's return code.
+if __name__ == "__main__":
+    app = QApplication(sys.argv)
+    player = MidiPlayer()
+    player.show()
+    sys.exit(app.exec_())
\ No newline at end of file