/*!***************************************************************************
*!
*! FILE NAME  : train_mod.c
*!
*! DESCRIPTION: Synchronous serial driver module for model railway train
*!              control, for the ETRAX 100LX chip by Axis Communications AB.
*!              Designed to work with Linux version 2.6.12 and Axis SDK
*!              version 2.01. See http://developer.axis.com/ .
*!
*!              Intended for use with the Acme systems FOX board, see
*!              http://www.acmesystems.it/ , but it should work on other
*!              ETRAX 100LX based platforms as well.
*!
*!              The module is accessed from the user application through
*!              the open, write and close functions. The module uses the
*!              syncser1 device (major 125, minor 1) by default, but it
*!              is possible to select syncser0 (major 125, minor 0) instead,
*!              or both. This is selected with module parameters at module
*!              installation. The syncser0 device corresponds to sync. serial
*!              port 1 on ETRAX 100LX, and the syncser1 device corresponds
*!              to sync. serial port 3. Only the sync. serial port 3 is useful
*!              on the FOX board, since the pins for sync. serial port 1 are
*!              used for USB.
*!
*!              Each write command should contain from 1 to 32 bytes, where
*!              the first byte is a command byte, and the rest is packet
*!              data or command parameters. Erroneous or too long commands
*!              will be silently consumed. The write command times out
*!              and returns 0 if the transmitter is busy for more than
*!              one jiffy. The application should then retry the write.
*!
*!              The following commands are defined (in train_mod.h) for
*!              the moment:
*!
*!              train_cmd_nop        No operation.
*!
*!              train_cmd_dcc        DCC train command, defined but not
*!                                   implemented.
*!
*!              train_cmd_mfx        Märklin/ESU mfx train command, defined
*!                                   but not implemented.
*!
*!              train_cmd_mm         Märklin/Motorola train command.
*!                                   The command consists of 4 bytes. Byte 0
*!                                   is the command code, and the following
*!                                   three bytes contain the train command.
*!                                   The train command is sent lsb first and
*!                                   byte 1 first. A Märklin/Motorola packet
*!                                   is 18 bits, so the 6 upper bits of byte 3
*!                                   are ignored.
*!
*!              train_cmd_mm_pause   Märklin/Motorola pause configuration.
*!                                   The command consists of 3 bytes. Byte 0
*!                                   is the command code and the two following
*!                                   bytes form a 16-bit pause value, with
*!                                   byte 1 as the LSB and byte 2 as the MSB.
*!                                   The value should be given in units of
*!                                   17.36 us. Standard pause values for the
*!                                   Märklin/Motorola protocol are defined in
*!                                   train_mod.h .
*!
*!              For information about the Märklin/Motorola format, see
*!              http://spazioinwind.libero.it/scorzoni/motorola.htm
*!
*!              For information about the Märklin/ESU mfx format, see
*!              http://www.mue473.de/mfxrahmen.htm
*!
*!              For information about the DCC format, see
*!              http://www.nmra.org/standards/DCC/standards_rps/DCCStds.html
*!
*!              Known problems:
*!              ===============
*!
*!              1. Because of the somewhat buggy port allocation mechanisms
*!                 in the Axis SDK 2.01, we have to deallocate the pins for
*!                 the usb1 port, even if we only use sync. serial port 3
*!                 which has no actual pin conflicts with usb1. To not drop
*!                 the usb1 port, we reallocate it again in this case. This
*!                 will result in a wild pointer when the train_mod module is
*!                 removed. If another module later tries to allocate the usb1
*!                 port, and the usb1 port is not free at that time, it may
*!                 cause an oops. I consider the potential risk for an oops
*!                 a smaller problem than the loss of a usb port.
*!
*!                 The port allocation bug is reported to Axis, and is likely
*!                 to be fixed in a later version of the SDK. When it is fixed,
*!                 the deallocation/reallocation of usb1 should be removed.
*!                 Use the define USB1_ALLOCATION_BUG below to control it.
*!
*!              2. Since there is no system-wide shadow register in the
*!                 Axis SDK 2.01 for the R_SYNC_SERIAL_PRESCALE register,
*!                 it is not possible to let this driver coexist with another
*!                 driver for the other sync. serial port. This would be
*!                 difficult anyway, since some configurations are common to
*!                 both ports.
*!
*! Version: 0.0, 2007-01-27:   Initial version, not tested.
*!
*! ---------------------------------------------------------------------------
*!
*! (C) Copyright 2007 Per Zander, SWEDEN http://home.swipnet.se/perz/
*!
*! ---------------------------------------------------------------------------
*!
*!    This program is free software; you can redistribute it and/or modify
*!    it under the terms of the GNU General Public License as published by
*!    the Free Software Foundation; either version 2 of the License, or
*!    (at your option) any later version.
*!
*!    This program is distributed in the hope that it will be useful,
*!    but WITHOUT ANY WARRANTY; without even the implied warranty of
*!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*!    GNU General Public License for more details.
*!
*!    To obtain a copy of the GNU General Public License, write to the Free
*!    Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
*!    02110-1301  USA.
*!
*!***************************************************************************/

//------------------------------------------------------------------
// Includes.
//------------------------------------------------------------------

//----- Linux includes. -----*

#include <linux/init.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/kdev_t.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/interrupt.h>
#include <linux/completion.h>
#include <asm/uaccess.h>
#include <asm/arch/io_interface_mux.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/dma.h>

//----- Application include file. -----*

#include "train_mod.h"

//------------------------------------------------------------------
// Defines.
//------------------------------------------------------------------

//----- Device nodes. -----*

// Default major number and device name. They are the defaults of the
// "major" and "name" module parameters, and train_init() registers the
// character device region with them.

#define SYNCSER_MAJOR 125
#define SYNCSER_NAME "syncser"

//----- Data output polarity. -----*

// Set it to 1 if you have an external inverter on the synchronous serial
// data output. Set it to 0 otherwise.
//
// When set, init_ser_data() stores every pattern byte inverted and the
// def_out0 field of the serial control word is set to "high" instead of
// "low".

#define TRAIN_EXTERNAL_DATA_INVERTER 1

//----- USB port 1 allocation. -----*

// Set USB1_ALLOCATED to 1 if your kernel is configured to use the usb1 port.
// This is typically true with the precompiled kernels for the FOX board.
// Set it to 0 otherwise.

#define USB1_ALLOCATED 1

// Set USB1_ALLOCATION_BUG to 1 if the pin allocation bug found in the Axis
// SDK 2.01 version of file arch/cris/arch/kernel/io_interface_mux.c isn't
// fixed in your kernel version. Set it to 0 otherwise.
//
// With USB1_ALLOCATED set, train_init() frees the usb1 interface when
// either sser1 is selected or this define is set.

#define USB1_ALLOCATION_BUG 1

//----- Write timeout. -----*

// Write command timeout, in number of jiffies. Passed by train_write() to
// wait_for_completion_interruptible_timeout() while waiting for a free
// DMA descriptor list.

#define TRAIN_WR_TIMEOUT 1

//------------------------------------------------------------------
// Prototypes.
//------------------------------------------------------------------

// Forward declarations. tr_interrupt() is needed by init_sser_port() and
// the three file operations are needed by the train_fops table, all of
// which appear before the respective definitions.

static irqreturn_t tr_interrupt(int irq, void * dev_id, struct pt_regs * regs);
int train_open (struct inode * my_inode, struct file * my_file);
int train_release (struct inode * my_inode, struct file * my_file);
ssize_t train_write (struct file * my_file, const char __user * my_data,
                 size_t my_size, loff_t * offp);
static void train_exit(void);

//------------------------------------------------------------------
// Data type definitions.
//------------------------------------------------------------------

//----- Structure for port specific data. -----*

// Per-port lookup record. It is filled in by get_dma_irq_reg_info() and
// consumed by init_sser_port() and stop_sser_port(), i.e. on the open and
// release paths.

struct train_dma_irq_reg_info {
  unsigned int dma_irq_nr;         // Irq number of the port's DMA TX channel.
  unsigned int dma_nr;             // DMA channel nr for cris_request_dma().
  enum dma_owner owner;            // Owner id given to cris_request_dma().
  const char * dma_irq_descr;      // Name used for the irq and DMA requests.
  volatile u32 * sser_ctrl;        // R_SYNC_SERIALx_CTRL register.
  volatile u32 * dma_first;        // R_DMA_CHx_FIRST register.
  volatile u8 * dma_cmd;           // R_DMA_CHx_CMD register.
  const volatile u8 * dma_status;  // R_DMA_CHx_STATUS register.
  volatile u8 * clr_intr;          // R_DMA_CHx_CLR_INTR register.
  unsigned int mask_bit;           // Bit nr of dmaX_descr in R_IRQ_MASK2_*.
};

//----- Packet data received from the user application. -----*

// The first byte is a command byte and the rest is data, but it does not
// make sense to make that distinction in the structure definition.
//
// The size of this structure is also the upper limit for the length of a
// write: train_write() silently consumes anything longer than
// sizeof(struct train_packet_data).

struct train_packet_data {
  unsigned char data[32];
};

//----- Structures containing the DMA descriptor lists. -----*

// Märklin/Motorola half data packet descriptors. A Märklin/Motorola
// half data packet consists of 18 data bits. Each DMA descriptor handles
// 3 bits, so we need 6 descriptors.
//
// Each descriptor's buf field points at one of the eight precomputed
// 3-bit patterns in train_ser_data.mm_data[] (see init_descr_list() and
// train_write()).

struct train_mm_dma_descr_data {
  struct etrax_dma_descr d[6];
};

// Märklin/Motorola complete packet including pauses. We need two half
// data packets, one intra packet gap descriptor (t1) and two inter packet
// pause descriptors, one with the descriptor interrupt set (tr) and one
// without interrupt (t2).
//
// As linked by init_descr_list(): p[0] -> t1 -> p[1] -> t2 -> p[0], and
// tr -> p[0]. train_write() redirects the other list's p[1].d[5].next to
// this list's tr in order to switch lists.

struct train_mm_dma_descr {
  struct train_mm_dma_descr_data p[2];  // The two identical packet halves.
  struct etrax_dma_descr t1;            // Gap between the two halves.
  struct etrax_dma_descr t2;            // Inter packet pause, no interrupt.
  struct etrax_dma_descr tr;            // Inter packet pause, with d_int.
};

// All descriptors are gathered in one struct for convenience. There are
// two complete Märklin/Motorola packets. The rest of the descriptors
// are reserved for other protocols.
//
// The two mm[] lists are used alternately: one is being transmitted while
// the other is free to be updated by train_write(). The index of the free
// one is train_dev.descr_list_nr.

struct train_dma_descr_pool {
  struct train_mm_dma_descr mm[2];
  struct etrax_dma_descr p[40];         // Reserved, not used in this view.
};

//----- Packet data patterns. -----*

// Märklin/Motorola data pattern struct. Märklin/Motorola packets are sent
// in chunks of 3 bits. Each bit is 0.208 ms long. With a sync. serial baud
// rate of 460.8 k three Märklin/Motorola bits correspond to 36 bytes of
// DMA buffer data.
//
// The value 36 is also the sw_len that init_descr_list() writes into every
// data descriptor, so the two must be kept in step.

struct train_mm_3_bits {
  unsigned char data[36];
};

// Gather all packet data patterns into one struct.
// To represent all possible combinations of 3-bit Märklin/Motorola data,
// we need 8 different data patterns. Then we also need a buffer containing
// the maximum length Märklin/Motorola pause. Some extra bytes are reserved
// for implementation of other protocols.
//
// mm_data[] is indexed by the 3-bit value to be sent. All pause descriptors
// (t1, t2, tr) point at the start of mm_pause[] and differ only in sw_len,
// which is why mm_pause[] must be as long as the longest allowed pause.

struct train_ser_data {
  struct train_mm_3_bits mm_data[8];
  unsigned char mm_pause[TRAIN_MM_MAX_PACKET_GAP];
  unsigned char data[1672];             // Reserved, not used in this view.
};

//----- Device struct. -----*

// One instance per serial port in use, see train_devices[].
//
// descr_list_nr is toggled by tr_interrupt() each time a descriptor
// interrupt is seen, and read by train_write() after it has waited on
// cmpl. ref_cnt is only changed with sem held.

struct train_dev {
  struct train_dma_descr_pool * descr_list; // Pointer to start of DMA descr.
  int ref_cnt;                              // Count open - release.
  int nr;                                   // Which serial port is used?
                                            // 0: sync. serial port 1,
                                            // otherwise sync. serial port 3.
  struct cdev my_cdev;
  struct semaphore sem;                     // To protect open and release.
  struct completion * cmpl;                 // To synchronize write and irq.
  int descr_list_nr;                        // Which list is free for write?
};

//----- Initialization progress levels. -----*

// Records how far train_init() has come. train_init_level starts at
// train_init_none; train_init() sets it to train_init_region once the
// chrdev region is registered and to train_init_port1 after the sync.
// serial port 3 interface step.
//
// NOTE(review): the enumerators appear to be listed from "everything done"
// down to "nothing done", presumably so that clean-up code can compare
// levels. The clean-up code is not visible here, so do not reorder the
// enumerators without checking it.

enum train_init_t {
  train_init_success,
  train_init_dma1,
  train_init_cdev0,
  train_init_dma0,
  train_init_started,
  train_init_ports,
  train_init_port1,
  train_init_region,
  train_init_none
};

//----- Module parameters. -----*

// All parameters are readable (S_IRUGO) but not writable after load.
// train_init() rejects the module load with -EINVAL unless sser1, sser3
// and sticky are 0 or 1, at least one of sser1/sser3 is set, major is in
// 1..255, minor is in 0..255 and minor + (sser1 + sser3) is below 256.
//
// NOTE(review): the visible part of train_init() validates "major" but
// then registers the region with SYNCSER_MAJOR and SYNCSER_NAME, so the
// "major" and "name" parameters have no effect there. Confirm whether this
// is intended.

static int major = SYNCSER_MAJOR;   // Requested major device number.
static int minor = 1;               // First minor device number.
static int sser1 = 0;               // 1: use sync. serial port 1.
static int sser3 = 1;               // 1: use sync. serial port 3.
static char * name = SYNCSER_NAME;  // Device name.
static int sticky = 0;              // See the comment on module_param below.

module_param(major, int, S_IRUGO);
module_param(minor, int, S_IRUGO);
module_param(sser1, int, S_IRUGO);
module_param(sser3, int, S_IRUGO);
module_param(name, charp, S_IRUGO);
module_param(sticky, int, S_IRUGO); // If set, the transmission is started
                                    // already at module installation.
                                    // Otherwise at first open.

//----- Module variables. -----*

// First device number of the region owned by this module. Assigned in
// train_init() and used by train_setup_cdev() as the base to which the
// per-port index is added.

static dev_t dev = MKDEV(0, 0);

//----- File operations struct. -----*

// Only open, write and release are provided; there is no read, ioctl or
// poll support. The table is attached to each cdev by train_setup_cdev().

static struct file_operations train_fops = {
  .owner   = THIS_MODULE,
  .write   = train_write,
  .open    = train_open,
  .release = train_release,
};

// train_devices[] is indexed through get_train_dev(). ser_data holds the
// serial data patterns that the DMA descriptors point at; it is read by
// init_descr_list() and train_write().
// NOTE(review): dma_mem_base and ser_data are assigned outside the visible
// part of the file; dma_mem_base is presumably the base of the memory that
// holds the descriptor pools and ser_data - confirm in train_init().

static struct train_dev train_devices[2];         // One per channel.
static void * dma_mem_base;                       
static struct train_ser_data * ser_data;
static enum train_init_t train_init_level = train_init_none;

//----- Configuration data for the synchronous serial port. -----*

// We can use R_SYNC_SERIAL1_CTRL values for both channels since both
// channels have the same control register layout.
//
// The tr_enable field is deliberately left out here. init_sser_port() and
// stop_sser_port() OR in tr_enable = enable or disable when they write the
// control register.
//
// The idle level of the data output (def_out0) follows
// TRAIN_EXTERNAL_DATA_INVERTER: high with an external inverter, low
// without.

static const u32 sser_ctrl_val =
                  IO_STATE(R_SYNC_SERIAL1_CTRL, tr_baud, c460k8Hz) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, dma_enable, on) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, mode, master_output) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, error, ignore) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, rec_enable, disable) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, f_synctype, normal) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, f_syncsize, bit) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, f_sync, on) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, clk_mode, normal) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, clk_halt, running) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, bitorder, lsb) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, wordsize, size8bit) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, buf_empty, lmt_8) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, buf_full, lmt_32) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, flow_ctrl, disabled) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, clk_polarity, pos) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, frame_polarity, normal) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, status_polarity, normal) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, clk_driver, normal) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, frame_driver, normal) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, status_driver, normal) |
                  (TRAIN_EXTERNAL_DATA_INVERTER ?
                  IO_STATE(R_SYNC_SERIAL1_CTRL, def_out0, high) :
                  IO_STATE(R_SYNC_SERIAL1_CTRL, def_out0, low));

// These must be static since the de-allocation of the DMA channel
// tries to match the pointer, not the string itself.
//
// The same pointer is handed to request_irq(), cris_request_dma() and
// cris_free_dma() through train_dma_irq_reg_info.dma_irq_descr.

static const char * ser1_dma_irq_descr = "serial 1 dma tr";
static const char * ser3_dma_irq_descr = "serial 3 dma tr";

//----- Completion structures. -----*

// Completions used to hand a free DMA descriptor list from tr_interrupt()
// to train_write(). They are reached through train_dev.cmpl.
// NOTE(review): the assignment of these to the devices is outside the
// visible part of the file; presumably one per entry in train_devices[].

DECLARE_COMPLETION(cmpl0);
DECLARE_COMPLETION(cmpl1);

//------------------------------------------------------------------
// Local functions.
//------------------------------------------------------------------

//----- Get train_dev structure from port number. -----*

// Returns the train_devices[] entry for port number nr. The slot is
// (nr & sser1): when sync. serial port 1 is not selected (sser1 == 0) the
// result is always slot 0, otherwise the slot is taken from nr.

static inline struct train_dev * get_train_dev(unsigned int nr)
{
  unsigned int slot = nr & sser1;

  return train_devices + slot;
}

//----- Get port specific info about irq, dma and reg addresses. -----*

// Fills in *info for the selected port.
//
//   info   Record to fill in; every field is written.
//   nr     0 selects sync. serial port 1 (DMA channel 8 registers),
//          any other value selects sync. serial port 3 (DMA channel 4
//          registers).

static inline void get_dma_irq_reg_info(struct train_dma_irq_reg_info * info,
                                        int nr)
{
  if (nr == 0) {

    // Sync. serial port 1.

    info->dma_irq_nr = SER1_DMA_TX_IRQ_NBR;
    info->dma_nr = SER1_TX_DMA_NBR;
    info->owner = dma_ser1;
    info->dma_irq_descr = ser1_dma_irq_descr;
    info->sser_ctrl = R_SYNC_SERIAL1_CTRL;
    info->dma_first = R_DMA_CH8_FIRST;
    info->dma_cmd = R_DMA_CH8_CMD;
    info->dma_status = R_DMA_CH8_STATUS;
    info->clr_intr = R_DMA_CH8_CLR_INTR;
    info->mask_bit = IO_BITNR(R_IRQ_MASK2_SET, dma8_descr);
    return;
  }

  // Sync. serial port 3.

  info->dma_irq_nr = SER3_DMA_TX_IRQ_NBR;
  info->dma_nr = SER3_TX_DMA_NBR;
  info->owner = dma_ser3;
  info->dma_irq_descr = ser3_dma_irq_descr;
  info->sser_ctrl = R_SYNC_SERIAL3_CTRL;
  info->dma_first = R_DMA_CH4_FIRST;
  info->dma_cmd = R_DMA_CH4_CMD;
  info->dma_status = R_DMA_CH4_STATUS;
  info->clr_intr = R_DMA_CH4_CLR_INTR;
  info->mask_bit = IO_BITNR(R_IRQ_MASK2_SET, dma4_descr);
}

//----- Setup cdev structure. -----*

static int train_setup_cdev (struct train_dev * my_dev, int nr)
{
  int err_no;
  dev_t devno;
  
  devno = MKDEV(MAJOR(dev), MINOR(dev) + nr);
  cdev_init(&my_dev->my_cdev, &train_fops);
  my_dev->my_cdev.owner = THIS_MODULE;
  err_no = cdev_add(&my_dev->my_cdev, devno, 1);
  if (err_no) printk(KERN_ALERT "Error: %d adding train_mod%d\n", err_no, nr);
  return err_no;
}

//----- Initialize serial data patterns. -----*

// Fills in the eight Märklin/Motorola (MM) 3-bit patterns and the pause
// buffer in *data.
//
// Pattern number n holds the serial data for the three MM bits of n, bit 0
// first. One MM bit is 0.208 ms, i.e., 0.208 * 460.8 / 8 = 12 pattern
// bytes:
//
//   MM "0":  26 us (12 pattern bits) high, then 182 us (84 bits) low,
//            i.e. 1 byte 0xff, 1 byte 0x0f, 10 bytes 0x00.
//   MM "1": 182 us (84 pattern bits) high, then  26 us (12 bits) low,
//            i.e. 10 bytes 0xff, 1 byte 0x0f, 1 byte 0x00.
//
// The 0x0f byte gives four more high bits because the synchronous serial
// port sends pattern bytes lsb first. The pause buffer holds the low
// level throughout. With TRAIN_EXTERNAL_DATA_INVERTER set, every byte is
// stored inverted.

static void init_ser_data (struct train_ser_data * data)
{
  const unsigned char invert = TRAIN_EXTERNAL_DATA_INVERTER ? 0xff : 0x00;
  int pattern, bit, byte;

  for (pattern = 0; pattern < 8; pattern++) {
    unsigned char * out = data->mm_data[pattern].data;

    for (bit = 0; bit < 3; bit++) {

      // Number of completely high bytes at the start of this MM bit.

      const int full = (pattern & (1 << bit)) ? 10 : 1;

      for (byte = 0; byte < 12; byte++) {
        unsigned char level;

        if (byte < full) {
          level = 0xff;
        }
        else if (byte == full) {
          level = 0x0f;
        }
        else {
          level = 0x00;
        }
        out[12 * bit + byte] = level ^ invert;
      }
    }
  }

  // The pause is the low level all the way, i.e. "invert" itself.

  for (byte = 0; byte < TRAIN_MM_MAX_PACKET_GAP; byte++) {
    data->mm_pause[byte] = invert;
  }
}

//----- Initalize DMA descriptors. -----*

static void init_descr_list (struct train_dma_descr_pool * descr_list)
{
  int i, j, k, d;

  // Initialize Märklin/Motorola data packet list.
  //
  //  +---------------->mm[0].p[0].d[0]-->mm[0].p[0].d[1]-->mm[0].p[0].d[2]--+
  //  |                                                                      |
  //  |   +--mm[0].t1<--mm[0].p[0].d[5]<--mm[0].p[0].d[4]<--mm[0].p[0].d[3]<-+
  //  |   |
  //  |   +------------>mm[0].p[1].d[0]-->mm[0].p[1].d[1]-->mm[0].p[1].d[2]--+
  //  |                                                                      |
  //  +-<---mm[0].t2<-+-mm[0].p[1].d[5]<--mm[0].p[1].d[4]<--mm[0].p[1].d[3]<-+
  //  |               :
  //  | +-<-mm[1].tr<-+
  //  | |
  //  | +-------------->mm[1].p[0].d[0]-->mm[1].p[0].d[1]-->mm[1].p[0].d[2]--+
  //  | |                                                                    |
  //  | | +--mm[1].t1<--mm[1].p[0].d[5]<--mm[1].p[0].d[4]<--mm[1].p[0].d[3]<-+
  //  | | |
  //  | | +------------>mm[1].p[1].d[0]-->mm[1].p[1].d[1]-->mm[1].p[1].d[2]--+
  //  | |                                                                    |
  //  | +-<-mm[1].t2<-+-mm[1].p[1].d[5]<--mm[1].p[1].d[4]<--mm[1].p[1].d[3]<-+
  //  |               :
  //  +-<---mm[0].tr<-+

  for (i = 0; i < 2; i++) {
    for (j = 0; j < 2; j++) {
      d = TRAIN_MM_IDLE_PACKET;          // Start with the idle packet.
      for (k = 0; k < 6; k++) {
        descr_list->mm[i].p[j].d[k].sw_len = 36;
        descr_list->mm[i].p[j].d[k].ctrl = 0;
        if (k < 5) {
          descr_list->mm[i].p[j].d[k].next =
            virt_to_phys(&(descr_list->mm[i].p[j].d[k + 1]));
        }
        descr_list->mm[i].p[j].d[k].buf =
          virt_to_phys(&(ser_data->mm_data[d & 7]));
        descr_list->mm[i].p[j].d[k].hw_len = 0;
        descr_list->mm[i].p[j].d[k].status = 0;
        descr_list->mm[i].p[j].d[k].fifo_len = 0;
        d >>= 3;
      }
    }
    descr_list->mm[i].p[0].d[5].next = virt_to_phys(&(descr_list->mm[i].t1));
    descr_list->mm[i].p[1].d[5].next = virt_to_phys(&(descr_list->mm[i].t2));

    // Märklin/Motorola gap between the two packet halves.

    descr_list->mm[i].t1.sw_len = TRAIN_MM_INTRA_PACKET_GAP;
    descr_list->mm[i].t1.ctrl = 0;
    descr_list->mm[i].t1.next = virt_to_phys(&(descr_list->mm[i].p[1].d[0]));
    descr_list->mm[i].t1.buf = virt_to_phys(&(ser_data->mm_pause[0]));
    descr_list->mm[i].t1.hw_len = 0;
    descr_list->mm[i].t1.status = 0;
    descr_list->mm[i].t1.fifo_len = 0;

    // Märklin/Motorola inter packet pauses. By default we use the
    // "Universal" timing, which alternates between 4.025 ms and 6.025 ms.

    descr_list->mm[i].t2.sw_len = i ? TRAIN_MM_INTER_PACKET_GAP :
                                      TRAIN_MM_MAX_PACKET_GAP;
    descr_list->mm[i].t2.ctrl = 0;
    descr_list->mm[i].t2.next = virt_to_phys(&(descr_list->mm[i].p[0].d[0]));
    descr_list->mm[i].t2.buf = virt_to_phys(&(ser_data->mm_pause[0]));
    descr_list->mm[i].t2.hw_len = 0;
    descr_list->mm[i].t2.status = 0;
    descr_list->mm[i].t2.fifo_len = 0;

    // The same, but with the descriptor interrupt set.

    descr_list->mm[i].tr.sw_len = i ? TRAIN_MM_INTER_PACKET_GAP :
                                      TRAIN_MM_MAX_PACKET_GAP;
    descr_list->mm[i].tr.ctrl = d_int;
    descr_list->mm[i].tr.next = virt_to_phys(&(descr_list->mm[i].p[0].d[0]));
    descr_list->mm[i].tr.buf = virt_to_phys(&(ser_data->mm_pause[0]));
    descr_list->mm[i].tr.hw_len = 0;
    descr_list->mm[i].tr.status = 0;
    descr_list->mm[i].tr.fifo_len = 0;
  }
}

//----- Initialize R_SYNC_SERIAL_PRESCALE. -----*

static inline void init_sser_prescale (void)
{
  * R_SYNC_SERIAL_PRESCALE =
      IO_STATE(R_SYNC_SERIAL_PRESCALE, clk_sel_u3, baudrate) |
      IO_STATE(R_SYNC_SERIAL_PRESCALE, word_stb_sel_u3, external) |
      IO_STATE(R_SYNC_SERIAL_PRESCALE, clk_sel_u1, baudrate) |
      IO_STATE(R_SYNC_SERIAL_PRESCALE, word_stb_sel_u1, external) |
      IO_STATE(R_SYNC_SERIAL_PRESCALE, prescaler, div1) |
      IO_STATE(R_SYNC_SERIAL_PRESCALE, warp_mode, normal) |
      IO_FIELD(R_SYNC_SERIAL_PRESCALE, frame_rate, 0) |
      IO_FIELD(R_SYNC_SERIAL_PRESCALE, word_rate, 7);
}

//----- Start DMA and serial ports. -----*

// Claims the DMA irq and the DMA channel of the port that my_dev refers
// to, and starts continuous transmission of descriptor list 0.
//
//   my_dev   Device to start. my_dev->descr_list must already have been
//            set up (see init_descr_list()).
//
// Returns 0 on success or -EBUSY if the irq or the DMA channel could not
// be obtained. In the failure case nothing is left allocated.
//
// The order of the steps matters: the serial port is configured but kept
// disabled, the DMA is reset and its interrupts cleared before the
// descriptor interrupt is unmasked, and the serial port is not enabled
// until the DMA reports a non-zero status.
//
// NOTE(review): both wait loops spin without a timeout.
// NOTE(review): the R_DMA_CH4_* field macros are used for both channels;
// this assumes channels 4 and 8 share the same register layout.

static int init_sser_port (struct train_dev * my_dev)
{
  struct train_dma_descr_pool * descr_list;
  struct train_dma_irq_reg_info info;

  descr_list = my_dev->descr_list;
  get_dma_irq_reg_info(&info, my_dev->nr);

  // Setup the serial port but do not start it yet.

  * info.sser_ctrl = sser_ctrl_val |
                     IO_STATE(R_SYNC_SERIAL1_CTRL, tr_enable, disable);

  // Register DMA and DMA irq. my_dev is the dev_id that tr_interrupt()
  // gets back, and that free_irq() must be given.

  if (request_irq(info.dma_irq_nr,
                  tr_interrupt,
                  SA_INTERRUPT,
                  info.dma_irq_descr,
                  my_dev)) {
    printk(KERN_ALERT "DMA %d irq (%d) busy, give up.\n",
           info.dma_nr, info.dma_irq_nr);
    return -EBUSY;
  }
  else if (cris_request_dma(info.dma_nr,
                            info.dma_irq_descr,
                            DMA_VERBOSE_ON_ERROR,
                            info.owner)) {
    free_irq(info.dma_irq_nr, my_dev);   // Undo the irq request above.
    printk(KERN_ALERT "DMA %d busy, give up.\n", info.dma_nr);
    return -EBUSY;
  }

  // Reset DMA and wait for reset completion.

  * info.dma_cmd = IO_STATE(R_DMA_CH4_CMD, cmd, reset);
  while((* info.dma_cmd & IO_MASK(R_DMA_CH4_CMD, cmd )) !=
         IO_STATE(R_DMA_CH4_CMD, cmd, hold));

  // Clear DMA interrupts and turn on/off interrupt mask.
  // The eop mask bit is taken to be the bit just above the descr bit.

  * info.clr_intr = IO_STATE(R_DMA_CH4_CLR_INTR, clr_descr, do) |
                    IO_STATE(R_DMA_CH4_CLR_INTR, clr_eop, do);
  * R_IRQ_MASK2_CLR = 2 << info.mask_bit;     // Disable eop interrupt.
  * R_IRQ_MASK2_SET = 1 << info.mask_bit;     // Enable descr interrupt.

  // Re-initialize the completion signalling.

  INIT_COMPLETION(* my_dev->cmpl);

  // Start DMA and wait until it has started to fill the FIFO.
  // The DMA is started at the "tr" pause of list 0, which has the
  // descriptor interrupt set (see init_descr_list()).

  my_dev->descr_list_nr = 0;
	* info.dma_first = virt_to_phys(&(descr_list->mm[0].tr));
	* info.dma_cmd = IO_STATE(R_DMA_CH4_CMD, cmd, start);
  while (* info.dma_status == 0);

  // Start the serial port.

  * info.sser_ctrl = sser_ctrl_val |
                     IO_STATE(R_SYNC_SERIAL1_CTRL, tr_enable, enable);
  return 0;
}

//----- Stop DMA and serial ports. -----*

// Stops the transmission on the port that my_dev refers to and gives back
// the irq and DMA channel taken by init_sser_port(). It must only be
// called for a port that init_sser_port() has started successfully.
//
// NOTE(review): the wait for the DMA reset spins without a timeout.

static void stop_sser_port (struct train_dev * my_dev)
{
  struct train_dma_irq_reg_info info;

  get_dma_irq_reg_info(&info, my_dev->nr);

  // Disable the transmitter first, then reset the DMA.

  * info.sser_ctrl = sser_ctrl_val |
                     IO_STATE(R_SYNC_SERIAL1_CTRL, tr_enable, disable);
  * info.dma_cmd = IO_STATE(R_DMA_CH4_CMD, cmd, reset);

  // We need to reset DMA before clearing interrupts to make sure no
  // new interrupts can occur. However, interrupts should be masked
  // off and cleared before the DMA is released to a possible other user.
  // Therefore, we need to do an explicit DMA reset here even though
  // cris_free_dma() resets the DMA.

  while((* info.dma_cmd & IO_MASK(R_DMA_CH4_CMD, cmd )) !=
         IO_STATE(R_DMA_CH4_CMD, cmd, hold));

  // Mask both the descr bit and the bit above it (eop), then acknowledge
  // anything that is still pending.

  * R_IRQ_MASK2_CLR = 3 << info.mask_bit;              // Disable interrupts.
  * info.clr_intr = IO_STATE(R_DMA_CH4_CLR_INTR, clr_descr, do) |
                    IO_STATE(R_DMA_CH4_CLR_INTR, clr_eop, do);

  // Same dev_id and description pointer as were used when requesting.

  free_irq(info.dma_irq_nr, my_dev);
  cris_free_dma(info.dma_nr, info.dma_irq_descr);      // deallocate DMA.
}

//----- Check validity of Märklin/Motorola command. -----*

// Returns 1 if size is a valid length for a Märklin/Motorola command and
// 0 otherwise. The command is always 4 bytes long (command code plus three
// data bytes). The data needs no check since all values are allowed.

static inline int mm_ok(size_t size)
{
  return size == 4;
}

//----- Check validity of DCC command. -----*

// DCC is not yet implemented, so every DCC command is reported as invalid
// (0), whatever its contents and length.

static inline int dcc_ok (struct train_packet_data * data, size_t size)
{
  (void) data;
  (void) size;

  return 0;
}

//----- Check validity of mfx command. -----*

// mfx is not yet implemented, so every mfx command is reported as invalid
// (0), whatever its contents and length.

static inline int mfx_ok (struct train_packet_data * data, size_t size)
{
  (void) data;
  (void) size;

  return 0;
}

//----- Märklin/Motorola change pause command. -----*

// This can be done without lock, since the write to the sw_len field
// is atomic by nature (assuming word-aligned descriptors).

static inline void mm_pause_cfg (struct train_packet_data * data,
                                 size_t size, struct train_dev * dev)
{
  u16 val;

  if (size != 3) {
    return;
  }
  val = * ((u16 *) (&(data->data[1])));
  if (val > TRAIN_MM_MAX_PACKET_GAP) {
    val = TRAIN_MM_MAX_PACKET_GAP;          // Do not allow too long pauses.
  }
  if (val == TRAIN_MM_UNIVERSAL_PACKET_GAP) {
    dev->descr_list->mm[0].t2.sw_len = TRAIN_MM_MAX_PACKET_GAP;
    dev->descr_list->mm[0].tr.sw_len = TRAIN_MM_MAX_PACKET_GAP;
    dev->descr_list->mm[1].t2.sw_len = TRAIN_MM_INTER_PACKET_GAP;
    dev->descr_list->mm[1].tr.sw_len = TRAIN_MM_INTER_PACKET_GAP;
  }
  else {
    dev->descr_list->mm[0].t2.sw_len = val;
    dev->descr_list->mm[0].tr.sw_len = val;
    dev->descr_list->mm[1].t2.sw_len = val;
    dev->descr_list->mm[1].tr.sw_len = val;
  }
}

//------------------------------------------------------------------
// DMA interrupt handler.
//------------------------------------------------------------------

// Only run the irq we actually got (unlike the std serial driver).
//
// Handler for the DMA TX interrupt of one port.
//
//   irq      Not used.
//   dev_id   The struct train_dev that was given to request_irq().
//   regs     Not used.
//
// On a descriptor interrupt the DMA has passed a "tr" descriptor, i.e. it
// has moved on to the other descriptor list. The handler then flips
// descr_list_nr and signals the completion that train_write() waits on.
//
// Returns IRQ_RETVAL() of the two interrupt bits examined, i.e. the
// interrupt is reported as handled if the descr bit or the bit above it
// was set in R_IRQ_MASK2_RD.

static irqreturn_t tr_interrupt(int irq, void * dev_id, struct pt_regs * regs)
{
  u32 ireg;
  struct train_dev * my_dev;

  my_dev = (struct train_dev *) dev_id;

  // Sample the interrupt bits before they are acknowledged below.

  ireg = * R_IRQ_MASK2_RD;

  // Shift the descr bit of our DMA channel down to bit 0, and acknowledge
  // both the descr and the eop interrupt of that channel.

  if (my_dev->nr) {
    ireg >>= IO_BITNR(R_IRQ_MASK2_RD, dma4_descr);
    * R_DMA_CH4_CLR_INTR = IO_STATE(R_DMA_CH4_CLR_INTR, clr_descr, do) |
                           IO_STATE(R_DMA_CH4_CLR_INTR, clr_eop, do);
  }
  else {
    ireg >>= IO_BITNR(R_IRQ_MASK2_RD, dma8_descr);
    * R_DMA_CH8_CLR_INTR = IO_STATE(R_DMA_CH8_CLR_INTR, clr_descr, do) |
                           IO_STATE(R_DMA_CH8_CLR_INTR, clr_eop, do);
  }
  if (ireg & 1) {
    my_dev->descr_list_nr ^= 1;      // Switch to the other DMA list.
    complete(my_dev->cmpl);
  }
  return IRQ_RETVAL(ireg & 3);
}

//------------------------------------------------------------------
// File operations.
//------------------------------------------------------------------

//----- Open function. -----*

// Opens the device and stores its struct train_dev in the file's
// private_data for later use by train_write().
//
// The transmission is started when the reference count goes from 0 to 1.
// The count and the start are protected by the device semaphore.
//
// Returns 0 on success, -ERESTARTSYS if the wait for the semaphore was
// interrupted, or -EBUSY if the port could not be started. The reference
// count is left unchanged when an error is returned.

int train_open (struct inode * my_inode, struct file * my_file)
{
  struct train_dev * my_dev =
    container_of(my_inode->i_cdev, struct train_dev, my_cdev);
  int rc = 0;

  my_file->private_data = my_dev;

  if (down_interruptible(&my_dev->sem)) {
    return -ERESTARTSYS;
  }

  // Only the first opener starts the port.

  if ((my_dev->ref_cnt == 0) && init_sser_port(my_dev)) {
    rc = -EBUSY;
  }
  else {
    my_dev->ref_cnt++;
  }

  up(&my_dev->sem);
  return rc;
}

//----- Release function. -----*

// Called when the last reference to an open file is dropped. Decrements
// the reference count of the device and stops the transmission when the
// count reaches zero. Protected by the device semaphore.
//
// The semaphore is taken with down() and not with down_interruptible().
// A release is never restarted and its return value is not acted upon by
// the caller, so backing out because of a pending signal would skip the
// decrement. The port would then never be stopped and its irq and DMA
// channel would stay allocated. A pending signal is the normal case when
// the file is closed because the process is being killed.
//
// Always returns 0.

int train_release (struct inode * my_inode, struct file * my_file)
{
  struct train_dev * my_dev;

  my_dev = container_of(my_inode->i_cdev, struct train_dev, my_cdev);

  // Stop the transmission on the last close. Protected by semaphore.

  down(&my_dev->sem);
  my_dev->ref_cnt--;
  if (my_dev->ref_cnt == 0) {
    stop_sser_port(my_dev);
  }
  up(&my_dev->sem);

  return 0;
}

//----- Write function. -----*

// Takes one command from the user application.
//
//   my_file   Open file; private_data holds the struct train_dev.
//   my_data   User space buffer with the command; byte 0 is the command
//             code.
//   my_size   Length of the command in bytes.
//   offp      Not used.
//
// Returns
//   my_size       if the command was carried out, or silently consumed
//                 because it was too short (less than 3 bytes), too long,
//                 unknown, or failed its validity check,
//   0             if no DMA descriptor list became free within
//                 TRAIN_WR_TIMEOUT jiffies; the application should retry,
//   -EFAULT       if the user buffer could not be read,
//   -ERESTARTSYS  if the wait was interrupted by a signal.
//
// Configuration commands are carried out at once. Train commands wait for
// the interrupt handler to signal that one descriptor list is free, fill
// in that list, and link it in after the list that is being transmitted.

ssize_t train_write (struct file * my_file, const char __user * my_data,
                 size_t my_size, loff_t * offp)
{
  struct train_dev * my_dev;
  struct train_packet_data data;
  struct train_mm_dma_descr * free_list;
  struct train_mm_dma_descr * running_list;
  int list_nr;
  int i;
  u32 d;

  // Ignore (silently consume) too short or too long packages.

  if ((my_size < 3) || (my_size > sizeof(struct train_packet_data))) {
    return my_size;
  }

  // Copy data to dynamic variable. No lock needed for this.

  if (copy_from_user(&data, my_data, my_size)) return -EFAULT;

  // Get device structure.

  my_dev = my_file->private_data;

  // Check validity of train commands, and perform configuration commands.
  // Just silently consume a command if it is not OK.

  switch (data.data[0]) {

    // Train commands.

    case train_cmd_dcc:
    if (!dcc_ok(&data, my_size)) {
      return my_size;
    }
    break;

    case train_cmd_mm:
    if (!mm_ok(my_size)) {
      return my_size;
    }
    break;

    case train_cmd_mfx:
    if (!mfx_ok(&data, my_size)) {
      return my_size;
    }
    break;

    // Configuration commands, can be performed immediately.

    case train_cmd_mm_pause:
    mm_pause_cfg(&data, my_size, my_dev);
    return my_size;

    default:
    return my_size;
  }

  // If we get down to here, the command is a valid train command, and
  // should be inserted into the DMA list. But first we have to wait for
  // one of the DMA lists to be free.

  i = (int) wait_for_completion_interruptible_timeout(my_dev->cmpl,
                                                      TRAIN_WR_TIMEOUT);
  if (i <= 0) {
    return i;                          // Command interrupted or timed out.
  }

  // Then do the update.

  switch(data.data[0]) {
    // case train_cmd_mfx:
    // Not implemented.
    // break;

    // case train_cmd_dcc:
    // Not implemented.
    // break;

    case train_cmd_mm:
    // Märklin/Motorola command.

    // descr_list_nr is changed by the interrupt handler, so read it once
    // and use the same value for every step of the update.

    list_nr = my_dev->descr_list_nr;
    free_list = &(my_dev->descr_list->mm[list_nr]);
    running_list = &(my_dev->descr_list->mm[list_nr ^ 1]);

    // Put the train command together from bytes 1 to 3, byte 1 being the
    // least significant one. This is done byte by byte to not depend on
    // the alignment of the buffer or on the cpu byte order. mm_ok() has
    // made sure that all four bytes were received.

    d = (u32) data.data[1] |
        ((u32) data.data[2] << 8) |
        ((u32) data.data[3] << 16);

    // Point the descriptors to the data, 3 bits per descriptor and lsb
    // first. Both packet halves carry the same data.

    for (i = 0; i < 6; i++) {
      free_list->p[0].d[i].buf =
      free_list->p[1].d[i].buf =
        virt_to_phys(&(ser_data->mm_data[d & 7]));
      d >>= 3;
    }

    // Loop back the new list to itself.

    free_list->p[1].d[5].next = virt_to_phys(&(free_list->t2));

    // Append the new list to the old one. This must be the last step,
    // since the DMA may follow the link as soon as it is written.

    running_list->p[1].d[5].next = virt_to_phys(&(free_list->tr));
    break;

    default:
    complete(my_dev->cmpl);   // Should never happen, but if it does,
    break;                    // this is the most sane to do.
  }

  return my_size;
}

//------------------------------------------------------------------
// Init and exit functions.
//------------------------------------------------------------------

//----- Init function. -----*

// Module initialisation.
//
// Validates the module parameters, registers the character device
// region, grabs the sync serial I/O interface(s), allocates the DMA
// memory and sets up one train device per selected port.
//
// train_init_level is advanced after each step that succeeds, so that
// train_exit(), which is called on every error path, undoes exactly
// the steps that were completed.
//
// Returns 0 on success. On failure returns -EINVAL (bad parameters),
// -ENOMEM (DMA memory allocation failed) or the non-zero value handed
// back by the step that failed.

static int __init train_init(void)
{
  int i;
  int count;
  int tmp;

  // Check validity of module parameters.
  // The three flags are only accepted when they are exactly 0 or 1.

  if (((sser1 | 1) != 1) || ((sser3 | 1) != 1) || ((sticky | 1) != 1)) {
    printk(KERN_ALERT "Error: train_mod: bad module parameters.\n");
    tmp = -EINVAL;
    goto init_err;
  }
  count = sser1 + sser3;

  // The minors used are minor .. minor + count - 1, and the last one may
  // be at most 255, i.e. minor + count may be at most 256.

  if ((major <= 0) || (major >= 256) || (count == 0) ||
      (minor < 0) || (minor >= 256) || ((minor + count) > 256)) {
    printk(KERN_ALERT "Error: train_mod: bad module parameters.\n");
    tmp = -EINVAL;
    goto init_err;
  }

  // Register port numbers.

  dev = MKDEV(SYNCSER_MAJOR, minor);
  tmp = register_chrdev_region(dev, count, SYNCSER_NAME);
  if (tmp) {
    printk(KERN_ALERT "Error: train_mod: failed to register chrdev region.\n");
    goto init_err;
  }
  train_init_level = train_init_region;

  // Grab the I/O port(s).

  if (USB1_ALLOCATED && (sser1 || USB1_ALLOCATION_BUG)) {

    // Deallocate usb1 if allocated.
    // Should only be needed for sync_serial_1, but unfortunately there is a
    // bug in io_interface_mux.c so it is needed for sync_serial_3 too.

    cris_free_io_interface(if_usb_1);
  }

  if (sser3) {
    cris_free_io_interface(if_serial_3);
    tmp = cris_request_io_interface(if_sync_serial_3, "syncser3");
    if (tmp) {
      printk(KERN_ALERT "Error: train_mod: failed to grab I/O interface.\n");
      goto init_err;
    }
  }
  train_init_level = train_init_port1;

  if (sser1) {
    tmp = cris_request_io_interface(if_sync_serial_1, "syncser1");
    if (tmp) {
      printk(KERN_ALERT "Error: train_mod: failed to grab I/O interface.\n");
      goto init_err;
    }
  } else if (USB1_ALLOCATED && USB1_ALLOCATION_BUG) {

    // Re-register USB port 1, which was released above only because of
    // the allocation bug.
    //
    // This is buggy since the "usb1" string will be lost when the module
    // is closed down. The interfaces struct will hold a wild pointer then,
    // which can lead to crashes if someone else requests the pins without
    // getting them.

    tmp = cris_request_io_interface(if_usb_1, "usb1");
    if (tmp) goto init_err;
  }
  train_init_level = train_init_ports;

  // Allocate memory for the DMA descriptors and buffers.
  // The descriptor pools (one per device) come first, followed by the
  // shared serial data. GFP_DMA is only a zone modifier, so it is combined
  // with GFP_KERNEL to let the allocator sleep and reclaim memory; module
  // initialisation runs in a context where sleeping is allowed.

  dma_mem_base = kmalloc(sizeof(struct train_ser_data) +
                         count * sizeof(struct train_dma_descr_pool),
                         GFP_KERNEL | GFP_DMA);
  if (!dma_mem_base) {
    printk(KERN_ALERT "Error: train_mod: out of memory.\n");
    tmp = -ENOMEM;
    goto init_err;
  }

  // Initialize DMA data buffers. These will only be accessed by DMA after
  // initialization.

  ser_data = dma_mem_base + count * sizeof(struct train_dma_descr_pool);
  init_ser_data(ser_data);
  train_init_level = train_init_started;

  // Initialize R_SYNC_SERIAL_PRESCALE.
  // There should really be a system wide shadow register for this, but
  // since there isn't, we have to overwrite the whole register.
  // Therefore, there is no way for this driver to coexist with
  // another driver for the other sync serial port. It would be difficult
  // anyway, since some settings are common to both ports.

  init_sser_prescale ();

  // Initialize each device.

  for (i = 0; i < count; i++) {
    train_devices[i].descr_list = dma_mem_base +
                                  i * sizeof(struct train_dma_descr_pool);
    init_descr_list(train_devices[i].descr_list);

    // Device index 0 gets port number 0 when sser1 is selected, otherwise
    // the numbering starts at 1.

    train_devices[i].nr = sser1 ? i : i + 1;
    train_devices[i].ref_cnt = 0;
    train_devices[i].cmpl = i ? &cmpl1 : &cmpl0;
    if (sticky) {

      // In sticky mode the port is started already here, and the extra
      // reference is what makes train_exit() stop it again.

      tmp = init_sser_port(&train_devices[i]);
      if (tmp) goto init_err;
      train_devices[i].ref_cnt++;
      train_init_level = i ? train_init_dma1 : train_init_dma0;
    }
    init_MUTEX(&train_devices[i].sem);
    tmp = train_setup_cdev(&train_devices[i], i);
    if (tmp) {
      printk(KERN_ALERT "Error: train_mod: failed cdev init.\n");
      goto init_err;
    }

    // Also assigned after the second device, but then it is immediately
    // replaced by train_init_success below.

    train_init_level = train_init_cdev0;
  }
  train_init_level = train_init_success;
  printk(KERN_ALERT "Registered train_mod.\n");
  return 0;

  // Error returns.

  init_err:
  train_exit();
  return tmp;
}

//----- Exit function. -----*

// Module teardown.
//
// Releases whatever train_init() managed to set up. The switch is entered
// at the case matching train_init_level and then falls through every later
// case, so resources are released in the reverse order of acquisition.
// train_init() also calls this function on its error path.
//
// NOTE(review): this function is marked __exit but is invoked from the
// __init function as well. If the exit section is ever discarded by the
// build, that call would be unsafe - confirm for the configurations used.

static void __exit train_exit(void)
{
  const int ndev = sser1 + sser3;          // Number of selected ports.
  const int have_second = (ndev == 2);     // Is train_devices[1] in use?

  switch (train_init_level) {
  case train_init_success:
    // Everything was set up; start with the second character device.
    if (have_second) {
      cdev_del(&train_devices[1].my_cdev);
    }
    // Fall through.

  case train_init_dma1:
    // Stop the second port only if it still has references.
    if (have_second) {
      if (train_devices[1].ref_cnt) {
        stop_sser_port(&train_devices[1]);
      }
    }
    // Fall through.

  case train_init_cdev0:
    cdev_del(&train_devices[0].my_cdev);
    // Fall through.

  case train_init_dma0:
    // Stop the first port only if it still has references.
    if (train_devices[0].ref_cnt) {
      stop_sser_port(&train_devices[0]);
    }
    // Fall through.

  case train_init_started:
    // Descriptor pools and serial data share this single allocation.
    kfree(dma_mem_base);
    // Fall through.

  case train_init_ports:
    if (sser1) {
      cris_free_io_interface(if_sync_serial_1);
    }
    // Fall through.

  case train_init_port1:
    if (sser3) {
      cris_free_io_interface(if_sync_serial_3);
    }
    // Fall through.

  case train_init_region:
    unregister_chrdev_region(dev, ndev);
    // Fall through.

  case train_init_none:
    printk(KERN_ALERT "Goodbye from train_mod.\n");
    break;

  default:
    printk(KERN_ALERT "Error: train_mod: invalid init level.\n");
    break;
  }
}

// Hook up train_init and train_exit as the module's init and exit
// handlers.
module_init(train_init);
module_exit(train_exit);

//------------------------------------------------------------------
// Module information.
//------------------------------------------------------------------

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Per Zander http://home.swipnet.se/perz/contact.html");
MODULE_DESCRIPTION("Sync serial driver for model train control");
MODULE_VERSION("0.0");
