Compare commits

...

25 Commits

Author SHA1 Message Date
Gerardo Salazar
be6ead75e9 Revert some changes and adds pandas memory allocator 2020-12-06 19:15:38 -08:00
Gerardo Salazar
25f7ccf0c2 Performance improvements 2020-12-06 19:15:38 -08:00
Gerardo Salazar
afb0a86291 Faster double casting than CLR implementation 2020-12-06 19:14:48 -08:00
Gerardo Salazar
f5c5faa3d4 Remove bindingRedirect in Tests and re-adds pyarrow to DockerfileLeanFoundation 2020-12-06 19:14:48 -08:00
Gerardo Salazar
c471d1ced0 Adds unit tests and fixes various bugs in PandasConverter 2020-12-06 19:14:48 -08:00
Gerardo Salazar
5c345faea7 Revert changes to DockerfileLeanFoundation 2020-12-06 19:14:24 -08:00
Gerardo Salazar
11bed1f0c3 Address self-review: Adds explanitory comments and cleans up code 2020-12-06 19:14:24 -08:00
Gerardo Salazar
ec57799a5d Reverts changes and deletes unused classes 2020-12-06 19:14:24 -08:00
Gerardo Salazar
4c8aa638eb Fixes failing unit tests by converting filter mask to PyObject[] 2020-12-06 19:13:54 -08:00
Gerardo Salazar
7c3caec07f Use external NuGet package for Arrow
* Updates CI for Arrow
  * Updates DockerfileLeanFoundation
  * Adds Arrow to Python packages tests
2020-12-06 19:13:54 -08:00
Gerardo Salazar
45f71543bd Filters duplicates more efficiently, fixes warning on removal of dupes
* Commit includes behind the scenes testing and validation of the data
    and performance as a result of modifying the dupe removal process
2020-12-06 19:13:54 -08:00
Gerardo Salazar
13b9d91ecb Memory leak fixes for options and performance improvement for NA replace 2020-12-06 19:13:54 -08:00
Gerardo Salazar
f239018e77 Tames the wild beast of memory leaks, for QuoteBar data 2020-12-06 19:13:54 -08:00
Gerardo Salazar
f1c98b848a Partially fixes memory leak in PandasConverter 2020-12-06 19:13:54 -08:00
Gerardo Salazar
2c7bde422a Reuses MemoryStream to avoid having to reallocate consistently
* Cleans up .csproj and packages.config
  * Adds IDisposable to PandasConverter, PandasArrowMemoryAllocator
2020-12-06 19:13:54 -08:00
Gerardo Salazar
9e95bfea90 Fixes various bugs and matches previous version for backwards compat 2020-12-06 19:13:35 -08:00
Gerardo Salazar
b820ff4de8 EOD commit; Attempts to implement alt. data support 2020-12-06 19:13:35 -08:00
Gerardo Salazar
8218a2be99 Fixes bug with options and refactors code
* Option index is now 1-1 with existing impl
  * Begin adding support for custom data
2020-12-06 19:13:35 -08:00
Gerardo Salazar
9e3031c562 Fixes options support
* 1 bug remaining: Get index to be ''
2020-12-06 19:13:35 -08:00
Gerardo Salazar
2cf9239b6e Improves performance by ~10% of Arrow implementation
* New allocator improves performance by reusing allocated buffers
2020-12-06 19:13:35 -08:00
Gerardo Salazar
4c63c60a89 Adds option expiry support 2020-12-06 19:13:35 -08:00
Gerardo Salazar
9a2e47b05c Adds support for future expiry and proper indexing 2020-12-06 19:13:35 -08:00
Gerardo Salazar
711d46d6e2 Fixes bugs with index and handling of no data 2020-12-06 19:13:35 -08:00
Gerardo Salazar
5d6625c1ea Adds support for Tick, OpenInterest
* Performance improvement for Symbol creation in TimeSliceFactory for
    futures
2020-12-06 19:13:34 -08:00
Gerardo Salazar
0a70611d81 MVP of Apache Arrow in-memory pandas DataFrame construction 2020-12-06 19:13:16 -08:00
30 changed files with 2428 additions and 788 deletions

View File

@@ -16,6 +16,7 @@ before_install:
- conda install -y cython=0.29.15
- conda install -y scipy=1.4.1
- conda install -y wrapt=1.12.1
- pip install pyarrow==1.0.1
install:
- nuget restore QuantConnect.Lean.sln
- nuget install NUnit.Runners -Version 3.11.1 -OutputDirectory testrunner

View File

@@ -33,13 +33,15 @@ Before we enable python support, follow the [installation instructions](https://
- Value of the variable: python installation path.
4. Install [pandas=0.25.3](https://pandas.pydata.org/) and its [dependencies](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies).
5. Install [wrapt=1.11.2](https://pypi.org/project/wrapt/) module.
6. Reboot computer to ensure changes are propogated.
6. Install [pyarrow=1.0.1](https://arrow.apache.org/install/) module.
7. Reboot computer to ensure changes are propagated.
#### [macOS](https://github.com/QuantConnect/Lean#macos)
1. Use the macOS x86-64 package installer from [Anaconda](https://repo.anaconda.com/archive/Anaconda3-5.2.0-MacOSX-x86_64.pkg) and follow "[Installing on macOS](https://docs.anaconda.com/anaconda/install/mac-os)" instructions from Anaconda documentation page.
2. Install [pandas=0.25.3](https://pandas.pydata.org/) and its [dependencies](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies).
3. Install [wrapt=1.11.2](https://pypi.org/project/wrapt/) module.
4. Install [pyarrow=1.0.1](https://arrow.apache.org/install/) module.
*Note:* If you encounter the "System.DllNotFoundException: python3.6m" runtime error when running Python algorithms on macOS:
1. Find `libpython3.6m.dylib` in your Python installation folder. If you installed Python with Anaconda, it may be found at
@@ -64,6 +66,7 @@ conda update -y python conda pip
conda install -y cython=0.29.11
conda install -y pandas=0.25.3
conda install -y wrapt=1.11.2
pip install pyarrow==1.0.1
```
*Note 1:* There is a [known issue](https://github.com/pythonnet/pythonnet/issues/609) with python 3.6.5 that prevents pythonnet installation, please upgrade python to version 3.6.8:

View File

@@ -54,17 +54,8 @@ namespace QuantConnect.Data.Market
/// <param name="time">The time this data was emitted.</param>
public DataDictionary(DateTime time)
{
#pragma warning disable 618 // This assignment is left here until the Time property is removed.
Time = time;
#pragma warning restore 618
}
/// <summary>
/// Gets or sets the time associated with this collection of data
/// </summary>
[Obsolete("The DataDictionary<T> Time property is now obsolete. All algorithms should use algorithm.Time instead.")]
public DateTime Time { get; set; }
/// <summary>
/// Returns an enumerator that iterates through the collection.
/// </summary>
@@ -300,4 +291,4 @@ namespace QuantConnect.Data.Market
dictionary.Add(data.Symbol, data);
}
}
}
}

View File

@@ -47,7 +47,7 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="ticker">The ticker of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new Delisting this[string ticker] { get { return base[ticker]; } set { base[ticker] = value; } }
public new Delisting this[string ticker] { get { return base[ticker]; } internal set { base[ticker] = value; } }
/// <summary>
/// Gets or sets the Delisting with the specified Symbol.
@@ -57,6 +57,6 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="symbol">The Symbol of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new Delisting this[Symbol symbol] { get { return base[symbol]; } set { base[symbol] = value; } }
public new Delisting this[Symbol symbol] { get { return base[symbol]; } internal set { base[symbol] = value; } }
}
}

View File

@@ -47,7 +47,7 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="ticker">The ticker of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new Dividend this[string ticker] { get { return base[ticker]; } set { base[ticker] = value; } }
public new Dividend this[string ticker] { get { return base[ticker]; } internal set { base[ticker] = value; } }
/// <summary>
/// Gets or sets the Dividend with the specified Symbol.
@@ -57,6 +57,6 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="symbol">The Symbol of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new Dividend this[Symbol symbol] { get { return base[symbol]; } set { base[symbol] = value; } }
public new Dividend this[Symbol symbol] { get { return base[symbol]; } internal set { base[symbol] = value; } }
}
}

View File

@@ -45,7 +45,7 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="ticker">The ticker of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new FuturesChain this[string ticker] { get { return base[ticker]; } set { base[ticker] = value; } }
public new FuturesChain this[string ticker] { get { return base[ticker]; } internal set { base[ticker] = value; } }
/// <summary>
/// Gets or sets the FuturesChain with the specified Symbol.
@@ -55,6 +55,6 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="symbol">The Symbol of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new FuturesChain this[Symbol symbol] { get { return base[symbol]; } set { base[symbol] = value; } }
public new FuturesChain this[Symbol symbol] { get { return base[symbol]; } internal set { base[symbol] = value; } }
}
}
}

View File

@@ -45,7 +45,7 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="ticker">The ticker of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new OptionChain this[string ticker] { get { return base[ticker]; } set { base[ticker] = value; } }
public new OptionChain this[string ticker] { get { return base[ticker]; } internal set { base[ticker] = value; } }
/// <summary>
/// Gets or sets the OptionChain with the specified Symbol.
@@ -55,6 +55,6 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="symbol">The Symbol of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new OptionChain this[Symbol symbol] { get { return base[symbol]; } set { base[symbol] = value; } }
public new OptionChain this[Symbol symbol] { get { return base[symbol]; } internal set { base[symbol] = value; } }
}
}
}

View File

@@ -45,7 +45,7 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="ticker">The ticker of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new QuoteBar this[string ticker] { get { return base[ticker]; } set { base[ticker] = value; } }
public new QuoteBar this[string ticker] { get { return base[ticker]; } internal set { base[ticker] = value; } }
/// <summary>
/// Gets or sets the QuoteBar with the specified Symbol.
@@ -55,6 +55,6 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="symbol">The Symbol of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new QuoteBar this[Symbol symbol] { get { return base[symbol]; } set { base[symbol] = value; } }
public new QuoteBar this[Symbol symbol] { get { return base[symbol]; } internal set { base[symbol] = value; } }
}
}
}

View File

@@ -47,7 +47,7 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="ticker">The ticker of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new Split this[string ticker] { get { return base[ticker]; } set { base[ticker] = value; } }
public new Split this[string ticker] { get { return base[ticker]; } internal set { base[ticker] = value; } }
/// <summary>
/// Gets or sets the Split with the specified Symbol.
@@ -57,6 +57,6 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="symbol">The Symbol of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new Split this[Symbol symbol] { get { return base[symbol]; } set { base[symbol] = value; } }
public new Split this[Symbol symbol] { get { return base[symbol]; } internal set { base[symbol] = value; } }
}
}
}

View File

@@ -47,7 +47,7 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="ticker">The ticker of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new SymbolChangedEvent this[string ticker] { get { return base[ticker]; } set { base[ticker] = value; } }
public new SymbolChangedEvent this[string ticker] { get { return base[ticker]; } internal set { base[ticker] = value; } }
/// <summary>
/// Gets or sets the SymbolChangedEvent with the specified Symbol.
@@ -57,6 +57,6 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="symbol">The Symbol of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new SymbolChangedEvent this[Symbol symbol] { get { return base[symbol]; } set { base[symbol] = value; } }
public new SymbolChangedEvent this[Symbol symbol] { get { return base[symbol]; } internal set { base[symbol] = value; } }
}
}

View File

@@ -48,7 +48,7 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="ticker">The ticker of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new List<Tick> this[string ticker] { get { return base[ticker]; } set { base[ticker] = value; } }
public new List<Tick> this[string ticker] { get { return base[ticker]; } internal set { base[ticker] = value; } }
/// <summary>
/// Gets or sets the list of Tick with the specified Symbol.
@@ -58,6 +58,6 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="symbol">The Symbol of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new List<Tick> this[Symbol symbol] { get { return base[symbol]; } set { base[symbol] = value; } }
public new List<Tick> this[Symbol symbol] { get { return base[symbol]; } internal set { base[symbol] = value; } }
}
}

View File

@@ -46,7 +46,7 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="ticker">The ticker of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new TradeBar this[string ticker] { get { return base[ticker]; } set { base[ticker] = value; } }
public new TradeBar this[string ticker] { get { return base[ticker]; } internal set { base[ticker] = value; } }
/// <summary>
/// Gets or sets the TradeBar with the specified Symbol.
@@ -56,6 +56,6 @@ namespace QuantConnect.Data.Market
/// </returns>
/// <param name="symbol">The Symbol of the element to get or set.</param>
/// <remarks>Wraps the base implementation to enable indexing in python algorithms due to pythonnet limitations</remarks>
public new TradeBar this[Symbol symbol] { get { return base[symbol]; } set { base[symbol] = value; } }
public new TradeBar this[Symbol symbol] { get { return base[symbol]; } internal set { base[symbol] = value; } }
}
}
}

View File

@@ -203,15 +203,15 @@ namespace QuantConnect.Data
/// <param name="time">The timestamp for this slice of data</param>
/// <param name="data">The raw data in this slice</param>
public Slice(DateTime time, List<BaseData> data)
: this(time, data, CreateCollection<TradeBars, TradeBar>(time, data),
CreateCollection<QuoteBars, QuoteBar>(time, data),
CreateTicksCollection(time, data),
CreateCollection<OptionChains, OptionChain>(time, data),
CreateCollection<FuturesChains, FuturesChain>(time, data),
CreateCollection<Splits, Split>(time, data),
CreateCollection<Dividends, Dividend>(time, data),
CreateCollection<Delistings, Delisting>(time, data),
CreateCollection<SymbolChangedEvents, SymbolChangedEvent>(time, data))
: this(time, data, CreateCollection<TradeBars, TradeBar>(data),
CreateCollection<QuoteBars, QuoteBar>(data),
CreateTicksCollection(data),
CreateCollection<OptionChains, OptionChain>(data),
CreateCollection<FuturesChains, FuturesChain>(data),
CreateCollection<Splits, Split>(data),
CreateCollection<Dividends, Dividend>(data),
CreateCollection<Delistings, Delisting>(data),
CreateCollection<SymbolChangedEvents, SymbolChangedEvent>(data))
{
}
@@ -500,9 +500,9 @@ namespace QuantConnect.Data
/// <summary>
/// Dynamically produces a <see cref="Ticks"/> data dictionary using the provided data
/// </summary>
private static Ticks CreateTicksCollection(DateTime time, IEnumerable<BaseData> data)
private static Ticks CreateTicksCollection(IEnumerable<BaseData> data)
{
var ticks = new Ticks(time);
var ticks = new Ticks();
foreach (var tick in data.OfType<Tick>())
{
List<Tick> listTicks;
@@ -523,16 +523,11 @@ namespace QuantConnect.Data
/// <param name="time">The current slice time</param>
/// <param name="data">The data to create the collection</param>
/// <returns>The data dictionary of <typeparamref name="TItem"/> containing all the data of that type in this slice</returns>
private static T CreateCollection<T, TItem>(DateTime time, IEnumerable<BaseData> data)
private static T CreateCollection<T, TItem>(IEnumerable<BaseData> data)
where T : DataDictionary<TItem>, new()
where TItem : BaseData
{
var collection = new T
{
#pragma warning disable 618 // This assignment is left here until the Time property is removed.
Time = time
#pragma warning restore 618
};
var collection = new T();
foreach (var item in data.OfType<TItem>())
{
collection[item.Symbol] = item;

View File

@@ -118,6 +118,10 @@ namespace QuantConnect
{
Guids.Add(guid);
}
/// Unix epoch (1970-01-01 00:00:00.000000000Z)
/// </summary>
public static DateTime UnixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
/// <summary>
/// Serialize a list of ticks using protobuf
@@ -1298,14 +1302,13 @@ namespace QuantConnect
/// <param name="to">The time zone to be converted to</param>
/// <param name="strict">True for strict conversion, this will throw during ambiguitities, false for lenient conversion</param>
/// <returns>The time in terms of the to time zone</returns>
public static DateTime ConvertTo(this DateTime time, DateTimeZone from, DateTimeZone to, bool strict = false)
public static DateTime ConvertTo(this DateTime time, DateTimeZone from, DateTimeZone to)
{
if (strict)
{
return from.AtStrictly(LocalDateTime.FromDateTime(time)).WithZone(to).ToDateTimeUnspecified();
}
var instant = new Instant(time.Ticks - UnixEpoch.Ticks);
var fromOffset = from.GetUtcOffset(instant).ToTimeSpan();
var toOffset = to.GetUtcOffset(instant).ToTimeSpan();
return from.AtLeniently(LocalDateTime.FromDateTime(time)).WithZone(to).ToDateTimeUnspecified();
return time - (fromOffset - toOffset);
}
/// <summary>
@@ -1315,11 +1318,12 @@ namespace QuantConnect
/// <param name="to">The destinatio time zone</param>
/// <param name="strict">True for strict conversion, this will throw during ambiguitities, false for lenient conversion</param>
/// <returns>The time in terms of the <paramref name="to"/> time zone</returns>
public static DateTime ConvertFromUtc(this DateTime time, DateTimeZone to, bool strict = false)
public static DateTime ConvertFromUtc(this DateTime time, DateTimeZone to)
{
return time.ConvertTo(TimeZones.Utc, to, strict);
return time + to.GetUtcOffset(new Instant(time.Ticks - UnixEpoch.Ticks)).ToTimeSpan();
}
/// <summary>
/// Converts the specified time from the <paramref name="from"/> time zone to <see cref="TimeZones.Utc"/>
/// </summary>
@@ -1327,14 +1331,9 @@ namespace QuantConnect
/// <param name="from">The time zone the specified <paramref name="time"/> is in</param>
/// <param name="strict">True for strict conversion, this will throw during ambiguitities, false for lenient conversion</param>
/// <returns>The time in terms of the to time zone</returns>
public static DateTime ConvertToUtc(this DateTime time, DateTimeZone from, bool strict = false)
public static DateTime ConvertToUtc(this DateTime time, DateTimeZone from)
{
if (strict)
{
return from.AtStrictly(LocalDateTime.FromDateTime(time)).ToDateTimeUtc();
}
return from.AtLeniently(LocalDateTime.FromDateTime(time)).ToDateTimeUtc();
return time.Subtract(from.GetUtcOffset(Instant.FromTicksSinceUnixEpoch((time.Ticks - UnixEpoch.Ticks))).ToTimeSpan());
}
/// <summary>

View File

@@ -0,0 +1,189 @@
/*
* QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
* Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Linq;
using Apache.Arrow.Memory;
namespace QuantConnect.Python
{
public class PandasArrowMemoryAllocator : NativeMemoryAllocator, IDisposable
{
private bool _disposed;
private readonly List<PandasMemoryOwner> _free = new List<PandasMemoryOwner>();
private readonly List<PandasMemoryOwner> _used = new List<PandasMemoryOwner>();
public PandasArrowMemoryAllocator() : base()
{
}
protected override IMemoryOwner<byte> AllocateInternal(int length, out int bytesAllocated)
{
PandasMemoryOwner owner;
var memoryResizeIndexes = new List<KeyValuePair<int, int>>();
for (var i = 0; i < _free.Count; i++)
{
var memory = _free[i];
if (length > memory.Original.Memory.Length)
{
memoryResizeIndexes.Add(new KeyValuePair<int, int>(i, memory.Original.Memory.Length));
continue;
}
owner = memory;
bytesAllocated = 0;
_free.Remove(owner);
_used.Add(owner);
owner.Reset();
if (length != memory.Original.Memory.Length)
{
owner.Slice(0, length);
}
return owner;
}
if (memoryResizeIndexes.Count != 0)
{
// Get the smallest resizable instance, and reallocate a larger buffer.
var resizeIndex = memoryResizeIndexes.OrderBy(x => x.Value).First();
var resizable = _free[resizeIndex.Key];
resizable.Resize(base.AllocateInternal(length, out bytesAllocated));
_used.Add(resizable);
_free.RemoveAt(resizeIndex.Key);
return resizable;
}
// New allocation, should only be called a few times when we start using the allocator
owner = new PandasMemoryOwner(base.AllocateInternal(length, out bytesAllocated));
_used.Add(owner);
return owner;
}
/// <summary>
/// Frees the underlying memory buffers so that they can be re-used
/// </summary>
public void Free()
{
foreach (var used in _used)
{
_free.Add(used);
}
_used.Clear();
}
private class PandasMemoryOwner : IMemoryOwner<byte>
{
private bool _disposed;
/// <summary>
/// Original memory owner containing the full-length byte-array
/// we initially allocated.
/// </summary>
public IMemoryOwner<byte> Original { get; private set; }
/// <summary>
/// Slice of the original memory owner containing the contents of
/// the buffer Arrow will use. We slice the original memory so
/// that Arrow doesn't panic when it receives a slice with a length
/// longer than it expects when serializing its internal buffer.
/// </summary>
public Memory<byte> Memory { get; private set; }
public PandasMemoryOwner(IMemoryOwner<byte> memory)
{
Original = memory;
Memory = Original.Memory;
}
/// <summary>
/// Creates a slice of the original MemoryOwner and stores the result in <see cref="Memory"/>
/// </summary>
/// <param name="start">Index start of the slice</param>
/// <param name="length">Length of the slice</param>
public void Slice(int start, int length)
{
Memory = Original.Memory.Slice(start, length);
}
/// <summary>
/// Restores the <see cref="Memory"/> slice to its initial value
/// </summary>
public void Reset()
{
Memory = null;
Memory = Original.Memory;
}
/// <summary>
/// Resizes the instance to the new memory size
/// </summary>
/// <param name="newMemory"></param>
public void Resize(IMemoryOwner<byte> newMemory)
{
Original.Dispose();
Original = newMemory;
Memory = null;
Memory = Original.Memory;
}
public void Free()
{
Original.Dispose();
Memory = null;
Original = null;
}
/// <summary>
/// no-op dispose because we want to re-use the MemoryOwner instance after we dispose of a RecordBatch.
/// To dispose of the resources this class owns, use <see cref="Free"/>
/// </summary>
public void Dispose()
{
}
}
public void Dispose()
{
if (_disposed)
{
throw new ObjectDisposedException("PandasArrowMemoryAllocator has already been disposed");
}
foreach (var free in _free)
{
free.Free();
}
foreach (var used in _used)
{
used.Free();
}
_free.Clear();
_used.Clear();
_disposed = true;
}
}
}

1531
Common/Python/PandasConverter.cs Normal file → Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -1,609 +0,0 @@
/*
* QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
* Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Python.Runtime;
using QuantConnect.Data;
using QuantConnect.Data.Market;
using QuantConnect.Util;
using System;
using System.Collections;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
namespace QuantConnect.Python
{
/// <summary>
/// Organizes a list of data to create pandas.DataFrames
/// </summary>
public class PandasData
{
private static dynamic _pandas;
private readonly static HashSet<string> _baseDataProperties = typeof(BaseData).GetProperties().ToHashSet(x => x.Name.ToLowerInvariant());
private readonly static ConcurrentDictionary<Type, List<MemberInfo>> _membersByType = new ConcurrentDictionary<Type, List<MemberInfo>>();
private readonly Symbol _symbol;
private readonly Dictionary<string, Tuple<List<DateTime>, List<object>>> _series;
private readonly List<MemberInfo> _members;
/// <summary>
/// Gets true if this is a custom data request, false for normal QC data
/// </summary>
public bool IsCustomData { get; }
/// <summary>
/// Implied levels of a multi index pandas.Series (depends on the security type)
/// </summary>
public int Levels { get; } = 2;
/// <summary>
/// Initializes an instance of <see cref="PandasData"/>
/// </summary>
public PandasData(object data)
{
if (_pandas == null)
{
using (Py.GIL())
{
// this python Remapper class will work as a proxy and adjust the
// input to its methods using the provided 'mapper' callable object
_pandas = PythonEngine.ModuleFromString("remapper",
@"import pandas as pd
from pandas.core.resample import Resampler, DatetimeIndexResampler, PeriodIndexResampler, TimedeltaIndexResampler
from pandas.core.groupby.generic import DataFrameGroupBy, SeriesGroupBy
from pandas.core.indexes.frozen import FrozenList as pdFrozenList
from pandas.core.window import Expanding, EWM, Rolling, Window
from pandas.core.computation.ops import UndefinedVariableError
from inspect import getmembers, isfunction, isgenerator
from functools import partial
from sys import modules
from clr import AddReference
AddReference(""QuantConnect.Common"")
from QuantConnect import *
def mapper(key):
'''Maps a Symbol object or a Symbol Ticker (string) to the string representation of
Symbol SecurityIdentifier. If cannot map, returns the object
'''
keyType = type(key)
if keyType is Symbol:
return str(key.ID)
if keyType is str:
kvp = SymbolCache.TryGetSymbol(key, None)
if kvp[0]:
return str(kvp[1].ID)
if keyType is list:
return [mapper(x) for x in key]
if keyType is tuple:
return tuple([mapper(x) for x in key])
if keyType is dict:
return {k:mapper(v) for k,v in key.items()}
return key
def try_wrap_as_index(obj):
'''Tries to wrap object if it is one of pandas' index objects.'''
objType = type(obj)
if objType is pd.Index:
return True, Index(obj)
if objType is pd.MultiIndex:
result = object.__new__(MultiIndex)
result._set_levels(obj.levels, copy=obj.copy, validate=False)
result._set_codes(obj.codes, copy=obj.copy, validate=False)
result._set_names(obj.names)
result.sortorder = obj.sortorder
return True, result
if objType is pdFrozenList:
return True, FrozenList(obj)
return False, obj
def try_wrap_as_pandas(obj):
'''Tries to wrap object if it is a pandas' object.'''
success, obj = try_wrap_as_index(obj)
if success:
return success, obj
objType = type(obj)
if objType is pd.DataFrame:
return True, DataFrame(data=obj)
if objType is pd.Series:
return True, Series(data=obj)
if objType is tuple:
anySuccess = False
results = list()
for item in obj:
success, result = try_wrap_as_pandas(item)
anySuccess |= success
results.append(result)
if anySuccess:
return True, tuple(results)
return False, obj
def try_wrap_resampler(obj, self):
'''Tries to wrap object if it is a pandas' Resampler object.'''
if not isinstance(obj, Resampler):
return False, obj
klass = CreateWrapperClass(type(obj))
return True, klass(self, groupby=obj.groupby, kind=obj.kind, axis=obj.axis)
def wrap_function(f):
'''Wraps function f with g.
Function g converts the args/kwargs to use alternative index keys
and the result of the f function call to the wrapper objects
'''
def g(*args, **kwargs):
if len(args) > 1:
args = mapper(args)
if len(kwargs) > 0:
kwargs = mapper(kwargs)
try:
result = f(*args, **kwargs)
except UndefinedVariableError as e:
# query/eval methods needs to look for a scope variable at a higher level
# since the wrapper classes are children of pandas classes
kwargs['level'] = kwargs.pop('level', 0) + 1
result = f(*args, **kwargs)
success, result = try_wrap_as_pandas(result)
if success:
return result
success, result = try_wrap_resampler(result, args[0])
if success:
return result
if isgenerator(result):
return ( (k, try_wrap_as_pandas(v)[1]) for k, v in result)
return result
g.__name__ = f.__name__
return g
def wrap_special_function(name, cls, fcls, gcls = None):
'''Replaces the special function of a given class by g that wraps fcls
This is how pandas implements them.
gcls represents an alternative for fcls
if the keyword argument has 'win_type' key for the Rolling/Window case
'''
fcls = CreateWrapperClass(fcls)
if gcls is not None:
gcls = CreateWrapperClass(fcls)
def g(*args, **kwargs):
if kwargs.get('win_type', None):
return gcls(*args, **kwargs)
return fcls(*args, **kwargs)
g.__name__ = name
setattr(cls, g.__name__, g)
def CreateWrapperClass(cls: type):
'''Creates wrapper classes.
Members of the original class are wrapped to allow alternative index look-up
'''
# Define a new class
klass = type(f'{cls.__name__}', (cls,) + cls.__bases__, dict(cls.__dict__))
def g(self, name):
'''Wrap '__getattribute__' to handle indices
Only need to wrap columns, index and levels attributes
'''
attr = object.__getattribute__(self, name)
if name in ['columns', 'index', 'levels']:
_, attr = try_wrap_as_index(attr)
return attr
g.__name__ = '__getattribute__'
g.__qualname__ = g.__name__
setattr(klass, g.__name__, g)
def wrap_union(f):
'''Wraps function f (union) with g.
Special case: The union method from index objects needs to
receive pandas' index objects to avoid infity recursion.
Function g converts the args/kwargs objects to one of pandas index objects
and the result of the f function call back to wrapper indexes objects
'''
def unwrap_index(obj):
'''Tries to unwrap object if it is one of this module wrapper's index objects.'''
objType = type(obj)
if objType is Index:
return pd.Index(obj)
if objType is MultiIndex:
result = object.__new__(pd.MultiIndex)
result._set_levels(obj.levels, copy=obj.copy, validate=False)
result._set_codes(obj.codes, copy=obj.copy, validate=False)
result._set_names(obj.names)
result.sortorder = obj.sortorder
return result
if objType is FrozenList:
return pdFrozenList(obj)
return obj
def g(*args, **kwargs):
args = tuple([unwrap_index(x) for x in args])
result = f(*args, **kwargs)
_, result = try_wrap_as_index(result)
return result
g.__name__ = f.__name__
return g
# We allow the wraopping of slot methods that are not inherited from object
# It will include operation methods like __add__ and __contains__
allow_list = set(x for x in dir(klass) if x.startswith('__')) - set(dir(object))
# Wrap class members of the newly created class
for name, member in getmembers(klass):
if name.startswith('_') and name not in allow_list:
continue
if isfunction(member):
if name == 'union':
member = wrap_union(member)
else:
member = wrap_function(member)
setattr(klass, name, member)
elif type(member) is property:
if type(member.fget) is partial:
func = CreateWrapperClass(member.fget.func)
fget = partial(func, name)
else:
fget = wrap_function(member.fget)
member = property(fget, member.fset, member.fdel, member.__doc__)
setattr(klass, name, member)
return klass
FrozenList = CreateWrapperClass(pdFrozenList)
Index = CreateWrapperClass(pd.Index)
MultiIndex = CreateWrapperClass(pd.MultiIndex)
Series = CreateWrapperClass(pd.Series)
DataFrame = CreateWrapperClass(pd.DataFrame)
wrap_special_function('groupby', Series, SeriesGroupBy)
wrap_special_function('groupby', DataFrame, DataFrameGroupBy)
wrap_special_function('ewm', Series, EWM)
wrap_special_function('ewm', DataFrame, EWM)
wrap_special_function('expanding', Series, Expanding)
wrap_special_function('expanding', DataFrame, Expanding)
wrap_special_function('rolling', Series, Rolling, Window)
wrap_special_function('rolling', DataFrame, Rolling, Window)
CreateSeries = pd.Series
setattr(modules[__name__], 'concat', wrap_function(pd.concat))");
}
}
var enumerable = data as IEnumerable;
if (enumerable != null)
{
foreach (var item in enumerable)
{
data = item;
}
}
var type = data.GetType();
IsCustomData = type.Namespace != typeof(Bar).Namespace;
_members = new List<MemberInfo>();
_symbol = ((IBaseData)data).Symbol;
if (_symbol.SecurityType == SecurityType.Future) Levels = 3;
if (_symbol.SecurityType == SecurityType.Option || _symbol.SecurityType == SecurityType.FutureOption) Levels = 5;
var columns = new HashSet<string>
{
"open", "high", "low", "close", "lastprice", "volume",
"askopen", "askhigh", "asklow", "askclose", "askprice", "asksize", "quantity", "suspicious",
"bidopen", "bidhigh", "bidlow", "bidclose", "bidprice", "bidsize", "exchange", "openinterest"
};
if (IsCustomData)
{
var keys = (data as DynamicData)?.GetStorageDictionary().ToHashSet(x => x.Key);
// C# types that are not DynamicData type
if (keys == null)
{
if (_membersByType.TryGetValue(type, out _members))
{
keys = _members.ToHashSet(x => x.Name.ToLowerInvariant());
}
else
{
var members = type.GetMembers().Where(x => x.MemberType == MemberTypes.Field || x.MemberType == MemberTypes.Property).ToList();
var duplicateKeys = members.GroupBy(x => x.Name.ToLowerInvariant()).Where(x => x.Count() > 1).Select(x => x.Key);
foreach (var duplicateKey in duplicateKeys)
{
throw new ArgumentException($"PandasData.ctor(): More than one \'{duplicateKey}\' member was found in \'{type.FullName}\' class.");
}
// If the custom data derives from a Market Data (e.g. Tick, TradeBar, QuoteBar), exclude its keys
keys = members.ToHashSet(x => x.Name.ToLowerInvariant());
keys.ExceptWith(_baseDataProperties);
keys.ExceptWith(GetPropertiesNames(typeof(QuoteBar), type));
keys.ExceptWith(GetPropertiesNames(typeof(TradeBar), type));
keys.ExceptWith(GetPropertiesNames(typeof(Tick), type));
keys.Add("value");
_members = members.Where(x => keys.Contains(x.Name.ToLowerInvariant())).ToList();
_membersByType.TryAdd(type, _members);
}
}
columns.Add("value");
columns.UnionWith(keys);
}
_series = columns.ToDictionary(k => k, v => Tuple.Create(new List<DateTime>(), new List<object>()));
}
/// <summary>
/// Adds security data object to the end of the lists
/// </summary>
/// <param name="baseData"><see cref="IBaseData"/> object that contains security data</param>
public void Add(object baseData)
{
foreach (var member in _members)
{
var key = member.Name.ToLowerInvariant();
var endTime = ((IBaseData) baseData).EndTime;
var propertyMember = member as PropertyInfo;
if (propertyMember != null)
{
AddToSeries(key, endTime, propertyMember.GetValue(baseData));
continue;
}
var fieldMember = member as FieldInfo;
if (fieldMember != null)
{
AddToSeries(key, endTime, fieldMember.GetValue(baseData));
}
}
var storage = (baseData as DynamicData)?.GetStorageDictionary();
if (storage != null)
{
var endTime = ((IBaseData) baseData).EndTime;
var value = ((IBaseData) baseData).Value;
AddToSeries("value", endTime, value);
foreach (var kvp in storage)
{
AddToSeries(kvp.Key, endTime, kvp.Value);
}
}
else
{
var ticks = new List<Tick> { baseData as Tick };
var tradeBar = baseData as TradeBar;
var quoteBar = baseData as QuoteBar;
Add(ticks, tradeBar, quoteBar);
}
}
/// <summary>
/// Adds Lean data objects to the end of the lists
/// </summary>
/// <param name="ticks">List of <see cref="Tick"/> object that contains tick information of the security</param>
/// <param name="tradeBar"><see cref="TradeBar"/> object that contains trade bar information of the security</param>
/// <param name="quoteBar"><see cref="QuoteBar"/> object that contains quote bar information of the security</param>
public void Add(IEnumerable<Tick> ticks, TradeBar tradeBar, QuoteBar quoteBar)
{
if (tradeBar != null)
{
var time = tradeBar.EndTime;
AddToSeries("open", time, tradeBar.Open);
AddToSeries("high", time, tradeBar.High);
AddToSeries("low", time, tradeBar.Low);
AddToSeries("close", time, tradeBar.Close);
AddToSeries("volume", time, tradeBar.Volume);
}
if (quoteBar != null)
{
var time = quoteBar.EndTime;
if (tradeBar == null)
{
AddToSeries("open", time, quoteBar.Open);
AddToSeries("high", time, quoteBar.High);
AddToSeries("low", time, quoteBar.Low);
AddToSeries("close", time, quoteBar.Close);
}
if (quoteBar.Ask != null)
{
AddToSeries("askopen", time, quoteBar.Ask.Open);
AddToSeries("askhigh", time, quoteBar.Ask.High);
AddToSeries("asklow", time, quoteBar.Ask.Low);
AddToSeries("askclose", time, quoteBar.Ask.Close);
AddToSeries("asksize", time, quoteBar.LastAskSize);
}
if (quoteBar.Bid != null)
{
AddToSeries("bidopen", time, quoteBar.Bid.Open);
AddToSeries("bidhigh", time, quoteBar.Bid.High);
AddToSeries("bidlow", time, quoteBar.Bid.Low);
AddToSeries("bidclose", time, quoteBar.Bid.Close);
AddToSeries("bidsize", time, quoteBar.LastBidSize);
}
}
if (ticks != null)
{
foreach (var tick in ticks)
{
if (tick == null) continue;
var time = tick.EndTime;
var column = tick.TickType == TickType.OpenInterest
? "openinterest"
: "lastprice";
if (tick.TickType == TickType.Quote)
{
AddToSeries("askprice", time, tick.AskPrice);
AddToSeries("asksize", time, tick.AskSize);
AddToSeries("bidprice", time, tick.BidPrice);
AddToSeries("bidsize", time, tick.BidSize);
}
AddToSeries("exchange", time, tick.Exchange);
AddToSeries("suspicious", time, tick.Suspicious);
AddToSeries("quantity", time, tick.Quantity);
AddToSeries(column, time, tick.LastPrice);
}
}
}
/// <summary>
/// Get the pandas.DataFrame of the current <see cref="PandasData"/> state
/// </summary>
/// <param name="levels">Number of levels of the multi index</param>
/// <returns>pandas.DataFrame object</returns>
public PyObject ToPandasDataFrame(int levels = 2)
{
var empty = new PyString(string.Empty);
var list = Enumerable.Repeat<PyObject>(empty, 5).ToList();
list[3] = _symbol.ID.ToString().ToPython();
if (_symbol.SecurityType == SecurityType.Future)
{
list[0] = _symbol.ID.Date.ToPython();
list[3] = _symbol.ID.ToString().ToPython();
}
if (_symbol.SecurityType == SecurityType.Option || _symbol.SecurityType == SecurityType.FutureOption)
{
list[0] = _symbol.ID.Date.ToPython();
list[1] = _symbol.ID.StrikePrice.ToPython();
list[2] = _symbol.ID.OptionRight.ToString().ToPython();
list[3] = _symbol.ID.ToString().ToPython();
}
// Create the index labels
var names = "expiry,strike,type,symbol,time";
if (levels == 2)
{
names = "symbol,time";
list.RemoveRange(0, 3);
}
if (levels == 3)
{
names = "expiry,symbol,time";
list.RemoveRange(1, 2);
}
Func<object, bool> filter = x =>
{
var isNaNOrZero = x is double && ((double)x).IsNaNOrZero();
var isNullOrWhiteSpace = x is string && string.IsNullOrWhiteSpace((string)x);
var isFalse = x is bool && !(bool)x;
return x == null || isNaNOrZero || isNullOrWhiteSpace || isFalse;
};
Func<DateTime, PyTuple> selector = x =>
{
list[list.Count - 1] = x.ToPython();
return new PyTuple(list.ToArray());
};
// creating the pandas MultiIndex is expensive so we keep a cash
var indexCache = new Dictionary<List<DateTime>, dynamic>(new ListComparer<DateTime>());
using (Py.GIL())
{
// Returns a dictionary keyed by column name where values are pandas.Series objects
var pyDict = new PyDict();
var splitNames = names.Split(',');
foreach (var kvp in _series)
{
var values = kvp.Value.Item2;
if (values.All(filter)) continue;
dynamic index;
if (!indexCache.TryGetValue(kvp.Value.Item1, out index))
{
var tuples = kvp.Value.Item1.Select(selector).ToArray();
index = _pandas.MultiIndex.from_tuples(tuples, names: splitNames);
indexCache[kvp.Value.Item1] = index;
}
// Adds pandas.Series value keyed by the column name
// CreateSeries will create an original pandas.Series
// We are not using the wrapper class to avoid unnecessary and expensive
// index wrapping operations when the Series are packed into a DataFrame
pyDict.SetItem(kvp.Key, _pandas.CreateSeries(values, index));
}
_series.Clear();
// Create a DataFrame with wrapper class.
// This is the starting point. The types of all DataFrame and Series that result from any operation will
// be wrapper classes. Index and MultiIndex will be converted when required by index operations such as
// stack, unstack, merge, union, etc.
return _pandas.DataFrame(pyDict);
}
}
/// <summary>
/// Adds data to dictionary
/// </summary>
/// <param name="key">The key of the value to get</param>
/// <param name="time"><see cref="DateTime"/> object to add to the value associated with the specific key</param>
/// <param name="input"><see cref="Object"/> to add to the value associated with the specific key. Can be null.</param>
private void AddToSeries(string key, DateTime time, object input)
{
Tuple<List<DateTime>, List<object>> value;
if (_series.TryGetValue(key, out value))
{
value.Item1.Add(time);
value.Item2.Add(input is decimal ? input.ConvertInvariant<double>() : input);
}
else
{
throw new ArgumentException($"PandasData.AddToSeries(): {key} key does not exist in series dictionary.");
}
}
/// <summary>
/// Get the lower-invariant name of properties of the type that a another type is assignable from
/// </summary>
/// <param name="baseType">The type that is assignable from</param>
/// <param name="type">The type that is assignable by</param>
/// <returns>List of string. Empty list if not assignable from</returns>
private static IEnumerable<string> GetPropertiesNames(Type baseType, Type type)
{
return baseType.IsAssignableFrom(type)
? baseType.GetProperties().Select(x => x.Name.ToLowerInvariant())
: Enumerable.Empty<string>();
}
}
}

View File

@@ -25,6 +25,7 @@
<NuGetPackageImportStamp>
</NuGetPackageImportStamp>
<CodeAnalysisRuleSet>..\QuantConnect.ruleset</CodeAnalysisRuleSet>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
@@ -106,6 +107,9 @@
<CodeAnalysisRuleSet>..\QuantConnect.ruleset</CodeAnalysisRuleSet>
</PropertyGroup>
<ItemGroup>
<Reference Include="Apache.Arrow, Version=2.0.0.0, Culture=neutral, PublicKeyToken=204f54e5a45c07df, processorArchitecture=MSIL">
<HintPath>..\packages\gsalaz98.Unofficial.Apache.Arrow.2.0.1-SNAPSHOT\lib\net461\Apache.Arrow.dll</HintPath>
</Reference>
<Reference Include="CloneExtensions, Version=1.3.0.0, Culture=neutral, processorArchitecture=MSIL">
<HintPath>..\packages\CloneExtensions.1.3.0\lib\net461\CloneExtensions.dll</HintPath>
</Reference>
@@ -124,6 +128,11 @@
<Reference Include="Microsoft.IO.RecyclableMemoryStream, Version=1.3.5.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
<HintPath>..\packages\Microsoft.IO.RecyclableMemoryStream.1.3.5\lib\net46\Microsoft.IO.RecyclableMemoryStream.dll</HintPath>
</Reference>
<Reference Include="Microsoft.Win32.Primitives, Version=4.0.2.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
<HintPath>..\packages\Microsoft.Win32.Primitives.4.3.0\lib\net46\Microsoft.Win32.Primitives.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="mscorlib" />
<Reference Include="Newtonsoft.Json, Version=10.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
<HintPath>..\packages\Newtonsoft.Json.10.0.3\lib\net45\Newtonsoft.Json.dll</HintPath>
</Reference>
@@ -166,6 +175,10 @@
<Reference Include="System.ServiceModel.Primitives, Version=4.7.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<HintPath>..\packages\System.ServiceModel.Primitives.4.7.0\lib\net461\System.ServiceModel.Primitives.dll</HintPath>
</Reference>
<Reference Include="System.Threading.Tasks.Extensions, Version=4.2.0.1, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51">
<HintPath>..\packages\System.Threading.Tasks.Extensions.4.5.4\lib\net461\System.Threading.Tasks.Extensions.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
@@ -401,6 +414,7 @@
<Compile Include="Packets\LeakyBucketControlParameters.cs" />
<Compile Include="Packets\LiveResultParameters.cs" />
<Compile Include="Python\BrokerageMessageHandlerPythonWrapper.cs" />
<Compile Include="Python\PandasArrowMemoryAllocator.cs" />
<Compile Include="Python\PythonConsolidator.cs" />
<Compile Include="Python\MarginCallModelPythonWrapper.cs" />
<Compile Include="Python\PythonInitializer.cs" />
@@ -515,7 +529,6 @@
<Compile Include="Python\PythonActivator.cs" />
<Compile Include="Python\PythonSlice.cs" />
<Compile Include="Python\VolatilityModelPythonWrapper.cs" />
<Compile Include="Python\PandasData.cs" />
<Compile Include="Python\SecurityInitializerPythonWrapper.cs" />
<Compile Include="Python\SlippageModelPythonWrapper.cs" />
<Compile Include="Python\FillModelPythonWrapper.cs" />
@@ -972,4 +985,4 @@
<Target Name="AfterBuild">
</Target>
-->
</Project>
</Project>

View File

@@ -3,6 +3,7 @@
<package id="CloneExtensions" version="1.3.0" targetFramework="net462" />
<package id="DotNetZip" version="1.13.3" targetFramework="net452" />
<package id="fasterflect" version="2.1.3" targetFramework="net45" />
<package id="gsalaz98.Unofficial.Apache.Arrow" version="2.0.1-SNAPSHOT" targetFramework="net462" />
<package id="MathNet.Numerics" version="3.19.0" targetFramework="net452" />
<package id="Microsoft.CodeAnalysis.FxCopAnalyzers" version="2.9.3" targetFramework="net452" />
<package id="Microsoft.CodeAnalysis.VersionCheckAnalyzer" version="2.9.3" targetFramework="net452" />
@@ -24,4 +25,5 @@
<package id="System.Reflection.Emit.Lightweight" version="4.3.0" targetFramework="net452" />
<package id="System.Runtime.CompilerServices.Unsafe" version="4.5.3" targetFramework="net462" />
<package id="System.ServiceModel.Primitives" version="4.7.0" targetFramework="net462" />
<package id="System.Threading.Tasks.Extensions" version="4.5.4" targetFramework="net462" />
</packages>

View File

@@ -66,6 +66,9 @@ RUN conda install -y \
pandas=0.25.3 \
wrapt=1.12.1
# Install pyarrow to improve DataFrame construction performance
RUN pip install pyarrow==1.0.1
# Install non-math packages
RUN conda install -y \
astropy=4.0.1.post1 \
@@ -242,4 +245,4 @@ RUN conda remove --force-remove -y s3transfer
RUN conda clean -y --all
# List all packages
RUN conda list
RUN conda list

View File

@@ -15,6 +15,7 @@
*/
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using NodaTime;
using QuantConnect.Data;
@@ -31,19 +32,20 @@ namespace QuantConnect.Lean.Engine.DataFeeds
public class TimeSliceFactory
{
private readonly DateTimeZone _timeZone;
private static readonly ConcurrentDictionary<Symbol, Symbol> _canonicalSymbols = new ConcurrentDictionary<Symbol,Symbol>();
// performance: these collections are not always used so keep a reference to an empty
// instance to use and avoid unnecessary constructors and allocations
private readonly List<UpdateData<ISecurityPrice>> _emptyCustom = new List<UpdateData<ISecurityPrice>>();
private readonly TradeBars _emptyTradeBars = new TradeBars();
private readonly QuoteBars _emptyQuoteBars = new QuoteBars();
private readonly Ticks _emptyTicks = new Ticks();
private readonly Splits _emptySplits = new Splits();
private readonly Dividends _emptyDividends = new Dividends();
private readonly Delistings _emptyDelistings = new Delistings();
private readonly OptionChains _emptyOptionChains = new OptionChains();
private readonly FuturesChains _emptyFuturesChains = new FuturesChains();
private readonly SymbolChangedEvents _emptySymbolChangedEvents = new SymbolChangedEvents();
private static List<UpdateData<ISecurityPrice>> _emptyCustom = new List<UpdateData<ISecurityPrice>>(0);
private static TradeBars _emptyTradeBars = new TradeBars();
private static QuoteBars _emptyQuoteBars = new QuoteBars();
private static Ticks _emptyTicks = new Ticks();
private static Splits _emptySplits = new Splits();
private static Dividends _emptyDividends = new Dividends();
private static Delistings _emptyDelistings = new Delistings();
private static OptionChains _emptyOptionChains = new OptionChains();
private static FuturesChains _emptyFuturesChains = new FuturesChains();
private static SymbolChangedEvents _emptySymbolChangedEvents = new SymbolChangedEvents();
/// <summary>
/// Creates a new instance
@@ -118,8 +120,6 @@ namespace QuantConnect.Lean.Engine.DataFeeds
FuturesChains futuresChains = null;
SymbolChangedEvents symbolChanges = null;
UpdateEmptyCollections(algorithmTime);
if (universeData.Count > 0)
{
// count universe data
@@ -362,36 +362,19 @@ namespace QuantConnect.Lean.Engine.DataFeeds
return new TimeSlice(utcDateTime, count, slice, data, security, consolidator, custom ?? _emptyCustom, changes, universeData);
}
private void UpdateEmptyCollections(DateTime algorithmTime)
{
// just in case
_emptyTradeBars.Clear();
_emptyQuoteBars.Clear();
_emptyTicks.Clear();
_emptySplits.Clear();
_emptyDividends.Clear();
_emptyDelistings.Clear();
_emptyOptionChains.Clear();
_emptyFuturesChains.Clear();
_emptySymbolChangedEvents.Clear();
_emptyTradeBars.Time
= _emptyQuoteBars.Time
= _emptyTicks.Time
= _emptySplits.Time
= _emptyDividends.Time
= _emptyDelistings.Time
= _emptyOptionChains.Time
= _emptyFuturesChains.Time
= _emptySymbolChangedEvents.Time = algorithmTime;
}
private bool HandleOptionData(DateTime algorithmTime, BaseData baseData, OptionChains optionChains, ISecurityPrice security, Lazy<Slice> sliceFuture, IReadOnlyDictionary<Symbol, BaseData> optionUnderlyingUpdates)
{
var symbol = baseData.Symbol;
OptionChain chain;
var canonical = Symbol.CreateOption(symbol.Underlying, symbol.ID.Market, default(OptionStyle), default(OptionRight), 0, SecurityIdentifier.DefaultDate);
Symbol canonical;
if (!_canonicalSymbols.TryGetValue(symbol, out canonical))
{
canonical = Symbol.CreateOption(symbol.Underlying, symbol.ID.Market, default(OptionStyle), default(OptionRight), 0, SecurityIdentifier.DefaultDate);
_canonicalSymbols.TryAdd(symbol, canonical);
}
if (!optionChains.TryGetValue(canonical, out chain))
{
chain = new OptionChain(canonical, algorithmTime);
@@ -498,7 +481,14 @@ namespace QuantConnect.Lean.Engine.DataFeeds
var symbol = baseData.Symbol;
FuturesChain chain;
var canonical = Symbol.Create(symbol.ID.Symbol, SecurityType.Future, symbol.ID.Market);
Symbol canonical;
if (!_canonicalSymbols.TryGetValue(symbol, out canonical))
{
canonical = Symbol.Create(symbol.ID.Symbol, SecurityType.Future, symbol.ID.Market);
_canonicalSymbols.TryAdd(symbol, canonical);
}
if (!futuresChains.TryGetValue(canonical, out chain))
{
chain = new FuturesChain(canonical, algorithmTime);

View File

@@ -27,6 +27,10 @@
<assemblyIdentity name="System.IO.Compression" publicKeyToken="b77a5c561934e089" culture="neutral" />
<bindingRedirect oldVersion="0.0.0.0-4.2.0.0" newVersion="4.2.0.0" />
</dependentAssembly>
<dependentAssembly>
<assemblyIdentity name="System.Threading.Tasks.Extensions" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
<bindingRedirect oldVersion="0.0.0.0-4.2.0.1" newVersion="4.2.0.1" />
</dependentAssembly>
</assemblyBinding>
</runtime>
<startup><supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6.2" /></startup></configuration>

View File

@@ -230,6 +230,14 @@ Global
{D46D2A8D-340C-4B40-8EE6-6BAA7B1198AB}.Release|Any CPU.Build.0 = Release|Any CPU
{D46D2A8D-340C-4B40-8EE6-6BAA7B1198AB}.Release|x64.ActiveCfg = Release|Any CPU
{D46D2A8D-340C-4B40-8EE6-6BAA7B1198AB}.Release|x64.Build.0 = Release|Any CPU
{B3F3B1AC-3912-4020-945C-1DA8814C0A3B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{B3F3B1AC-3912-4020-945C-1DA8814C0A3B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{B3F3B1AC-3912-4020-945C-1DA8814C0A3B}.Debug|x64.ActiveCfg = Debug|Any CPU
{B3F3B1AC-3912-4020-945C-1DA8814C0A3B}.Debug|x64.Build.0 = Debug|Any CPU
{B3F3B1AC-3912-4020-945C-1DA8814C0A3B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{B3F3B1AC-3912-4020-945C-1DA8814C0A3B}.Release|Any CPU.Build.0 = Release|Any CPU
{B3F3B1AC-3912-4020-945C-1DA8814C0A3B}.Release|x64.ActiveCfg = Release|Any CPU
{B3F3B1AC-3912-4020-945C-1DA8814C0A3B}.Release|x64.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@@ -1,10 +1,14 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6.2"/>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6.2" />
</startup>
<runtime>
<assemblyBinding xmlns="urn:schemas-microsoft-com:asm.v1">
<dependentAssembly>
<assemblyIdentity name="System.Threading.Tasks.Extensions" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
<bindingRedirect oldVersion="0.0.0.0-4.1.1.0" newVersion="4.1.1.0" />
</dependentAssembly>
</assemblyBinding>
</runtime>
</configuration>

View File

@@ -0,0 +1,579 @@
/*
* QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
* Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Linq;
using NUnit.Framework;
using Python.Runtime;
using QuantConnect.Algorithm;
using QuantConnect.Data;
using QuantConnect.Data.Auxiliary;
using QuantConnect.Data.Market;
using QuantConnect.Lean.Engine.DataFeeds;
using QuantConnect.Lean.Engine.HistoricalData;
using QuantConnect.Securities;
namespace QuantConnect.Tests.Python
{
[TestFixture]
public partial class PandasConverterTests
{
private QCAlgorithm _algorithm;
private ZipDataCacheProvider _cacheProvider;
[TestCase(Resolution.Daily)]
[TestCase(Resolution.Hour)]
[TestCase(Resolution.Minute)]
[TestCase(Resolution.Second)]
[TestCase(Resolution.Tick)]
public void EquityTAQSingleSymbol(Resolution resolution)
{
AssertEquity(new[] { Symbols.IBM }, resolution);
}
[TestCase(Resolution.Daily)]
[TestCase(Resolution.Hour)]
[TestCase(Resolution.Minute)]
[TestCase(Resolution.Second)]
[TestCase(Resolution.Tick)]
public void EquityTAQMultipleSymbols(Resolution resolution)
{
AssertEquity(new[]
{
Symbols.IBM,
Symbols.SPY,
Symbol.Create("BAC", SecurityType.Equity, Market.USA),
Symbol.Create("AIG", SecurityType.Equity, Market.USA)
}, resolution);
}
[TestCase(Resolution.Daily)]
[TestCase(Resolution.Hour)]
[TestCase(Resolution.Minute)]
[TestCase(Resolution.Second)]
[TestCase(Resolution.Tick)]
public void ForexSingleSymbol(Resolution resolution)
{
AssertForex(new[] { Symbols.EURUSD }, resolution, new DateTime(2014, 5, 1), new DateTime(2014, 5, 5));
}
[TestCase(Resolution.Daily)]
[TestCase(Resolution.Hour)]
[TestCase(Resolution.Minute)]
[TestCase(Resolution.Second)]
[TestCase(Resolution.Tick)]
public void ForexMultipleSymbols(Resolution resolution)
{
AssertForex(new[]
{
Symbols.EURUSD,
Symbol.Create("NZDUSD", SecurityType.Forex, Market.Oanda)
}, resolution, new DateTime(2014, 5, 1), new DateTime(2014, 5, 5));
}
[TestCase(Resolution.Minute)]
public void FuturesSingleSymbol(Resolution resolution)
{
AssertFuture(new[]
{
Symbol.CreateFuture(Futures.Indices.SP500EMini, Market.CME, new DateTime(2013, 12, 20))
}, resolution);
}
[TestCase(Resolution.Minute)]
public void FuturesMultipleSymbols(Resolution resolution)
{
AssertFuture(new[]
{
Symbol.CreateFuture(Futures.Indices.SP500EMini, Market.CME, new DateTime(2013, 12, 20)),
Symbol.CreateFuture(Futures.Indices.SP500EMini, Market.CME, new DateTime(2014, 3, 21)),
Symbol.CreateFuture(Futures.Indices.SP500EMini, Market.CME, new DateTime(2014, 6, 20)),
}, resolution);
}
[TestCase(Resolution.Minute)]
public void OptionSingleSymbol(Resolution resolution)
{
AssertOption(new[]
{
Symbol.CreateOption(
Symbol.Create("AAPL", SecurityType.Equity, Market.USA),
Market.USA,
OptionStyle.American,
OptionRight.Call,
650m,
new DateTime(2014, 6, 13))
}, resolution, new DateTime(2014, 6, 6), new DateTime(2014, 6, 7));
}
[TestCase(Resolution.Minute)]
public void OptionMultipleSymbols(Resolution resolution)
{
AssertOption(new[]
{
Symbol.CreateOption(
Symbol.Create("AAPL", SecurityType.Equity, Market.USA),
Market.USA,
OptionStyle.American,
OptionRight.Call,
650m,
new DateTime(2014, 6, 6)),
Symbol.CreateOption(
Symbol.Create("AAPL", SecurityType.Equity, Market.USA),
Market.USA,
OptionStyle.American,
OptionRight.Put,
650m,
new DateTime(2014, 6, 6)),
Symbol.CreateOption(
Symbol.Create("AAPL", SecurityType.Equity, Market.USA),
Market.USA,
OptionStyle.American,
OptionRight.Call,
660m,
new DateTime(2014, 6, 13)),
Symbol.CreateOption(
Symbol.Create("AAPL", SecurityType.Equity, Market.USA),
Market.USA,
OptionStyle.American,
OptionRight.Put,
660m,
new DateTime(2014, 6, 13))
}, resolution, new DateTime(2014, 6, 6), new DateTime(2014, 6, 7));
}
[Test]
public void OpenInterestOnlyOption()
{
// This test ensures that the DataFrame created with only
// open interest data has more than the two default columns: "symbol", "time".
AssertOption(new []
{
Symbol.CreateOption(
Symbol.Create("AAPL", SecurityType.Equity, Market.USA),
Market.USA,
OptionStyle.American,
OptionRight.Call,
750,
new DateTime(2014, 10, 18))
}, Resolution.Minute, new DateTime(2014, 6, 9), new DateTime(2014, 6, 10), openInterestOnly: true);
}
[TestCase(Resolution.Daily)]
[TestCase(Resolution.Minute)]
public void CryptoSingleSymbol(Resolution resolution)
{
AssertCrypto(new[] { Symbols.BTCUSD }, resolution, new DateTime(2018, 4, 4), new DateTime(2018, 4, 6));
}
[TestCase(Resolution.Daily)]
[TestCase(Resolution.Minute)]
public void CryptoMultipleSymbols(Resolution resolution)
{
var symbols = resolution == Resolution.Daily ? new[] { Symbols.BTCUSD } : new[]
{
Symbols.BTCUSD,
Symbols.BTCEUR,
Symbols.ETHUSD
};
AssertCrypto(symbols, resolution, new DateTime(2018, 4, 4), new DateTime(2018, 4, 6));
}
[TestCase(Resolution.Daily)]
[TestCase(Resolution.Hour)]
[TestCase(Resolution.Minute)]
[TestCase(Resolution.Second)]
[TestCase(Resolution.Tick)]
public void CfdSingleSymbol(Resolution resolution)
{
AssertForex(new[]
{
Symbol.Create("XAUUSD", SecurityType.Cfd, Market.Oanda)
}, resolution, new DateTime(2014, 5, 1), new DateTime(2014, 5, 4));
}
private void AssertEquity(IEnumerable<Symbol> symbols, Resolution resolution, DateTime? startDate = null, DateTime? endDate = null)
{
using (Py.GIL())
{
// History request for Tick data with multiple Symbols is the bottleneck when
// it comes to the EquityTAQMultipleSymbols test. Slice.Keys is also extremely
// slow, but must be iterated in order to grab all Symbols from the Slice, including
// custom data Symbols.
var history = History(
symbols,
startDate ?? new DateTime(2013, 10, 7),
endDate ?? (resolution != Resolution.Tick ? new DateTime(2013, 10, 11) : new DateTime(2013, 10, 8)),
resolution
).ToList();
var tbColumns = new[] { "open", "high", "low", "close", "volume" };
var qbColumns = new[] {
"bidopen", "bidhigh", "bidlow", "bidclose", "bidsize",
"askopen", "askhigh", "asklow", "askclose", "asksize"
};
// The "suspicious" column is excluded because all of its values are currently false.
// It would be sandwiched between the "exchange" and "lastprice" columns.
var tickColumns = new[] { "exchange", "lastprice", "quantity", "bidprice", "bidsize", "askprice", "asksize" };
Assert.Greater(history.Count, 0);
dynamic df = _algorithm.PandasConverter.GetDataFrame(history);
var tbs = history.SelectMany(x => x.Bars.Values.Select(y => (BaseData)y)).ToList();
var qbs = history.SelectMany(x => x.QuoteBars.Values.Select(y => (BaseData)y)).ToList();
var taqDataPoints = tbs.Concat(qbs).GroupBy(x => x.EndTime).Sum(kvp => kvp.GroupBy(x => x.Symbol).Count());
var tickLength = history.AsParallel().Select(x => x.Ticks.Values.Sum(y => y.Count)).Sum();
var dataPointsCount = taqDataPoints + tickLength;
Console.WriteLine($"dpts: {dataPointsCount}");
Assert.AreEqual(dataPointsCount, df.__len__().AsManagedObject(typeof(int)));
var pandasColumns = (string[])df.columns.AsManagedObject(typeof(string[]));
if (resolution == Resolution.Daily || resolution == Resolution.Hour)
{
Assert.IsTrue(tbColumns.SequenceEqual(pandasColumns));
}
else if (resolution == Resolution.Minute || resolution == Resolution.Second)
{
Assert.IsTrue(tbColumns.Concat(qbColumns).SequenceEqual(pandasColumns));
}
else
{
Assert.IsTrue(tickColumns.SequenceEqual(pandasColumns));
}
var pandasIndexes = (string[])df.index.names.AsManagedObject(typeof(string[]));
Assert.IsTrue(new[] { "symbol", "time" }.SequenceEqual(pandasIndexes));
Assert.DoesNotThrow(() =>
{
var locals = new PyDict();
locals.SetItem("df", df);
foreach (var symbol in symbols)
{
PythonEngine.Eval($"df.loc[\"{symbol.ID}\"]", null, locals.Handle);
}
});
}
}
private void AssertForex(IEnumerable<Symbol> symbols, Resolution resolution, DateTime? startDate = null, DateTime? endDate = null)
{
using (Py.GIL())
{
var history = History(
symbols,
startDate ?? new DateTime(2013, 10, 7),
endDate ?? (resolution != Resolution.Tick ? new DateTime(2013, 10, 11) : new DateTime(2013, 10, 8)),
resolution
).ToList();
var qbColumns = new[] {
// When we have no Trades, we set "OHLC" to the QuoteBar's OHLC properties.
// We expect the "volume" column to be dropped, as it will be full of NaN values.
// Quotes only data will not have BidSize/AskSize
"open", "high", "low", "close",
"bidopen", "bidhigh", "bidlow", "bidclose",
"askopen", "askhigh", "asklow", "askclose"
};
// The "suspicious" and "exchange" columns are excluded because all of its values are filtered out.
var tickColumns = new[]
{
"lastprice",
"bidprice",
"askprice"
};
Assert.Greater(history.Count, 0);
dynamic df = _algorithm.PandasConverter.GetDataFrame(history);
var qbs = history.SelectMany(x => x.QuoteBars.Values.Select(y => (BaseData)y)).ToList();
var taqDataPoints = qbs.GroupBy(x => x.EndTime).Sum(kvp => kvp.GroupBy(x => x.Symbol).Count());
var tickLength = history.AsParallel().Select(x => x.Ticks.Values.Sum(y => y.Count)).Sum();
var dataPointsCount = taqDataPoints + tickLength;
Console.WriteLine($"dpts: {dataPointsCount}");
var pandasColumns = (string[])df.columns.AsManagedObject(typeof(string[]));
Assert.AreEqual(dataPointsCount, df.__len__().AsManagedObject(typeof(int)));
if (resolution != Resolution.Tick)
{
Assert.IsTrue(qbColumns.SequenceEqual(pandasColumns));
}
else
{
Assert.IsTrue(tickColumns.SequenceEqual(pandasColumns));
}
var pandasIndexes = (string[])df.index.names.AsManagedObject(typeof(string[]));
Assert.IsTrue(new[] { "symbol", "time" }.SequenceEqual(pandasIndexes));
Assert.DoesNotThrow(() =>
{
var locals = new PyDict();
locals.SetItem("df", df);
foreach (var symbol in symbols)
{
PythonEngine.Eval($"df.loc[\"{symbol.ID}\"]", null, locals.Handle);
}
});
}
}
private void AssertFuture(IEnumerable<Symbol> symbols, Resolution resolution, DateTime? startDate = null, DateTime? endDate = null)
{
using (Py.GIL())
{
var history = History(
symbols,
startDate ?? new DateTime(2013, 10, 7),
endDate ?? (resolution != Resolution.Tick ? new DateTime(2013, 10, 11) : new DateTime(2013, 10, 8)),
resolution
).ToList();
var futureColumns = new[] {
"open", "high", "low", "close", "volume",
"bidopen", "bidhigh", "bidlow", "bidclose", "bidsize",
"askopen", "askhigh", "asklow", "askclose", "asksize",
"openinterest"
};
// The "suspicious" and "exchange" columns are excluded because all of its values are filtered out.
var tickColumns = new[]
{
"lastprice", "quantity",
"bidprice", "bidsize",
"askprice", "asksize",
"openinterest"
};
Assert.Greater(history.Count, 0);
dynamic df = _algorithm.PandasConverter.GetDataFrame(history);
var tbs = history.SelectMany(x => x.Bars.Values.Select(y => (BaseData)y)).ToList();
var qbs = history.SelectMany(x => x.QuoteBars.Values.Select(y => (BaseData)y)).ToList();
var oi = history.SelectMany(x => x.Ticks.Values.Where(t => t is OpenInterest).SelectMany(t => t)).ToList();
var tbQbOiDataPoints = tbs.Concat(qbs).Concat(oi).GroupBy(x => x.EndTime).Sum(kvp => kvp.GroupBy(x => x.Symbol).Count());
var tickLength = history.AsParallel().Sum(x => x.Ticks.Values.SelectMany(t => t).Count(t => !(t is OpenInterest)));
var dataPointsCount = tbQbOiDataPoints + tickLength;
Console.WriteLine($"dpts: {dataPointsCount}");
Assert.AreEqual(dataPointsCount, df.__len__().AsManagedObject(typeof(int)));
var pandasColumns = (string[])df.columns.AsManagedObject(typeof(string[]));
if (resolution != Resolution.Tick)
{
Assert.IsTrue(futureColumns.SequenceEqual(pandasColumns));
}
else
{
Assert.IsTrue(tickColumns.SequenceEqual(pandasColumns));
}
var pandasIndexes = (string[])df.index.names.AsManagedObject(typeof(string[]));
Assert.IsTrue(new[] { "expiry", "symbol", "time" }.SequenceEqual(pandasIndexes));
}
}
private void AssertOption(IEnumerable<Symbol> symbols, Resolution resolution, DateTime? startDate = null, DateTime? endDate = null, bool openInterestOnly = false)
{
using (Py.GIL())
{
var history = History(
symbols,
startDate ?? new DateTime(2013, 10, 7),
endDate ?? (resolution != Resolution.Tick ? new DateTime(2013, 10, 11) : new DateTime(2013, 10, 8)),
resolution
).ToList();
var optionColumns = new[] {
"open", "high", "low", "close", "volume",
"bidopen", "bidhigh", "bidlow", "bidclose", "bidsize",
"askopen", "askhigh", "asklow", "askclose", "asksize",
"openinterest"
};
Assert.Greater(history.Count, 0);
dynamic df = _algorithm.PandasConverter.GetDataFrame(history);
var tbs = history.SelectMany(x => x.Bars.Values.Select(y => (BaseData)y)).ToList();
var qbs = history.SelectMany(x => x.QuoteBars.Values.Select(y => (BaseData)y)).ToList();
var oi = history.SelectMany(x => x.Ticks.Values.Select(ticks => ticks.Where(t => t is OpenInterest)).SelectMany(t => t)).ToList();
var dataPointsCount = tbs.Concat(qbs).Concat(oi).GroupBy(x => x.EndTime).Sum(kvp => kvp.GroupBy(x => x.Symbol).Count());
Assert.AreEqual(dataPointsCount, df.__len__().AsManagedObject(typeof(int)));
var pandasColumns = (string[])df.columns.AsManagedObject(typeof(string[]));
if (openInterestOnly)
{
Assert.IsTrue(new[] { "openinterest" }.SequenceEqual(pandasColumns));
}
else
{
Assert.IsTrue(optionColumns.SequenceEqual(pandasColumns));
}
var pandasIndexes = (string[])df.index.names.AsManagedObject(typeof(string[]));
Assert.IsTrue(new[] { "expiry", "strike", "type", "symbol", "time" }.SequenceEqual(pandasIndexes));
Assert.DoesNotThrow(() =>
{
var locals = new PyDict();
locals.SetItem("datetime", Py.Import("datetime"));
locals.SetItem("df", df);
foreach (var symbol in symbols)
{
PythonEngine.Eval($"df.loc[datetime.datetime({symbol.ID.Date.Year}, {symbol.ID.Date.Month}, {symbol.ID.Date.Day})].loc[{symbol.ID.StrikePrice.ToStringInvariant()}].loc['{symbol.ID.OptionRight.ToString()}'].loc['{symbol.ID.ToString()}']", null, locals.Handle);
}
});
}
}
private void AssertCrypto(IEnumerable<Symbol> symbols, Resolution resolution, DateTime startDate, DateTime endDate)
{
using (Py.GIL())
{
// History request for Tick data with multiple Symbols is the bottleneck when
// it comes to the EquityTAQMultipleSymbols test. Slice.Keys is also extremely
// slow, but must be iterated in order to grab all Symbols from the Slice, including
// custom data Symbols.
var history = History(
symbols,
startDate,
endDate,
resolution
).ToList();
var cryptoColumns = new[] {
"open", "high", "low", "close", "volume",
"bidopen", "bidhigh", "bidlow", "bidclose", "bidsize",
"askopen", "askhigh", "asklow", "askclose", "asksize"
};
// The "suspicious" column is excluded because all of its values are currently false.
// It would be sandwiched between the "exchange" and "lastprice" columns.
var tickColumns = new[] { "exchange", "lastprice", "quantity", "bidprice", "bidsize", "askprice", "asksize" };
Assert.Greater(history.Count, 0);
dynamic df = _algorithm.PandasConverter.GetDataFrame(history);
var tbs = history.SelectMany(x => x.Bars.Values.Select(y => (BaseData)y)).ToList();
var qbs = history.SelectMany(x => x.QuoteBars.Values.Select(y => (BaseData)y)).ToList();
var taqDataPoints = tbs.Concat(qbs).GroupBy(x => x.EndTime).Sum(kvp => kvp.GroupBy(x => x.Symbol).Count());
var tickLength = history.AsParallel().Select(x => x.Ticks.Values.Sum(y => y.Count)).Sum();
var dataPointsCount = taqDataPoints + tickLength;
Console.WriteLine($"dpts: {dataPointsCount}");
Assert.AreEqual(dataPointsCount, df.__len__().AsManagedObject(typeof(int)));
var pandasColumns = (string[])df.columns.AsManagedObject(typeof(string[]));
if (resolution != Resolution.Tick)
{
Assert.IsTrue(cryptoColumns.SequenceEqual(pandasColumns));
}
else
{
Assert.IsTrue(tickColumns.SequenceEqual(pandasColumns));
}
var pandasIndexes = (string[])df.index.names.AsManagedObject(typeof(string[]));
Assert.IsTrue(new[] { "symbol", "time" }.SequenceEqual(pandasIndexes));
Assert.DoesNotThrow(() =>
{
var locals = new PyDict();
locals.SetItem("df", df);
foreach (var symbol in symbols)
{
PythonEngine.Eval($"df.loc[\"{symbol.ID}\"]", null, locals.Handle);
}
});
}
}
private IEnumerable<Slice> History(IEnumerable<Symbol> symbols, DateTime start, DateTime end, Resolution resolution)
{
if (_algorithm != null)
{
return _algorithm.History(symbols, start, end, resolution);
}
_algorithm = new QCAlgorithm();
_algorithm.SetPandasConverter();
var dataFeed = new NullDataFeed();
_algorithm.SubscriptionManager = new SubscriptionManager();
_algorithm.SubscriptionManager.SetDataManager(new DataManager(
dataFeed,
new UniverseSelection(
_algorithm,
new SecurityService(
new CashBook(),
MarketHoursDatabase.FromDataFolder(),
SymbolPropertiesDatabase.FromDataFolder(),
_algorithm,
null,
null
),
new DataPermissionManager(),
new DefaultDataProvider()
),
_algorithm,
new TimeKeeper(DateTime.UtcNow),
MarketHoursDatabase.FromDataFolder(),
false,
null,
new DataPermissionManager()
));
_cacheProvider = new ZipDataCacheProvider(new DefaultDataProvider());
_algorithm.HistoryProvider = new SubscriptionDataReaderHistoryProvider();
_algorithm.HistoryProvider.Initialize(
new HistoryProviderInitializeParameters(
null,
null,
null,
_cacheProvider,
new LocalDiskMapFileProvider(),
new LocalDiskFactorFileProvider(),
(_) => {},
false,
new DataPermissionManager()));
_algorithm.SetStartDate(DateTime.UtcNow.AddDays(-1));
return _algorithm.History(symbols, start, end, resolution);
}
[TearDown]
public void Dispose()
{
_cacheProvider?.Dispose();
}
}
}

View File

@@ -56,7 +56,7 @@ namespace QuantConnect.Tests.Python
var rawBars = Enumerable.Empty<TradeBar>().ToArray();
// GetDataFrame with argument of type IEnumerable<TradeBar>
dynamic dataFrame = converter.GetDataFrame(rawBars);
dynamic dataFrame = converter.GetDataFrame(new[] { new Slice(default(DateTime), rawBars) });
using (Py.GIL())
{
@@ -85,7 +85,7 @@ namespace QuantConnect.Tests.Python
.ToArray();
// GetDataFrame with argument of type IEnumerable<TradeBar>
dynamic dataFrame = converter.GetDataFrame(rawBars);
dynamic dataFrame = converter.GetDataFrame(rawBars.Select(x => new Slice(x.EndTime, new[] { x })));
using (Py.GIL())
{
@@ -140,7 +140,7 @@ namespace QuantConnect.Tests.Python
.ToArray();
// GetDataFrame with argument of type IEnumerable<QuoteBar>
dynamic dataFrame = converter.GetDataFrame(rawBars);
dynamic dataFrame = converter.GetDataFrame(rawBars.Select(x => new Slice(x.EndTime, new[] { x })));
using (Py.GIL())
{
@@ -198,11 +198,12 @@ namespace QuantConnect.Tests.Python
})
.ToArray();
rawBars[0].NullableInt = 0;
// Set to 1, otherwise the entire column will be dropped because all values were NaN and/or 0
rawBars[0].NullableInt = 1;
rawBars[1].NullableTime = new DateTime(2020, 1, 2);
// GetDataFrame with argument of type IEnumerable<QuoteBar>
dynamic dataFrame = converter.GetDataFrame(rawBars);
dynamic dataFrame = converter.GetDataFrame(rawBars.Select(x => new Slice(x.EndTime, new[] { x })));
using (Py.GIL())
{
@@ -1405,7 +1406,7 @@ def Test(dataFrame, symbol):
dynamic test = PythonEngine.ModuleFromString("testModule",
$@"
def Test(dataFrame, symbol):
data = dataFrame.lastprice.unstack(0)
data = dataFrame.lastprice.unstack(0)
data = data.resample('2S').sum()
data = data[{index}]
if data is 0:
@@ -1943,7 +1944,7 @@ def Test(dataFrame, symbol):
dynamic test = PythonEngine.ModuleFromString("testModule",
$@"
def Test(dataFrame, symbol):
series = dataFrame.lastprice.droplevel(0)
series = dataFrame.lastprice.droplevel(0)
data = series.asfreq(freq='30S')").GetAttr("Test");
Assert.DoesNotThrow(() => test(GetTestDataFrame(Symbols.SPY), Symbols.SPY));
@@ -2004,7 +2005,7 @@ def Test(dataFrame, symbol):
dynamic test = PythonEngine.ModuleFromString("testModule",
$@"
def Test(dataFrame, symbol):
series = dataFrame.lastprice.droplevel(0)
series = dataFrame.lastprice.droplevel(0)
data = series.at_time('04:00')").GetAttr("Test");
Assert.DoesNotThrow(() => test(GetTestDataFrame(Symbols.SPY), Symbols.SPY));
@@ -2042,7 +2043,7 @@ def Test(dataFrame, symbol):
dynamic test = PythonEngine.ModuleFromString("testModule",
$@"
def Test(dataFrame, symbol):
series = dataFrame.lastprice.droplevel(0)
series = dataFrame.lastprice.droplevel(0)
data = series.between_time('02:00', '06:00')").GetAttr("Test");
Assert.DoesNotThrow(() => test(GetTestDataFrame(Symbols.SPY), Symbols.SPY));
@@ -2217,7 +2218,7 @@ def Test(dataFrame, symbol):
dynamic test = PythonEngine.ModuleFromString("testModule",
$@"
def Test(dataFrame, symbol):
series = dataFrame.lastprice.droplevel(0)
series = dataFrame.lastprice.droplevel(0)
data = series.first('2S')").GetAttr("Test");
Assert.DoesNotThrow(() => test(GetTestDataFrame(Symbols.SPY, 10), Symbols.SPY));
@@ -2302,7 +2303,7 @@ def Test(df, other, symbol):
dynamic test = PythonEngine.ModuleFromString("testModule",
$@"
def Test(dataFrame, symbol):
series = dataFrame.lastprice.droplevel(0)
series = dataFrame.lastprice.droplevel(0)
data = series.last('2S')").GetAttr("Test");
Assert.DoesNotThrow(() => test(GetTestDataFrame(Symbols.SPY, 10), Symbols.SPY));
@@ -2868,7 +2869,7 @@ def Test(dataFrame, symbol):
.ToArray();
// GetDataFrame with argument of type IEnumerable<QuoteBar>
dynamic dataFrame = converter.GetDataFrame(rawBars);
dynamic dataFrame = converter.GetDataFrame(rawBars.Select(x => new Slice(x.EndTime, new[] { x })));
using (Py.GIL())
{
@@ -2929,7 +2930,7 @@ def Test(dataFrame, symbol):
.ToArray();
// GetDataFrame with argument of type IEnumerable<QuoteBar>
dynamic dataFrame = converter.GetDataFrame(rawBars);
dynamic dataFrame = converter.GetDataFrame(rawBars.Select(x => new Slice(x.EndTime, new[] { x })));
using (Py.GIL())
{
@@ -3002,14 +3003,28 @@ def Test(dataFrame, symbol):
}
// Act
dynamic dataFrame = converter.GetDataFrame(openinterest);
dynamic dataFrame = converter.GetDataFrame(openinterest.Select(x => new Slice(x.EndTime, new[] { x })));
//Assert
using (Py.GIL())
{
Assert.IsFalse(dataFrame.empty.AsManagedObject(typeof(bool)));
var subDataFrame = dataFrame.loc[symbol];
dynamic datetime = Py.Import("datetime");
dynamic subDataFrame;
if (symbol.SecurityType == SecurityType.Future)
{
subDataFrame = dataFrame.loc[datetime.datetime(symbol.ID.Date.Year, symbol.ID.Date.Month, symbol.ID.Date.Day)].loc[symbol];
}
else
{
var locals = new PyDict();
locals.SetItem("df", dataFrame);
locals.SetItem("datetime", datetime);
subDataFrame = PythonEngine.Eval($"df.loc[datetime.datetime({symbol.ID.Date.Year}, {symbol.ID.Date.Month}, {symbol.ID.Date.Day})].loc[{symbol.ID.StrikePrice.ToStringInvariant()}].loc['{symbol.ID.OptionRight.ToString()}'].loc['{symbol}']", null, locals.Handle);
}
Assert.IsFalse(subDataFrame.empty.AsManagedObject(typeof(bool)));
Assert.IsTrue(subDataFrame.get("openinterest") != null);
@@ -3049,7 +3064,7 @@ def Test(dataFrame, symbol):
.ToArray();
// GetDataFrame with argument of type IEnumerable<BaseData>
dynamic dataFrame = converter.GetDataFrame(rawBars);
dynamic dataFrame = converter.GetDataFrame(rawBars.Select(x => new Slice(x.EndTime, new[] { x })));
using (Py.GIL())
{
@@ -3122,7 +3137,7 @@ def Test(dataFrame, symbol):
.ToArray();
// GetDataFrame with argument of type IEnumerable<BaseData>
dynamic dataFrame = converter.GetDataFrame(rawBars);
dynamic dataFrame = converter.GetDataFrame(rawBars.Select(x => new Slice(x.EndTime, new[] { x })));
using (Py.GIL())
{
@@ -3184,7 +3199,7 @@ def Test(dataFrame, symbol):
var data = leanDataReader.Parse();
var converter = new PandasConverter();
// Act
dynamic df = converter.GetDataFrame(data);
dynamic df = converter.GetDataFrame(data.Select(x => new Slice(x.EndTime, new[] { x })));
// Assert
Assert.AreEqual(rowsInfile, df.shape[0].AsManagedObject(typeof(int)));
@@ -3210,7 +3225,7 @@ def Test(dataFrame, symbol):
var data = leanDataReader.Parse();
var converter = new PandasConverter();
// Act
dynamic df = converter.GetDataFrame(data);
dynamic df = converter.GetDataFrame(data.Select(x => new Slice(x.EndTime, new[] { x })));
// Assert
Assert.AreEqual(rowsInfile, df.shape[0].AsManagedObject(typeof(int)));
@@ -3333,4 +3348,4 @@ def Test(dataFrame, symbol):
public double? NullableColumn { get; set; }
}
}
}
}

View File

@@ -358,7 +358,7 @@ from hmmlearn import hmm
def RunTest():
# Build an HMM instance and set parameters
model = hmm.GaussianHMM(n_components=4, covariance_type='full')
# Instead of fitting it from the data, we directly set the estimated
# parameters, the means and covariance of the components
model.startprob_ = np.array([0.6, 0.3, 0.1, 0.0])
@@ -422,7 +422,7 @@ def RunTest():
]) + np.random.normal(0, 1, X.shape[0])
TRUE_PROB = expit(LINEAR_TERM)
Y = np.random.binomial(1, TRUE_PROB, size=N)
return {
'X': X,
'probability_labels': TRUE_PROB,
@@ -480,7 +480,7 @@ def RunTest():
error_action='ignore', # don't want to know if an order does not work
suppress_warnings=True, # don't want convergence warnings
stepwise=True) # set to stepwise
return stepwise_fit.summary()"
);
}
@@ -673,7 +673,7 @@ def RunTest():
mvo = MeanVarianceOptimisation()
mvo.allocate(asset_prices=stock_prices, solution='inverse_variance', resample_by='B')
ivp_weights = mvo.weights.sort_values(by=0, ascending=False, axis=1)
return f'HRP: {hrp_weights} IVP: {ivp_weights}'"
);
}
@@ -820,7 +820,7 @@ def RunTest():
# getting parameters
p = cop.params
# cop.params = ... # you can override parameters too, even after it's fitted!
# cop.params = ... # you can override parameters too, even after it's fitted!
# get a summary of the copula. If it's fitted, fit details will be present too
return cop.summary()"
@@ -882,6 +882,7 @@ def RunTest():
[TestCase("cntk", "2.7", "__version__")]
[TestCase("featuretools", "0.13.4", "__version__")]
[TestCase("pennylane", "0.8.1", "version()")]
[TestCase("pyarrow", "1.0.1", "__version__")]
public void ModuleVersionTest(string module, string value, string attribute)
{
AssetCode(

View File

@@ -780,6 +780,7 @@
<Compile Include="Optimizer\Strategies\GridSearchOptimizationStrategyTests.cs" />
<Compile Include="Optimizer\Objectives\ConstraintTests.cs" />
<Compile Include="Optimizer\Objectives\TargetTests.cs" />
<Compile Include="Python\PandasConverterTests.Arrow.cs" />
<Compile Include="Research\QuantBookFundamentalTests.cs" />
<Compile Include="Python\PythonOptionTests.cs" />
<Compile Include="Report\DrawdownCollectionTests.cs" />

View File

@@ -26,6 +26,10 @@
<assemblyIdentity name="System.Collections.Immutable" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
<bindingRedirect oldVersion="0.0.0.0-1.2.5.0" newVersion="1.2.5.0" />
</dependentAssembly>
<dependentAssembly>
<assemblyIdentity name="System.Threading.Tasks.Extensions" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
<bindingRedirect oldVersion="0.0.0.0-4.2.0.1" newVersion="4.2.0.1" />
</dependentAssembly>
</assemblyBinding>
</runtime>
<startup><supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6.2" /></startup></configuration>

View File

@@ -18,6 +18,10 @@
<assemblyIdentity name="System.IO.Compression" publicKeyToken="b77a5c561934e089" culture="neutral" />
<bindingRedirect oldVersion="0.0.0.0-4.2.0.0" newVersion="4.2.0.0" />
</dependentAssembly>
<dependentAssembly>
<assemblyIdentity name="System.Threading.Tasks.Extensions" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
<bindingRedirect oldVersion="0.0.0.0-4.1.1.0" newVersion="4.1.1.0" />
</dependentAssembly>
</assemblyBinding>
</runtime>
<startup><supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6.2" /></startup></configuration>